From 5bcfdb5a8def74c478152342289275e191164e04 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Thu, 24 Jul 2025 14:11:26 +0200 Subject: [PATCH 01/10] migrate to uv Signed-off-by: alessiodevoto --- .flake8 | 1 + .github/workflows/style.yml | 10 ++--- .github/workflows/test.yml | 10 ++--- .gitignore | 2 +- Makefile | 12 ++--- README.md | 4 +- pyproject.toml | 89 ++++++++++++++++++++----------------- 7 files changed, 67 insertions(+), 61 deletions(-) diff --git a/.flake8 b/.flake8 index c8ce289f..06b60df0 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,6 @@ [flake8] max-line-length = 120 +exclude = .venv,venv,.git,__pycache__,build,dist per-file-ignores = __init__.py:F401 evaluation/benchmarks/infinite_bench/create_huggingface_dataset.py:E501 diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 19f6b008..75cec708 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -15,13 +15,13 @@ jobs: with: python-version: 3.10.11 - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - echo "$HOME/.local/bin" >> $GITHUB_PATH # Add Poetry to the PATH + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + version: "latest" - name: Install dependencies - run: poetry install --with dev + run: uv sync --group dev - name: Run style checks run: make style diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 07eafff1..e98d66c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,12 +15,12 @@ jobs: with: python-version: 3.10.11 - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - echo "$HOME/.local/bin" >> $GITHUB_PATH # Add Poetry to the PATH + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + version: "latest" - name: Install dependencies - run: poetry install --with dev + run: uv sync --group dev - run: make test diff --git a/.gitignore b/.gitignore index c3c82406..0d05a95d 100644 --- a/.gitignore 
+++ b/.gitignore @@ -2,7 +2,7 @@ dev_notebooks/ results/ reports/ .DS_Store -poetry.lock +uv.lock *.parquet # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Makefile b/Makefile index 9015c401..fbd14aa9 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -POETRY ?= $(shell which poetry) +UV ?= $(shell which uv) BUILD_VERSION:=$(APP_VERSION) TESTS_FILTER:= @@ -7,11 +7,11 @@ PYTEST_LOG=--log-cli-level=debug --log-format="%(asctime)s %(levelname)s [%(name .PHONY: isort isort: - $(POETRY) run isort . + $(UV) run isort . .PHONY: black black: - $(POETRY) run black . + $(UV) run black . PHONY: format format: isort black @@ -24,10 +24,10 @@ style: reports @echo -n > reports/copyright_errors.log @echo - -$(POETRY) run flake8 | tee -a reports/flake8_errors.log + -$(UV) run flake8 | tee -a reports/flake8_errors.log @if [ -s reports/flake8_errors.log ]; then exit 1; fi - -$(POETRY) run mypy . --check-untyped-defs | tee -a reports/mypy.log + -$(UV) run mypy . --check-untyped-defs | tee -a reports/mypy.log @if ! grep -Eq "Success: no issues found in [0-9]+ source files" reports/mypy.log ; then exit 1; fi @echo "Checking for SPDX-FileCopyrightText headers in Python files..." @@ -42,7 +42,7 @@ reports: .PHONY: test test: reports PYTHONPATH=. 
\ - $(POETRY) run pytest \ + $(UV) run pytest \ --cov-report xml:reports/coverage.xml \ --cov=kvpress/ \ --junitxml=./reports/junit.xml \ diff --git a/README.md b/README.md index 84c18797..8b8b348f 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,12 @@ If possible, install flash attention: pip install flash-attn --no-build-isolation ``` -For a local installation with all dev dependencies, use poetry: +For a local installation with all dev dependencies, use uv: ```bash git clone https://github.com/NVIDIA/kvpress.git cd kvpress -poetry install --with dev +uv sync --dev ``` ## Usage diff --git a/pyproject.toml b/pyproject.toml index 9f282be3..3e1f0571 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,50 +1,55 @@ -[tool.poetry] +[project] name = "kvpress" -authors = ["Simon Jegou", "Maximilian Jeblick", "Alessio Devoto", "Jiwei Liu", "David Austin"] -description = "Efficiently compress the KV cache of any pretrained transformer" version = "0.2.8" +description = "Efficiently compress the KV cache of any pretrained transformer" +authors = [ + { name = "Simon Jegou" }, + { name = "Maximilian Jeblick" }, + { name = "Alessio Devoto" }, + { name = "Jiwei Liu" }, + { name = "David Austin" }, +] +requires-python = ">=3.10" readme = "README.md" +dependencies = [ + "numpy>=2.0.0,<3", + "torch>=2.3.1,<3", + "transformers>=4.48.0", + "sentencepiece>=0.2.0,<0.3", + "protobuf>=5.27.2,<6", + "datasets>=2.21.0,<3", + "pandas>=2.2.2,<3", + "accelerate>=1.0.0,<2", + "requests>=2.32.3,<3", + "cachetools>=5.5.2,<6", +] -[tool.poetry.dependencies] -python = ">=3.10" -numpy = "^2.0.0" -torch = "^2.3.1" -transformers = ">=4.48.0" -sentencepiece = "^0.2.0" -protobuf = "^5.27.2" -datasets = "^2.21.0" -pandas = "^2.2.2" -accelerate = "^1.0.0" -requests = "^2.32.3" -cachetools = "^5.5.2" - -[tool.poetry.group.dev] -optional = true - -[tool.poetry.group.dev.dependencies] -pytest = "^7.0.0" -flake8 = "^7.0.0" -isort = "^5.13.2" -black = "^24.8.0" -mypy = "^1.13.0" -pytest-cov = "^5.0.0" 
-pytest-dependency = "^0.6.0" -pytest-html = ">=4.1.1, <5.0.0" -types-pyyaml = "^6.0" -ipykernel = "^6.29.4" -bs4 = "^0.0.2" -nvitop = "^1.3.2" -bert-score = "^0.3.13" -rouge = "^1.0.1" -nltk = "^3.9.1" -tqdm = "^4.66.4" -scipy = "^1.13.1" -matplotlib = "^3.9.0" -fire = "^0.6.0" +[dependency-groups] +dev = [ + "pytest>=7.0.0,<8", + "flake8>=7.0.0,<8", + "isort>=5.13.2,<6", + "black>=24.8.0,<25", + "mypy>=1.13.0,<2", + "pytest-cov>=5.0.0,<6", + "pytest-dependency>=0.6.0,<0.7", + "pytest-html>=4.1.1, <5.0.0", + "types-pyyaml~=6.0", + "ipykernel>=6.29.4,<7", + "bs4>=0.0.2,<0.0.3", + "nvitop>=1.3.2,<2", + "bert-score>=0.3.13,<0.4", + "rouge>=1.0.1,<2", + "nltk>=3.9.1,<4", + "tqdm>=4.66.4,<5", + "scipy>=1.13.1,<2", + "matplotlib>=3.9.0,<4", + "fire>=0.6.0,<0.7", +] [build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +requires = ["hatchling"] +build-backend = "hatchling.build" [tool.black] line-length = 120 @@ -73,4 +78,4 @@ disable_error_code = ["attr-defined"] [[tool.mypy.overrides]] module = "kvpress.pipeline" -disable_error_code = ["attr-defined", "assignment", "override"] \ No newline at end of file +disable_error_code = ["attr-defined", "assignment", "override"] From fdcf270f6551d0f29761387c06b04c2734c95651 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Fri, 1 Aug 2025 12:11:14 +0000 Subject: [PATCH 02/10] transformers dependency Signed-off-by: alessiodevoto --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6f4b6d66..8c190e7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ readme = "README.md" dependencies = [ "numpy>=2.0.0,<3", "torch>=2.3.1,<3", - "transformers>=4.48.0, <4.54", + "transformers>=4.48.0, <4.54.0", "sentencepiece>=0.2.0,<0.3", "protobuf>=5.27.2,<6", "datasets>=2.21.0,<3", From f85e55e04f2233b7bde812df4943c787e6f01602 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Fri, 1 Aug 2025 12:12:06 +0000 Subject: [PATCH 03/10] 
typo Signed-off-by: alessiodevoto --- .flake8 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.flake8 b/.flake8 index 26639104..e63af634 100644 --- a/.flake8 +++ b/.flake8 @@ -1,7 +1,6 @@ [flake8] -exclude = .venv,venv,.git,__pycache__,build,dist, .mypy_cache -max-line-length = 120 -exclude = .venv,venv,.git,__pycache__,build,dist +exclude = .venv,venv,.git,__pycache__,build,dist, .mypy_cache, .pytest_cache +max-line-length = 120 per-file-ignores = __init__.py:F401 evaluation/benchmarks/infinite_bench/create_huggingface_dataset.py:E501 From f863a61bbd4077edd277d879c6794d008adde7bd Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Fri, 1 Aug 2025 12:55:00 +0000 Subject: [PATCH 04/10] better dependency groups Signed-off-by: alessiodevoto --- .github/workflows/style.yml | 4 ++-- .github/workflows/test.yml | 5 ++--- pyproject.toml | 14 ++++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 75cec708..3b422cc2 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -18,10 +18,10 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v6 with: - version: "latest" + enable_cache: true - name: Install dependencies - run: uv sync --group dev + run: uv sync --locked --all-groups - name: Run style checks run: make style diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e98d66c9..7dda6ef9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,7 +2,6 @@ name: Test on: push: - branches: [ main ] pull_request: jobs: @@ -18,9 +17,9 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v6 with: - version: "latest" + enable_cache: true - name: Install dependencies - run: uv sync --group dev + run: uv sync --locked --all-groups - run: make test diff --git a/pyproject.toml b/pyproject.toml index 8c190e7e..37e0f22d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,14 @@ dependencies = [ ] 
[dependency-groups] +eval = [ + "rouge>=1.0.1,<2", + "nltk>=3.9.1,<4", + "tqdm>=4.66.4,<5", + "scipy>=1.13.1,<2", + "fire>=0.6.0,<0.7", + "bert-score>=0.3.13,<0.4", +] dev = [ "pytest>=7.0.0,<8", "flake8>=7.0.0,<8", @@ -38,13 +46,7 @@ dev = [ "ipykernel>=6.29.4,<7", "bs4>=0.0.2,<0.0.3", "nvitop>=1.3.2,<2", - "bert-score>=0.3.13,<0.4", - "rouge>=1.0.1,<2", - "nltk>=3.9.1,<4", - "tqdm>=4.66.4,<5", - "scipy>=1.13.1,<2", "matplotlib>=3.9.0,<4", - "fire>=0.6.0,<0.7", ] [build-system] From 6dfa2d528d185ddf76991eb4b624a9dd8b1e4868 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Fri, 1 Aug 2025 12:59:34 +0000 Subject: [PATCH 05/10] remove lock Signed-off-by: alessiodevoto --- .github/workflows/style.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 3b422cc2..ca254d5b 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -21,7 +21,7 @@ jobs: enable_cache: true - name: Install dependencies - run: uv sync --locked --all-groups + run: uv sync --all-groups - name: Run style checks run: make style diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7dda6ef9..f6ed8a9a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,6 +20,6 @@ jobs: enable_cache: true - name: Install dependencies - run: uv sync --locked --all-groups + run: uv sync --all-groups - run: make test From d1edb165af08f6b28f5810a3817b961f8d83c8a6 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Fri, 1 Aug 2025 13:11:47 +0000 Subject: [PATCH 06/10] poetry --> uv Signed-off-by: alessiodevoto --- .github/workflows/style.yml | 2 +- .github/workflows/test.yml | 3 ++- README.md | 2 +- evaluation/README.md | 1 + pyproject.toml | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index ca254d5b..8ba50a9c 100644 --- a/.github/workflows/style.yml +++ 
b/.github/workflows/style.yml @@ -18,7 +18,7 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v6 with: - enable_cache: true + enable-cache: true - name: Install dependencies run: uv sync --all-groups diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f6ed8a9a..289fe312 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,6 +2,7 @@ name: Test on: push: + branches: [ main ] pull_request: jobs: @@ -17,7 +18,7 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v6 with: - enable_cache: true + enable-cache: true - name: Install dependencies run: uv sync --all-groups diff --git a/README.md b/README.md index fbef4fc9..11e991e8 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ For a local installation with all dev dependencies, use uv: ```bash git clone https://github.com/NVIDIA/kvpress.git cd kvpress -uv sync --dev +uv sync --all-groups ``` ## Usage diff --git a/evaluation/README.md b/evaluation/README.md index d5ba00e1..bd78f400 100644 --- a/evaluation/README.md +++ b/evaluation/README.md @@ -5,6 +5,7 @@ We support evaluation for all the presses implemented in the library, on a variety of popular benchmarks. ### Quick Start 🚀 +> Evaluation requires some additional packages. You can install them with `uv sync --group eval` Running evaluation is straightforward! 
Make sure you are in the `evaluation` directory, then: diff --git a/pyproject.toml b/pyproject.toml index 37e0f22d..647161c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,7 @@ skip = ["venv", ".venv"] ignore_missing_imports = true allow_redefinition = true strict_optional = false -exclude = "(.eggs|.git|.hg|.mypy_cache|.nox|.tox|venv|.venv|doc-venv|.svn|_build|buck-out|build|dist|notebooks|tools|tmp|tests|bundles)" +exclude = "(.eggs|.git|.hg|.mypy_cache|.nox|.tox|venv|.venv|doc-venv|.svn|_build|buck-out|build|dist|notebooks|tools|tmp|tests|bundles|.pytest_cache|reports)" disable_error_code = ["union-attr", "operator", "call-overload", "arg-type"] [[tool.mypy.overrides]] From 45cca5f8bdd5216b841255a6c6d5b96e693cb64c Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Fri, 1 Aug 2025 13:22:49 +0000 Subject: [PATCH 07/10] fix docstrings and disable thinking Signed-off-by: alessiodevoto --- kvpress/pipeline.py | 5 ++++- kvpress/presses/block_press.py | 7 ++++--- kvpress/presses/keydiff_press.py | 6 ++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/kvpress/pipeline.py b/kvpress/pipeline.py index c4da1e4a..606f8575 100644 --- a/kvpress/pipeline.py +++ b/kvpress/pipeline.py @@ -135,7 +135,10 @@ def preprocess( else: separator = "\n" + "#" * len(context) context = self.tokenizer.apply_chat_template( - [{"role": "user", "content": context + separator}], add_generation_prompt=True, tokenize=False + [{"role": "user", "content": context + separator}], + add_generation_prompt=True, + tokenize=False, + enable_thinking=False, ) context, question_suffix = context.split(separator) diff --git a/kvpress/presses/block_press.py b/kvpress/presses/block_press.py index 6ae50ca5..6d8da788 100644 --- a/kvpress/presses/block_press.py +++ b/kvpress/presses/block_press.py @@ -16,10 +16,11 @@ class BlockPress(BasePress): BlockPress: Block-wise iterative KV cache compression. Applies compression in fixed-size blocks. 
Iteratively scores and prunes tokens block by block, maintaining - a buffer of previously kept tokens for context. Mathematically equivalent - to global compression when scoring uses only local information. + a buffer of previously kept tokens for context. Mathematically equivalent to global compression when + scoring uses only local information. It was introduced in the KeyDiff paper as part of the KeyDiff press, + but it can also work as a standalone press. - Based on BlockPress (https://arxiv.org/abs/2504.15364). + Based on the KeyDiff paper (https://arxiv.org/abs/2504.15364). Parameters ---------- diff --git a/kvpress/presses/keydiff_press.py b/kvpress/presses/keydiff_press.py index 3932eea9..b2bba83f 100644 --- a/kvpress/presses/keydiff_press.py +++ b/kvpress/presses/keydiff_press.py @@ -21,6 +21,12 @@ class KeyDiffPress(ScorerPress): Based on KeyDiff (https://arxiv.org/abs/2504.15364). + Note: The original press in the KeyDiff paper implements a block-wise iterative compression. + In KVPress, the iterative compression is implemented in the BlockPress class. 
+ Therefore, to replicate the paper's implementation, please use: + + `press = BlockPress(press=KeyDiffPress(compression_ratio=0.x), block_size=N)` + Parameters ---------- compression_ratio : float, default=0.0 From 471bbd44a3688a13114238e39781c119fce4866a Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Tue, 5 Aug 2025 16:40:17 +0000 Subject: [PATCH 08/10] update uv Signed-off-by: alessiodevoto --- .flake8 | 4 ++-- .github/workflows/python-publish.yml | 18 +++++++---------- README.md | 30 +++++++++++++++++++++------- kvpress/presses/kvzip_press.py | 5 ++++- pyproject.toml | 15 +++++++++++--- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/.flake8 b/.flake8 index e63af634..456a8107 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ [flake8] -exclude = .venv,venv,.git,__pycache__,build,dist, .mypy_cache, .pytest_cache -max-line-length = 120 +exclude = .venv,venv,.git,__pycache__,build,dist,.mypy_cache,.pytest_cache +max-line-length = 120 per-file-ignores = __init__.py:F401 evaluation/benchmarks/infinite_bench/create_huggingface_dataset.py:E501 diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 6a67207a..8ef09a04 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -3,6 +3,7 @@ name: Upload Python Package on: release: types: [published] + pull_request: permissions: contents: read @@ -18,14 +19,9 @@ jobs: uses: actions/setup-python@v3 with: python-version: 3.10.11 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Build package - run: python -m build - - name: Publish package - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + + - name: Install uv + uses: astral-sh/setup-uv@v6 + + - name: Build + run: uv build --no-sources \ No newline at end of file diff --git a/README.md b/README.md index 11e991e8..192178f3 100644 
--- a/README.md +++ b/README.md @@ -16,11 +16,6 @@ Deploying long-context LLMs is costly due to the linear growth of the key-value pip install kvpress ``` -If possible, install flash attention: -```bash -pip install flash-attn --no-build-isolation -``` - For a local installation with all dev dependencies, use uv: ```bash @@ -31,7 +26,7 @@ uv sync --all-groups ## Usage -kvpress provides a set of "presses" that compress the KV cache during the prefilling-phase. Each press is associated with a `compression_ratio` attribute that measures the compression of the cache. The easiest way to use a press is through our custom `KVPressTextGenerationPipeline`. It is automatically registered as a transformers pipeline with the name "kv-press-text-generation" when kvpress is imported and handles chat templates and tokenization for you: +KVPress provides a set of "presses" that compress the KV cache during the prefilling-phase. Each press is associated with a `compression_ratio` attribute that measures the compression of the cache. The easiest way to use a press is through our custom `KVPressTextGenerationPipeline`. It is automatically registered as a transformers pipeline with the name "kv-press-text-generation" when kvpress is imported and handles chat templates and tokenization for you: ```python from transformers import pipeline @@ -208,4 +203,25 @@ with press(model): However, the `generate` method does not allow to exclude the question from the compression, which would artificially favors methods such as SnapKV. Ideally, we want a compression method that works whatever comes after the context (_e.g._ for use cases such as chat or document question answering). Finally the `generate` method does not allow to provide generation for multiple questions at once. - \ No newline at end of file + + + +## Advances installation settings +To install optional packages, you can use [uv](https://docs.astral.sh/uv/). 
+To install with flash attention, just run: + +```bash +git clone https://github.com/NVIDIA/kvpress.git +cd kvpress +uv sync --extra flash-attn +``` + +To install with dependencies for evaluation, run + +```bash +git clone https://github.com/NVIDIA/kvpress.git +cd kvpress +uv sync --extra eval +``` + +Notice that optional dependecies can be combined. Installation with uv is editable by default, meaning that changes to local files are applied automatically. \ No newline at end of file diff --git a/kvpress/presses/kvzip_press.py b/kvpress/presses/kvzip_press.py index f41f7478..0c68fd79 100644 --- a/kvpress/presses/kvzip_press.py +++ b/kvpress/presses/kvzip_press.py @@ -101,7 +101,10 @@ def __call__(self, model: PreTrainedModel) -> Generator: dummy_context = "dummy context" separator = "\n" + "#" * len(dummy_context) temp_context = tokenizer.apply_chat_template( - [{"role": "user", "content": dummy_context + separator}], add_generation_prompt=True, tokenize=False + [{"role": "user", "content": dummy_context + separator}], + add_generation_prompt=True, + tokenize=False, + enable_thinking=False, ) context, suffix_text = temp_context.split(separator) prefix_text = context.split(dummy_context)[0] diff --git a/pyproject.toml b/pyproject.toml index 647161c7..bf9e226c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "kvpress" -version = "0.2.8" +version = "0.2.10" description = "Efficiently compress the KV cache of any pretrained transformer" authors = [ { name = "Simon Jegou" }, @@ -24,7 +24,7 @@ dependencies = [ "cachetools>=5.5.2,<6", ] -[dependency-groups] +[project.optional-dependencies] eval = [ "rouge>=1.0.1,<2", "nltk>=3.9.1,<4", @@ -33,6 +33,11 @@ eval = [ "fire>=0.6.0,<0.7", "bert-score>=0.3.13,<0.4", ] +flash-attn = [ + "flash-attn" +] + +[dependency-groups] dev = [ "pytest>=7.0.0,<8", "flake8>=7.0.0,<8", @@ -49,10 +54,14 @@ dev = [ "matplotlib>=3.9.0,<4", ] + [build-system] requires = ["hatchling"] build-backend = 
"hatchling.build" +[tool.uv] +no-build-isolation-package = ["flash-attn"] + [tool.black] line-length = 120 target_version = ["py310"] @@ -80,4 +89,4 @@ disable_error_code = ["attr-defined"] [[tool.mypy.overrides]] module = "kvpress.pipeline" -disable_error_code = ["attr-defined", "assignment", "override"] +disable_error_code = ["attr-defined", "assignment", "override"] \ No newline at end of file From ce4233718954e04e29eb72fdc62592d6eefe7265 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Tue, 5 Aug 2025 16:49:27 +0000 Subject: [PATCH 09/10] update publish pipeline Signed-off-by: alessiodevoto --- .github/workflows/python-publish.yml | 9 +++++++-- README.md | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8ef09a04..b8819c88 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -3,7 +3,6 @@ name: Upload Python Package on: release: types: [published] - pull_request: permissions: contents: read @@ -24,4 +23,10 @@ jobs: uses: astral-sh/setup-uv@v6 - name: Build - run: uv build --no-sources \ No newline at end of file + run: uv build --no-sources + + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 192178f3..2a4c08da 100644 --- a/README.md +++ b/README.md @@ -206,7 +206,7 @@ However, the `generate` method does not allow to exclude the question from the c -## Advances installation settings +## Advanced installation settings To install optional packages, you can use [uv](https://docs.astral.sh/uv/). 
To install with flash attention, just run: From e8babd1b4142ec6c5386053ea780ba478bdd1729 Mon Sep 17 00:00:00 2001 From: alessiodevoto Date: Tue, 5 Aug 2025 16:52:31 +0000 Subject: [PATCH 10/10] readme Signed-off-by: alessiodevoto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a4c08da..4e412ae5 100644 --- a/README.md +++ b/README.md @@ -224,4 +224,4 @@ cd kvpress uv sync --extra eval ``` -Notice that optional dependecies can be combined. Installation with uv is editable by default, meaning that changes to local files are applied automatically. \ No newline at end of file +Notice that optional dependencies can be combined. \ No newline at end of file