From f8d0734d4927cf73f3c940c26b19478c56277ccd Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 28 Jan 2026 22:13:17 +0000
Subject: [PATCH] feat: Add TIFF support to document and image generate routes

- Add tiff.py utility for extracting pages from multi-page TIFF files
- Add .tif to SUPPORTED_IMAGE_FILETYPES
- Add .tif and .tiff to SUPPORTED_DOCUMENT_FILETYPES
- Update CLI generate.py to handle multi-page TIFFs in both image and document commands
- Update version to 0.17.18

Co-Authored-By: dinesh@vlm.run
---
Review notes (this section is ignored by `git am`):
- This patch was re-flowed from a copy whose newlines and indentation had
  been collapsed. The leading whitespace of the context lines in
  vlmrun/cli/_cli/chat.py, and of the whitespace-only line removed there,
  is a best-effort reconstruction: verify it against the tree, or apply
  with `git am --ignore-whitespace`.
- tiff_images() now opens the file in a `with` block so the handle is
  released even when iteration stops early, and is annotated as
  Iterator[TiffPage]. The `index` blob ids of generate.py and tiff.py
  predate these review edits.

 vlmrun/cli/_cli/chat.py     |  6 ++--
 vlmrun/cli/_cli/generate.py | 35 ++++++++++++++++++--
 vlmrun/common/tiff.py       | 58 +++++++++++++++++++++++++++++++++++++
 vlmrun/constants.py         | 13 +++++++--
 vlmrun/version.py           |  2 +-
 5 files changed, 106 insertions(+), 8 deletions(-)
 create mode 100644 vlmrun/common/tiff.py

diff --git a/vlmrun/cli/_cli/chat.py b/vlmrun/cli/_cli/chat.py
index 1741b1e..300398c 100644
--- a/vlmrun/cli/_cli/chat.py
+++ b/vlmrun/cli/_cli/chat.py
@@ -598,7 +598,9 @@ def chat(
     response_content = ""
     usage_data: Optional[Dict[str, Any]] = None
     response_id: Optional[str] = None
-    extra_body: Optional[Dict[str, Any]] = {"session_id": session_id} if session_id else None
+    extra_body: Optional[Dict[str, Any]] = (
+        {"session_id": session_id} if session_id else None
+    )
 
     start_time = time.time()
 
@@ -613,7 +615,7 @@ def chat(
         ),
         handle_api_errors(),
     ):
-        
+
         response = client.agent.completions.create(
             model=model,
             messages=messages,
diff --git a/vlmrun/cli/_cli/generate.py b/vlmrun/cli/_cli/generate.py
index ff16b39..6572cad 100644
--- a/vlmrun/cli/_cli/generate.py
+++ b/vlmrun/cli/_cli/generate.py
@@ -1,6 +1,7 @@
 """Generation API commands."""
 
 from pathlib import Path
+from typing import List
 
 import typer
 from PIL import Image
@@ -9,6 +10,28 @@
 from vlmrun.client import VLMRun
 from vlmrun.client.types import PredictionResponse
 from vlmrun.common.image import _open_image_with_exif
+from vlmrun.common.tiff import tiff_images
+
+
+def _is_tiff_file(path: Path) -> bool:
+    """Check if a file is a TIFF file."""
+    return Path(path).suffix.lower() in (".tif", ".tiff")
+
+
+def _load_images_from_file(path: Path) -> List[Image.Image]:
+    """Load images from a file, handling multi-page TIFFs.
+
+    Args:
+        path: Path to the image file
+
+    Returns:
+        List of PIL Image objects
+    """
+    if _is_tiff_file(path):
+        return [page.image for page in tiff_images(path)]
+    else:
+        return [_open_image_with_exif(path)]
+
 
 app = typer.Typer(help="Generation operations", no_args_is_help=True)
 
@@ -28,8 +51,8 @@ def image(
     if not Path(image).is_file():
         raise typer.Abort(f"Image file does not exist: {image}")
 
-    img: Image.Image = _open_image_with_exif(image)
-    response: PredictionResponse = client.image.generate(images=[img], domain=domain)
+    images: List[Image.Image] = _load_images_from_file(image)
+    response: PredictionResponse = client.image.generate(images=images, domain=domain)
     rprint(response)
 
 
@@ -48,5 +71,11 @@ def document(
     if not Path(path).is_file():
         raise typer.Abort(f"Document file does not exist: {path}")
 
-    response = client.document.generate(file=path, domain=domain)
+    if _is_tiff_file(path):
+        images: List[Image.Image] = _load_images_from_file(path)
+        response: PredictionResponse = client.image.generate(
+            images=images, domain=domain
+        )
+    else:
+        response = client.document.generate(file=path, domain=domain)
     rprint(response)
diff --git a/vlmrun/common/tiff.py b/vlmrun/common/tiff.py
new file mode 100644
index 0000000..fb7ef86
--- /dev/null
+++ b/vlmrun/common/tiff.py
@@ -0,0 +1,58 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterator, Union
+
+from PIL import Image
+
+from vlmrun.common.logging import logger
+
+
+@dataclass
+class TiffPage:
+    """A single page (frame) extracted from a TIFF file."""
+
+    image: Image.Image
+    """The image of the page."""
+    page_number: int
+    """The zero-based page number of the page."""
+
+
+def tiff_images(
+    path: Union[str, Path],
+) -> Iterator[TiffPage]:
+    """Lazily extract images from a multi-page TIFF file.
+
+    This is a generator, so the extension check and file access happen on
+    the first iteration rather than at call time.
+
+    Args:
+        path: Path to the TIFF file
+
+    Yields:
+        TiffPage objects containing the RGB image and page number
+
+    Raises:
+        ValueError: If the file extension is not .tif or .tiff
+    """
+    path = Path(path)
+    if path.suffix.lower() not in (".tif", ".tiff"):
+        raise ValueError(
+            f"Unsupported file type: {path.suffix}. Supported types are .tif, .tiff"
+        )
+
+    logger.debug(
+        f"Extracting images from TIFF [path={path}, size={path.stat().st_size / 1024 / 1024:.2f} MB]"
+    )
+
+    # The context manager closes the file even when the consumer stops
+    # iterating early or a frame fails to decode.
+    with Image.open(path) as img:
+        n_frames = getattr(img, "n_frames", 1)
+        logger.debug(f"TIFF has {n_frames} frame(s)")
+
+        for i in range(n_frames):
+            img.seek(i)
+            # convert() returns a new image, so no extra copy() is needed.
+            yield TiffPage(image=img.convert("RGB"), page_number=i)
+
+    logger.debug(f"Closed TIFF file [path={path}]")
diff --git a/vlmrun/constants.py b/vlmrun/constants.py
index a6670bf..466284c 100644
--- a/vlmrun/constants.py
+++ b/vlmrun/constants.py
@@ -18,8 +18,17 @@
 VLMRUN_TMP_DIR.mkdir(parents=True, exist_ok=True)
 
 SUPPORTED_VIDEO_FILETYPES = [".mp4", ".mov", ".avi", ".mkv", ".webm"]
-SUPPORTED_IMAGE_FILETYPES = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]
-SUPPORTED_DOCUMENT_FILETYPES = [".pdf", ".doc", ".docx"]
+SUPPORTED_IMAGE_FILETYPES = [
+    ".jpg",
+    ".jpeg",
+    ".png",
+    ".gif",
+    ".bmp",
+    ".tif",
+    ".tiff",
+    ".webp",
+]
+SUPPORTED_DOCUMENT_FILETYPES = [".pdf", ".doc", ".docx", ".tif", ".tiff"]
 SUPPORTED_AUDIO_FILETYPES = [".mp3", ".wav", ".m4a", ".flac", ".ogg"]
 
 # All supported file types for the chat CLI
diff --git a/vlmrun/version.py b/vlmrun/version.py
index 6b27eee..bf4d019 100644
--- a/vlmrun/version.py
+++ b/vlmrun/version.py
@@ -1 +1 @@
-__version__ = "0.5.4"
+__version__ = "0.17.18"