Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions vlmrun/cli/_cli/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,9 @@ def chat(
response_content = ""
usage_data: Optional[Dict[str, Any]] = None
response_id: Optional[str] = None
extra_body: Optional[Dict[str, Any]] = {"session_id": session_id} if session_id else None
extra_body: Optional[Dict[str, Any]] = (
{"session_id": session_id} if session_id else None
)

start_time = time.time()

Expand All @@ -613,7 +615,7 @@ def chat(
),
handle_api_errors(),
):

response = client.agent.completions.create(
model=model,
messages=messages,
Expand Down
35 changes: 32 additions & 3 deletions vlmrun/cli/_cli/generate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Generation API commands."""

from pathlib import Path
from typing import List

import typer
from PIL import Image
Expand All @@ -9,6 +10,28 @@
from vlmrun.client import VLMRun
from vlmrun.client.types import PredictionResponse
from vlmrun.common.image import _open_image_with_exif
from vlmrun.common.tiff import tiff_images


def _is_tiff_file(path: Path) -> bool:
"""Check if a file is a TIFF file."""
return path.suffix.lower() in (".tif", ".tiff")


def _load_images_from_file(path: Path) -> List[Image.Image]:
    """Read every image stored in ``path``.

    Files with a TIFF extension are expanded through ``tiff_images`` so that
    each page becomes its own list entry; any other file is opened with
    ``_open_image_with_exif`` and returned as a single-element list.

    Args:
        path: Path to the image file

    Returns:
        List of PIL Image objects
    """
    # Non-TIFF inputs take the short path: exactly one image per file.
    if not _is_tiff_file(path):
        return [_open_image_with_exif(path)]

    frames: List[Image.Image] = []
    for page in tiff_images(path):
        frames.append(page.image)
    return frames


app = typer.Typer(help="Generation operations", no_args_is_help=True)

Expand All @@ -28,8 +51,8 @@ def image(
if not Path(image).is_file():
raise typer.Abort(f"Image file does not exist: {image}")

img: Image.Image = _open_image_with_exif(image)
response: PredictionResponse = client.image.generate(images=[img], domain=domain)
images: List[Image.Image] = _load_images_from_file(image)
response: PredictionResponse = client.image.generate(images=images, domain=domain)
rprint(response)


Expand All @@ -48,5 +71,11 @@ def document(
if not Path(path).is_file():
raise typer.Abort(f"Document file does not exist: {path}")

response = client.document.generate(file=path, domain=domain)
if _is_tiff_file(path):
images: List[Image.Image] = [page.image for page in tiff_images(path)]
response: PredictionResponse = client.image.generate(
images=images, domain=domain
)
else:
response = client.document.generate(file=path, domain=domain)
rprint(response)
49 changes: 49 additions & 0 deletions vlmrun/common/tiff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Union

from PIL import Image

from vlmrun.common.logging import logger


@dataclass
class TiffPage:
    """One page taken from a TIFF file, paired with its page number."""

    # The image of the page.
    image: Image.Image
    # The page number of the page.
    page_number: int


def tiff_images(
    path: Path,
) -> Iterable[Union[Image.Image, TiffPage]]:
    """Extract the pages of a (possibly multi-page) TIFF file as RGB images.

    This is a generator, so nothing runs until the first item is requested;
    that includes the extension check below.

    Args:
        path: Path to the TIFF file (``.tif`` or ``.tiff``, case-insensitive)

    Yields:
        TiffPage objects containing an RGB copy of the frame and its
        zero-based page number

    Raises:
        ValueError: If the file extension is not ``.tif`` or ``.tiff``.
    """
    if path.suffix.lower() not in (".tif", ".tiff"):
        raise ValueError(
            f"Unsupported file type: {path.suffix}. Supported types are .tif, .tiff"
        )

    logger.debug(
        f"Extracting images from TIFF [path={path}, size={path.stat().st_size / 1024 / 1024:.2f} MB]"
    )

    # Use a context manager so the file handle is released even when the
    # consumer stops iterating early or a frame fails to decode. A trailing
    # img.close() after the loop would be skipped in both of those cases.
    with Image.open(path) as img:
        # Images without an n_frames attribute are treated as single-page.
        n_frames = getattr(img, "n_frames", 1)

        logger.debug(f"TIFF has {n_frames} frame(s)")

        for i in range(n_frames):
            img.seek(i)
            # Build a new image object for each page, since img itself is
            # re-seeked on the next iteration and closed when iteration ends.
            frame = img.copy().convert("RGB")
            yield TiffPage(image=frame, page_number=i)

    logger.debug(f"Closed TIFF file [path={path}]")
13 changes: 11 additions & 2 deletions vlmrun/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,17 @@
VLMRUN_TMP_DIR.mkdir(parents=True, exist_ok=True)

SUPPORTED_VIDEO_FILETYPES = [".mp4", ".mov", ".avi", ".mkv", ".webm"]
SUPPORTED_IMAGE_FILETYPES = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]
SUPPORTED_DOCUMENT_FILETYPES = [".pdf", ".doc", ".docx"]
SUPPORTED_IMAGE_FILETYPES = [
".jpg",
".jpeg",
".png",
".gif",
".bmp",
".tif",
".tiff",
".webp",
]
SUPPORTED_DOCUMENT_FILETYPES = [".pdf", ".doc", ".docx", ".tif", ".tiff"]
SUPPORTED_AUDIO_FILETYPES = [".mp3", ".wav", ".m4a", ".flac", ".ogg"]

# All supported file types for the chat CLI
Expand Down
2 changes: 1 addition & 1 deletion vlmrun/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.4"
__version__ = "0.17.18"