From 9d0a2762c71cb7f07de6cca1db8a6e4fac634bed Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 28 Dec 2025 01:51:02 +0000 Subject: [PATCH 1/2] Add Docker support and Gradio web UI Add containerized deployment with NVIDIA CUDA support and a Gradio-based web interface for easier usage. Based on community PR #13. --- .dockerignore | 6 +++++ Dockerfile | 30 +++++++++++++++++++++++ compose.yml | 16 +++++++++++++ gradio_web.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 compose.yml create mode 100644 gradio_web.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..2890ee4d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.dockerignore +.gitignore +*.md +Dockerfile +compose.yml +data/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..0f8bb0aa --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + +# Install Python 3.13 +RUN apt-get update && apt-get install -y wget software-properties-common build-essential && apt-get clean && rm -rf /var/lib/apt/lists/* +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y python3.13 python3.13-venv python3.13-dev ninja-build && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Install Sharp and dependencies +RUN mkdir /app +COPY pyproject.toml requirements.txt requirements.in /app/ +COPY src/ /app/src/ +WORKDIR /app +RUN python3.13 -m venv .venv +ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" +ENV FORCE_CUDA="1" +RUN .venv/bin/pip install ninja +RUN .venv/bin/pip install -r requirements.txt +RUN .venv/bin/pip install gradio +RUN ln -s /app/.venv/bin/sharp /usr/local/bin/sharp + +# Test run to download model and check if it works +RUN wget https://apple.github.io/ml-sharp/thumbnails/Unsplash_-5wkyNA2BPc_0000-0001.jpg -O /tmp/test.jpg +RUN sharp predict -i /tmp/test.jpg -o 
/tmp/test +RUN rm /tmp/test.jpg /tmp/test -rf + +# Copy other files +COPY . /app + +# Start Gradio web server +CMD [".venv/bin/python3.13", "-u", "/app/gradio_web.py"] diff --git a/compose.yml b/compose.yml new file mode 100644 index 00000000..bb24cacc --- /dev/null +++ b/compose.yml @@ -0,0 +1,16 @@ +services: + sharp: + build: + context: . + dockerfile: Dockerfile + volumes: + - ./data:/app/data + ports: + - "7860:7860" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] diff --git a/gradio_web.py b/gradio_web.py new file mode 100644 index 00000000..7d057740 --- /dev/null +++ b/gradio_web.py @@ -0,0 +1,66 @@ +import gradio as gr +import subprocess +import os +import shutil +import time + + +def predict(image): + # Ensure data directory exists + os.makedirs("/app/data", exist_ok=True) + + input_path = "/app/data/input.jpg" + + # Save/Copy input image + # image provided by gradio (type='filepath') is a temp path + shutil.copy(image, input_path) + + # Run sharp command + # sharp predict -i /app/data/input.jpg -o /app/data/output --render + cmd = [ + "sharp", + "predict", + "-i", + input_path, + "-o", + "/app/data/output", + "--render", + ] + + # Execute command + try: + t = time.time() + print("Sharp started") + subprocess.run(cmd, check=True, capture_output=True) + print(f"Sharp command took {round(time.time() - t, 3)} seconds") + except subprocess.CalledProcessError as e: + print(f"Error running sharp: {e}") + print(f"Stdout: {e.stdout.decode()}") + print(f"Stderr: {e.stderr.decode()}") + return None + + # Find output videos + rgb_video = "/app/data/output/input.mp4" + depth_video = "/app/data/output/input.depth.mp4" + + if os.path.exists(rgb_video) and os.path.exists(depth_video): + return rgb_video, depth_video + elif os.path.exists(rgb_video): + return rgb_video, None + + return None, None + + +demo = gr.Interface( + fn=predict, + inputs=gr.Image(type="filepath", label="Input Image"), + 
outputs=[gr.Video(label="RGB Video"), gr.Video(label="Depth Video")], + title="Sharp 3D View Synthesis", + description="Upload an image to generate a 3D view synthesis video.", +) + +if __name__ == "__main__": + print( + "Sharp Monocular View Synthesis in Less Than a Second (https://github.com/apple/ml-sharp)" + ) + demo.launch(server_name="0.0.0.0", server_port=7860) From b8bc98769f06d894f8e8adcdc7cb1acff0d27e15 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 28 Dec 2025 01:58:58 +0000 Subject: [PATCH 2/2] Improve security and add Docker user guide Security improvements: - Add optional authentication via environment variables - Run container as non-root user (sharp) - Add file type validation for uploads - Add file size limits (configurable, default 50MB) - Add concurrency limits to prevent resource exhaustion - Add resource limits in compose.yml (CPU/memory) - Improve error handling (log details server-side only) - Add request timeout (5 minutes) - Add container healthcheck Documentation: - Add comprehensive DOCKER.md user guide - Document all environment variables - Include production deployment recommendations - Add troubleshooting section --- DOCKER.md | 262 ++++++++++++++++++++++++++++++++++++++++++++++++++ Dockerfile | 26 +++-- compose.yml | 18 ++++ gradio_web.py | 185 ++++++++++++++++++++++++++++------- 4 files changed, 451 insertions(+), 40 deletions(-) create mode 100644 DOCKER.md diff --git a/DOCKER.md b/DOCKER.md new file mode 100644 index 00000000..5fd35bbd --- /dev/null +++ b/DOCKER.md @@ -0,0 +1,262 @@ +# Docker Setup Guide for SHARP + +This guide covers running SHARP using Docker with the Gradio web interface. 
+ +## Prerequisites + +- **Docker** 20.10 or later +- **Docker Compose** v2.0 or later +- **NVIDIA GPU** with CUDA support +- **NVIDIA Container Toolkit** ([installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)) + +### Verify NVIDIA Container Toolkit + +```bash +docker run --rm --gpus all nvidia/cuda:12.9.1-base-ubuntu24.04 nvidia-smi +``` + +## Quick Start + +```bash +# Clone the repository +git clone https://github.com/apple/ml-sharp.git +cd ml-sharp + +# Build and run +docker compose up --build +``` + +Open http://localhost:7860 in your browser. + +## Configuration + +### Environment Variables + +Configure the application by setting environment variables in `compose.yml`: + +| Variable | Default | Description | +|----------|---------|-------------| +| `SHARP_AUTH_USERNAME` | *(none)* | Username for web authentication | +| `SHARP_AUTH_PASSWORD` | *(none)* | Password for web authentication | +| `SHARP_MAX_FILE_SIZE_MB` | `50` | Maximum upload file size in MB | +| `SHARP_PORT` | `7860` | Web server port | +| `SHARP_DATA_DIR` | `/app/data` | Data directory inside container | + +### Enabling Authentication + +Edit `compose.yml` to uncomment and set credentials: + +```yaml +environment: + - SHARP_AUTH_USERNAME=admin + - SHARP_AUTH_PASSWORD=your-secure-password +``` + +**Important:** Always enable authentication when exposing the service to a network. + +### Resource Limits + +The default configuration limits resources to prevent abuse: + +```yaml +deploy: + resources: + limits: + cpus: "8" + memory: 32G +``` + +Adjust these based on your hardware capabilities. + +## Usage + +### Web Interface + +1. Navigate to http://localhost:7860 +2. Upload an image (JPEG, PNG, GIF, BMP, or WebP) +3. Wait for processing (typically under 1 second on GPU) +4. 
Download the generated RGB and depth videos + +### Supported Image Formats + +- JPEG/JPG +- PNG +- GIF +- BMP +- WebP + +Maximum file size: 50MB (configurable) + +### Output + +The web interface generates two videos: +- **RGB Video**: Photorealistic 3D view synthesis +- **Depth Video**: Depth map visualization + +Output files are saved to the `./data` directory on your host machine. + +## Advanced Usage + +### Running in Background + +```bash +docker compose up -d --build +``` + +### Viewing Logs + +```bash +docker compose logs -f +``` + +### Stopping the Service + +```bash +docker compose down +``` + +### Rebuilding After Changes + +```bash +docker compose up --build --force-recreate +``` + +### Using a Custom Port + +Edit `compose.yml`: + +```yaml +ports: + - "8080:7860" # Access at localhost:8080 +``` + +Or set the environment variable: + +```yaml +environment: + - SHARP_PORT=8080 +ports: + - "8080:8080" +``` + +## Production Deployment + +### Security Recommendations + +1. **Enable Authentication**: Always set `SHARP_AUTH_USERNAME` and `SHARP_AUTH_PASSWORD` + +2. **Use a Reverse Proxy**: Deploy behind nginx or Traefik for: + - SSL/TLS termination + - Rate limiting + - Additional security headers + +3. **Network Isolation**: Bind to localhost and use a reverse proxy: + ```yaml + ports: + - "127.0.0.1:7860:7860" + ``` + +4. 
**Regular Updates**: Keep the Docker image updated for security patches + +### Example nginx Configuration + +```nginx +server { + listen 443 ssl http2; + server_name sharp.example.com; + + ssl_certificate /path/to/cert.pem; + ssl_certificate_key /path/to/key.pem; + + location / { + proxy_pass http://127.0.0.1:7860; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_read_timeout 300s; + } +} +``` + +## Troubleshooting + +### GPU Not Detected + +```bash +# Verify NVIDIA Container Toolkit +nvidia-ctk --version + +# Check Docker GPU access +docker run --rm --gpus all nvidia/cuda:12.9.1-base-ubuntu24.04 nvidia-smi +``` + +### Out of Memory + +Lower the upload size limit so that only smaller images are processed: + +```yaml +environment: + - SHARP_MAX_FILE_SIZE_MB=20 +``` + +### Build Fails + +Ensure you have sufficient disk space (the image requires ~15GB): + +```bash +docker system df +docker system prune # Clean unused resources +``` + +### Container Won't Start + +Check logs for errors: + +```bash +docker compose logs sharp +``` + +### Permission Denied on Data Directory + +Ensure the `./data` directory is writable by the container user (UID 1000): + +```bash +mkdir -p data +sudo chown 1000:1000 data +``` + +## Development + +### Building Manually + +```bash +docker build -t sharp:latest . +``` + +### Running Without Compose + +```bash +docker run --gpus all -p 7860:7860 -v $(pwd)/data:/app/data sharp:latest +``` + +### Accessing Container Shell + +```bash +docker compose exec sharp bash +``` + +## Architecture + +The Docker setup includes: + +- **Base Image**: `nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04` +- **Python**: 3.13 +- **Security**: Runs as non-root user (`sharp`) +- **Web Interface**: Gradio on port 7860 +- **Model**: Auto-downloaded during the image build (cached in the image) + +## License + +See the main [README.md](README.md) for license information.
diff --git a/Dockerfile b/Dockerfile index 0f8bb0aa..8bf62fed 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,12 +5,17 @@ RUN apt-get update && apt-get install -y wget software-properties-common build-e RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y python3.13 python3.13-venv python3.13-dev ninja-build && apt-get clean && rm -rf /var/lib/apt/lists/* +# Create non-root user for security +RUN useradd -m -u 1000 -s /bin/bash sharp + # Install Sharp and dependencies -RUN mkdir /app -COPY pyproject.toml requirements.txt requirements.in /app/ -COPY src/ /app/src/ +RUN mkdir /app && chown sharp:sharp /app +COPY --chown=sharp:sharp pyproject.toml requirements.txt requirements.in /app/ +COPY --chown=sharp:sharp src/ /app/src/ WORKDIR /app -RUN python3.13 -m venv .venv + +# Create virtual environment and install dependencies +RUN python3.13 -m venv .venv && chown -R sharp:sharp .venv ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" ENV FORCE_CUDA="1" RUN .venv/bin/pip install ninja @@ -23,8 +28,17 @@ RUN wget https://apple.github.io/ml-sharp/thumbnails/Unsplash_-5wkyNA2BPc_0000-0 RUN sharp predict -i /tmp/test.jpg -o /tmp/test RUN rm /tmp/test.jpg /tmp/test -rf -# Copy other files -COPY . /app +# Copy other files and set ownership +COPY --chown=sharp:sharp . 
/app
+
+# Create data directory with proper permissions
+RUN mkdir -p /app/data && chown -R sharp:sharp /app/data
+
+# Switch to non-root user
+USER sharp
+
+# Expose port
+EXPOSE 7860
 
 # Start Gradio web server
 CMD [".venv/bin/python3.13", "-u", "/app/gradio_web.py"]
diff --git a/compose.yml b/compose.yml
index bb24cacc..9f5a0b93 100644
--- a/compose.yml
+++ b/compose.yml
@@ -7,10 +7,29 @@ services:
       - ./data:/app/data
     ports:
       - "7860:7860"
+    environment:
+      # Optional: Set authentication credentials
+      # - SHARP_AUTH_USERNAME=admin
+      # - SHARP_AUTH_PASSWORD=changeme
+      # Optional: Configure limits
+      # - SHARP_MAX_FILE_SIZE_MB=50
+      # - SHARP_PORT=7860
+      - NVIDIA_VISIBLE_DEVICES=all
     deploy:
       resources:
+        limits:
+          cpus: "8"
+          memory: 32G
         reservations:
           devices:
             - driver: nvidia
               count: 1
               capabilities: [gpu]
+    restart: unless-stopped
+    healthcheck:
+      # wget is what the Dockerfile installs; it never installs curl
+      test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:7860/"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 60s
diff --git a/gradio_web.py b/gradio_web.py
index 7d057740..0530ffa7 100644
--- a/gradio_web.py
+++ b/gradio_web.py
@@ -1,66 +1,242 @@
-import gradio as gr
-import subprocess
-import os
-import shutil
-import time
-
-
-def predict(image):
-    # Ensure data directory exists
-    os.makedirs("/app/data", exist_ok=True)
-
-    input_path = "/app/data/input.jpg"
-
-    # Save/Copy input image
-    # image provided by gradio (type='filepath') is a temp path
-    shutil.copy(image, input_path)
-
-    # Run sharp command
-    # sharp predict -i /app/data/input.jpg -o /app/data/output --render
-    cmd = [
-        "sharp",
-        "predict",
-        "-i",
-        input_path,
-        "-o",
-        "/app/data/output",
-        "--render",
-    ]
-
-    # Execute command
-    try:
-        t = time.time()
-        print("Sharp started")
-        subprocess.run(cmd, check=True, capture_output=True)
-        print(f"Sharp command took {round(time.time() - t, 3)} seconds")
-    except subprocess.CalledProcessError as e:
-        print(f"Error running sharp: {e}")
-        print(f"Stdout: {e.stdout.decode()}")
-        print(f"Stderr: {e.stderr.decode()}")
-        return None
-
-    # Find output videos
-    rgb_video = "/app/data/output/input.mp4"
-    depth_video = "/app/data/output/input.depth.mp4"
-
-    if os.path.exists(rgb_video) and os.path.exists(depth_video):
-        return rgb_video, depth_video
-    elif os.path.exists(rgb_video):
-        return rgb_video, None
-
-    return None, None
-
-
-demo = gr.Interface(
-    fn=predict,
-    inputs=gr.Image(type="filepath", label="Input Image"),
-    outputs=[gr.Video(label="RGB Video"), gr.Video(label="Depth Video")],
-    title="Sharp 3D View Synthesis",
-    description="Upload an image to generate a 3D view synthesis video.",
-)
-
-if __name__ == "__main__":
-    print(
-        "Sharp Monocular View Synthesis in Less Than a Second (https://github.com/apple/ml-sharp)"
-    )
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+"""Gradio web interface for SHARP 3D view synthesis."""
+
+import logging
+import os
+import shutil
+import subprocess
+import time
+import uuid
+from pathlib import Path
+
+import gradio as gr
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# Configuration via environment variables
+DATA_DIR = Path(os.getenv("SHARP_DATA_DIR", "/app/data"))
+OUTPUT_DIR = DATA_DIR / "output"
+ALLOWED_IMAGE_TYPES = {"jpeg", "png", "gif", "bmp", "webp"}
+MAX_FILE_SIZE_MB = int(os.getenv("SHARP_MAX_FILE_SIZE_MB", "50"))
+
+
+def detect_image_type(image_path: str) -> str | None:
+    """Identify an image format from the file's leading magic bytes.
+
+    Stands in for ``imghdr.what``: the ``imghdr`` module was removed in
+    Python 3.13, the interpreter the Docker image runs this script with.
+
+    Args:
+        image_path: Path to the file to inspect.
+
+    Returns:
+        A name from ALLOWED_IMAGE_TYPES, or None if the format is unknown.
+    """
+    with open(image_path, "rb") as f:
+        header = f.read(12)
+    if header.startswith(b"\xff\xd8\xff"):
+        return "jpeg"
+    if header.startswith(b"\x89PNG\r\n\x1a\n"):
+        return "png"
+    if header.startswith((b"GIF87a", b"GIF89a")):
+        return "gif"
+    if header.startswith(b"BM"):
+        return "bmp"
+    # WebP is a RIFF container: b"RIFF", a 4-byte size, then b"WEBP"
+    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
+        return "webp"
+    return None
+
+
+def validate_image(image_path: str) -> None:
+    """Validate that the uploaded file is an allowed image of allowed size.
+
+    Args:
+        image_path: Path to the uploaded image file.
+
+    Raises:
+        ValueError: If the file type is not allowed or the file is too large.
+    """
+    file_type = detect_image_type(image_path)
+    if file_type not in ALLOWED_IMAGE_TYPES:
+        raise ValueError(
+            f"Invalid file type: {file_type}. "
+            f"Allowed types: {', '.join(sorted(ALLOWED_IMAGE_TYPES))}"
+        )
+
+    # Check file size
+    file_size_mb = os.path.getsize(image_path) / (1024 * 1024)
+    if file_size_mb > MAX_FILE_SIZE_MB:
+        raise ValueError(
+            f"File too large: {file_size_mb:.1f}MB. Maximum: {MAX_FILE_SIZE_MB}MB"
+        )
+
+
+def run_sharp(input_path: Path, output_dir: Path) -> tuple[str, str | None]:
+    """Run ``sharp predict --render`` and locate the videos it wrote.
+
+    Args:
+        input_path: Image to process.
+        output_dir: Directory passed to SHARP as its output location.
+
+    Returns:
+        Tuple of (rgb_video_path, depth_video_path); the depth path is None
+        when no depth video was written.
+
+    Raises:
+        gr.Error: If SHARP fails, times out, or writes no RGB video.
+    """
+    cmd = [
+        "sharp",
+        "predict",
+        "-i",
+        str(input_path),
+        "-o",
+        str(output_dir),
+        "--render",
+    ]
+
+    try:
+        started = time.monotonic()
+        logger.info("Starting SHARP prediction")
+        subprocess.run(cmd, check=True, capture_output=True, timeout=300)
+        elapsed = round(time.monotonic() - started, 3)
+        logger.info("SHARP prediction completed in %s seconds", elapsed)
+    except subprocess.TimeoutExpired as e:
+        logger.error("SHARP prediction timed out after 5 minutes")
+        raise gr.Error("Processing timed out. Please try a smaller image.") from e
+    except subprocess.CalledProcessError as e:
+        # Log detailed error server-side only
+        stdout = e.stdout.decode(errors="replace") if e.stdout else "N/A"
+        stderr = e.stderr.decode(errors="replace") if e.stderr else "N/A"
+        logger.error("SHARP command failed with exit code %s", e.returncode)
+        logger.error("stdout: %s", stdout)
+        logger.error("stderr: %s", stderr)
+        # Return generic error to user
+        raise gr.Error(
+            "Failed to process image. Please try again with a different image."
+        ) from e
+
+    # Videos are looked up under the input file's stem (input.jpg -> input.mp4)
+    rgb_video = output_dir / f"{input_path.stem}.mp4"
+    depth_video = output_dir / f"{input_path.stem}.depth.mp4"
+    if not rgb_video.exists():
+        logger.warning("No output videos were generated")
+        raise gr.Error("No output was generated. Please try a different image.")
+    return str(rgb_video), str(depth_video) if depth_video.exists() else None
+
+
+def predict(image: str | None) -> tuple[str | None, str | None]:
+    """Process an image through SHARP to generate 3D view synthesis videos.
+
+    Args:
+        image: Path to the input image file (provided by Gradio), or None
+            when no image was supplied.
+
+    Returns:
+        Tuple of (rgb_video_path, depth_video_path). The depth path may be
+        None; both are None when no image was supplied.
+
+    Raises:
+        gr.Error: If the image is rejected or SHARP produces no RGB video.
+    """
+    if image is None:
+        return None, None
+
+    try:
+        validate_image(image)
+    except ValueError as e:
+        logger.warning("Image validation failed: %s", e)
+        raise gr.Error(str(e)) from e
+
+    # Work in a directory of its own per request: create_demo() allows two
+    # requests at once, and with shared input/output paths they would
+    # overwrite each other or pick up a video left behind by an earlier run.
+    work_dir = OUTPUT_DIR / uuid.uuid4().hex
+    work_dir.mkdir(parents=True)
+    try:
+        input_path = work_dir / "input.jpg"
+        shutil.copy(image, input_path)
+        return run_sharp(input_path, work_dir)
+    except Exception:
+        # Failed requests must not pile up directories on the data volume
+        shutil.rmtree(work_dir, ignore_errors=True)
+        raise
+
+
+def create_demo() -> gr.Interface:
+    """Create and configure the Gradio interface.
+
+    Returns:
+        Configured Gradio Interface instance.
+    """
+    # Offer the teaser as an example only when it exists in the data directory
+    teaser = DATA_DIR / "teaser.jpg"
+    return gr.Interface(
+        fn=predict,
+        inputs=gr.Image(type="filepath", label="Input Image"),
+        outputs=[
+            gr.Video(label="RGB Video"),
+            gr.Video(label="Depth Video"),
+        ],
+        title="SHARP 3D View Synthesis",
+        description=(
+            "Upload an image to generate a 3D view synthesis video. "
+            "SHARP creates photorealistic novel views from a single photograph."
+        ),
+        examples=[[str(teaser)]] if teaser.exists() else None,
+        concurrency_limit=2,  # Limit concurrent GPU operations
+        flagging_mode="never",  # Disable flagging for security
+    )
+
+
+def get_auth() -> tuple[str, str] | None:
+    """Get authentication credentials from environment variables.
+
+    Returns:
+        Tuple of (username, password) if both are configured, None otherwise.
+    """
+    username = os.getenv("SHARP_AUTH_USERNAME")
+    password = os.getenv("SHARP_AUTH_PASSWORD")
+    if username and password:
+        return (username, password)
+    if username or password:
+        # A half-configured pair would otherwise silently run without auth
+        logger.warning(
+            "Only one of SHARP_AUTH_USERNAME and SHARP_AUTH_PASSWORD is set; "
+            "authentication stays disabled until both are provided."
+        )
+    return None
+
+
+if __name__ == "__main__":
+    logger.info(
+        "SHARP - Sharp Monocular View Synthesis in Less Than a Second "
+        "(https://github.com/apple/ml-sharp)"
+    )
+
+    demo = create_demo()
+
+    # Get optional authentication
+    auth = get_auth()
+    if auth:
+        logger.info("Authentication enabled")
+    else:
+        logger.warning(
+            "No authentication configured. Set SHARP_AUTH_USERNAME and "
+            "SHARP_AUTH_PASSWORD environment variables to enable authentication."
+        )
+
+    # Launch server
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=int(os.getenv("SHARP_PORT", "7860")),
+        auth=auth,
+        show_error=True,
+        # Enforce the size limit at upload time, not only in validate_image()
+        max_file_size=MAX_FILE_SIZE_MB * 1024 * 1024,
+    )