diff --git a/README.md b/README.md index 98d26bb4d3..a2e6337499 100644 --- a/README.md +++ b/README.md @@ -409,16 +409,16 @@ nm.deploy() nm.serve() ``` -### Deploy NeMo Multimodal Models Directly with Triton Inference Server +### Deploy Megatron Multimodal Models Directly with Triton Inference Server -You can also deploy NeMo multimodal models directly using Triton Inference Server without exporting to TensorRT-LLM. This provides a simpler deployment path while still leveraging Triton's scalable serving capabilities. +You can also deploy Megatron multimodal models directly using Triton Inference Server without exporting to TensorRT-LLM. This provides a simpler deployment path while still leveraging Triton's scalable serving capabilities. ```python from nemo_deploy import DeployPyTriton -from nemo_deploy.multimodal import NeMoMultimodalDeployable +from nemo_deploy.multimodal import MegatronMultimodalDeployable -model = NeMoMultimodalDeployable( - nemo_checkpoint_filepath="/path/to/model.nemo", +model = MegatronMultimodalDeployable( + megatron_checkpoint_filepath="/path/to/model.nemo", tensor_parallel_size=1, pipeline_parallel_size=1, ) @@ -458,18 +458,17 @@ output = nq.query( print(output) ``` -### Query Directly Deployed NeMo Multimodal Models +### Query Directly Deployed Megatron Multimodal Models -For multimodal models deployed directly with `NeMoMultimodalDeployable`, use the `NemoQueryMultimodalPytorch` class: +For multimodal models deployed directly with `MegatronMultimodalDeployable`, use the `NemoQueryMultimodalPytorch` class: ```python from nemo_deploy.multimodal import NemoQueryMultimodalPytorch -from PIL import Image nq = NemoQueryMultimodalPytorch(url="localhost:8000", model_name="qwen") output = nq.query_multimodal( prompts=["What is in this image?"], - images=[Image.open("/path/to/image.jpg")], + images=["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"], max_length=100, top_k=1, top_p=0.0, diff --git a/nemo_deploy/multimodal/nemo_multimodal_deployable.py b/nemo_deploy/multimodal/megatron_multimodal_deployable.py similarity index 82% rename from nemo_deploy/multimodal/nemo_multimodal_deployable.py rename to nemo_deploy/multimodal/megatron_multimodal_deployable.py index 5bcbb8a05a..3dc60d27c5 100644 --- a/nemo_deploy/multimodal/nemo_multimodal_deployable.py +++ b/nemo_deploy/multimodal/megatron_multimodal_deployable.py @@ -24,19 +24,19 @@ from nemo_deploy import ITritonDeployable from nemo_deploy.utils import cast_output, str_ndarray2list from nemo_export_deploy_common.import_utils import ( - MISSING_NEMO_MSG, + MISSING_MBRIDGE_MSG, MISSING_TRITON_MSG, UnavailableError, null_decorator, ) try: - from nemo.collections.vlm.inference.base import generate, setup_model_and_tokenizer - from nemo.collections.vlm.inference.qwenvl_inference_wrapper import QwenVLInferenceWrapper + from megatron.bridge.inference.vlm.base import generate, setup_model_and_tokenizer + from megatron.bridge.inference.vlm.qwenvl_inference_wrapper import QwenVLInferenceWrapper - HAVE_NEMO = True + HAVE_MBRIDGE = True except (ImportError, ModuleNotFoundError): - HAVE_NEMO = False + HAVE_MBRIDGE = False from typing import Any generate = Any @@ -67,42 +67,46 @@ def dict_to_str(messages): return json.dumps(messages) -class NeMoMultimodalDeployable(ITritonDeployable): - """Triton inference server compatible deploy class for a NeMo multimodal model file. 
+class MegatronMultimodalDeployable(ITritonDeployable): + """Triton inference server compatible deploy class for a Megatron multimodal model file. Args: - nemo_checkpoint_filepath (str): path for the nemo checkpoint. - tensor_parallel_size (int): tensor parallelism. - pipeline_parallel_size (int): pipeline parallelism. + megatron_checkpoint_filepath (str): path for the megatron checkpoint. + tensor_model_parallel_size (int): tensor parallelism. + pipeline_model_parallel_size (int): pipeline parallelism. params_dtype (torch.dtype): data type for model parameters. inference_batch_times_seqlen_threshold (int): sequence threshold. + inference_max_seq_length (int): maximum sequence length for inference. """ def __init__( self, - nemo_checkpoint_filepath: str = None, - tensor_parallel_size: int = 1, - pipeline_parallel_size: int = 1, + megatron_checkpoint_filepath: str, + tensor_model_parallel_size: int = 1, + pipeline_model_parallel_size: int = 1, params_dtype: torch.dtype = torch.bfloat16, inference_batch_times_seqlen_threshold: int = 1000, + inference_max_seq_length: int = 8192, ): if not HAVE_TRITON: raise UnavailableError(MISSING_TRITON_MSG) - if not HAVE_NEMO: - raise UnavailableError(MISSING_NEMO_MSG) + if not HAVE_MBRIDGE: + raise UnavailableError(MISSING_MBRIDGE_MSG) - self.nemo_checkpoint_filepath = nemo_checkpoint_filepath - self.tensor_parallel_size = tensor_parallel_size - self.pipeline_parallel_size = pipeline_parallel_size + self.megatron_checkpoint_filepath = megatron_checkpoint_filepath + self.tensor_model_parallel_size = tensor_model_parallel_size + self.pipeline_model_parallel_size = pipeline_model_parallel_size self.params_dtype = params_dtype self.inference_batch_times_seqlen_threshold = inference_batch_times_seqlen_threshold + self.inference_max_seq_length = inference_max_seq_length self.inference_wrapped_model, self.processor = setup_model_and_tokenizer( - path=nemo_checkpoint_filepath, - tp_size=tensor_parallel_size, - pp_size=pipeline_parallel_size, + megatron_model_path=megatron_checkpoint_filepath, + tp=tensor_model_parallel_size, + pp=pipeline_model_parallel_size, params_dtype=params_dtype, inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, + inference_max_seq_length=inference_max_seq_length, ) def generate( @@ -157,8 +161,16 @@ def apply_chat_template(self, messages, add_generation_prompt=True): ) return text - def base64_to_image(self, image_base64): - """Convert base64-encoded image to PIL Image.""" + def process_image_input(self, image_source): + """Process image input from base64-encoded string or HTTP URL. + + Args: + image_source (str): Image source - either base64-encoded image string with data URI prefix + (e.g., "data:image;base64,...") or HTTP/HTTPS URL (e.g., "http://example.com/image.jpg") + + Returns: + Processed image content suitable for model inference. + """ if isinstance(self.inference_wrapped_model, QwenVLInferenceWrapper): from qwen_vl_utils import process_vision_info @@ -166,7 +178,7 @@ def base64_to_image(self, image_base64): { "role": "user", "content": [ - {"type": "image", "image": f"data:image;base64,{image_base64}"}, + {"type": "image", "image": image_source}, ], } ] @@ -259,6 +271,12 @@ def _infer_fn( Returns: dict: sentences. """ + # Handle temperature=0.0 for greedy decoding + if temperature == 0.0: + LOGGER.warning("temperature=0.0 detected. 
Setting top_k=1 for greedy sampling.") + top_k = 1 + top_p = 0.0 + inference_params = CommonInferenceParams( temperature=float(temperature), top_k=int(top_k), @@ -266,7 +284,7 @@ def _infer_fn( num_tokens_to_generate=num_tokens_to_generate, ) - images = [self.base64_to_image(img_b64) for img_b64 in images] + images = [self.process_image_input(image_source) for image_source in images] results = self.generate( prompts, diff --git a/nemo_deploy/multimodal/query_multimodal.py b/nemo_deploy/multimodal/query_multimodal.py index 17d1e49a87..41590ca0a1 100644 --- a/nemo_deploy/multimodal/query_multimodal.py +++ b/nemo_deploy/multimodal/query_multimodal.py @@ -195,9 +195,16 @@ class NemoQueryMultimodalPytorch: nq = NemoQueryMultimodalPytorch(url="localhost", model_name="qwen") - # Encode image to base64 + # Option 1: Use HTTP URL directly + output = nq.query_multimodal( + prompts=["Describe this image"], + images=["http://example.com/image.jpg"], + max_length=100, + ) + + # Option 2: Encode image to base64 with data URI prefix with open("image.jpg", "rb") as f: - image_base64 = base64.b64encode(f.read()).decode('utf-8') + image_base64 = "data:image;base64," + base64.b64encode(f.read()).decode('utf-8') output = nq.query_multimodal( prompts=["Describe this image"], @@ -231,7 +238,8 @@ def query_multimodal( Args: prompts (List[str]): List of input text prompts. - images (List[str]): List of base64-encoded image strings. + images (List[str]): List of image strings - either base64-encoded with data URI prefix + (e.g., "data:image;base64,...") or HTTP/HTTPS URLs (e.g., "http://example.com/image.jpg"). max_length (Optional[int]): Maximum number of tokens to generate. max_batch_size (Optional[int]): Maximum batch size for inference. top_k (Optional[int]): Limits to the top K tokens to consider at each step. diff --git a/nemo_deploy/service/fastapi_interface_to_pytriton_multimodal.py b/nemo_deploy/service/fastapi_interface_to_pytriton_multimodal.py index 3955753ea3..df854e0e59 100644 --- a/nemo_deploy/service/fastapi_interface_to_pytriton_multimodal.py +++ b/nemo_deploy/service/fastapi_interface_to_pytriton_multimodal.py @@ -19,7 +19,7 @@ import numpy as np import requests from fastapi import FastAPI, HTTPException -from pydantic import BaseModel, model_validator +from pydantic import BaseModel from pydantic_settings import BaseSettings from nemo_deploy.multimodal.query_multimodal import NemoQueryMultimodalPytorch @@ -82,18 +82,10 @@ class BaseMultimodalRequest(BaseModel): max_tokens: int = 50 temperature: float = 1.0 top_p: float = 0.0 - top_k: int = 1 + top_k: int = 0 random_seed: Optional[int] = None max_batch_size: int = 4 - @model_validator(mode="after") - def set_greedy_params(self): - """Validate parameters for greedy decoding.""" - if self.temperature == 0 and self.top_p == 0: - logging.warning("Both temperature and top_p are 0. Setting top_k to 1 to ensure greedy sampling.") - self.top_k = 1 - return self - class MultimodalCompletionRequest(BaseMultimodalRequest): """Represents a request for multimodal text completion. @@ -290,12 +282,33 @@ def dict_to_str(messages): @app.post("/v1/chat/completions/") async def chat_completions_v1(request: MultimodalChatCompletionRequest): - """Defines the multimodal chat completions endpoint and queries the model deployed on PyTriton server.""" + """Defines the multimodal chat completions endpoint and queries the model deployed on PyTriton server. + + Supports two image content formats (normalized internally to format 1): + 1. 
{"type": "image", "image": "url_or_base64"} + 2. {"type": "image_url", "image_url": {"url": "url_or_base64"}} (OpenAI-style, converted to format 1) + """ url = f"http://{triton_settings.triton_service_ip}:{triton_settings.triton_service_port}" prompts = request.messages if not isinstance(request.messages, list): prompts = [request.messages] + + # Normalize image_url format to image format for consistent processing + for message in prompts: + for content in message["content"]: + if content["type"] == "image_url": + # Convert OpenAI-style image_url to standard image format + if isinstance(content.get("image_url"), dict): + image_data = content["image_url"]["url"] + else: + image_data = content["image_url"] + # Transform to image format + content["type"] = "image" + content["image"] = image_data + # Remove image_url field + content.pop("image_url", None) + # Serialize the dictionary to a JSON string represnetation to be able to convert to numpy array # (str_list2numpy) and back to list (str_ndarray2list) as required by PyTriton. Using the dictionaries directly # with these methods is not possible as they expect string type. diff --git a/nemo_export_deploy_common/import_utils.py b/nemo_export_deploy_common/import_utils.py index 103b7af05b..1543ab46cc 100644 --- a/nemo_export_deploy_common/import_utils.py +++ b/nemo_export_deploy_common/import_utils.py @@ -39,6 +39,9 @@ MISSING_TENSORRT_LLM_MSG = "tensorrt_llm is not available. Please install it with `pip install tensorrt-llm`." MISSING_TENSORRT_MSG = "tensorrt is not available. Please install it with `pip install nvidia-tensorrt`." MISSING_NEMO_MSG = "nemo is not available. Please install it with `pip install nemo`." +MISSING_MBRIDGE_MSG = ( + "megatron.bridge is not available. Please install it from https://github.com/NVIDIA-NeMo/Megatron-Bridge" +) MISSING_TORCHVISION_MSG = "torchvision is not available. Please install it with `pip install torchvision`." MISSING_MODELOPT_MSG = "modelopt is not available. Please install it with `pip install nvidia-modelopt[torch]`." MISSING_RAY_MSG = "ray is not available. Please install it with `pip install ray`." diff --git a/scripts/deploy/multimodal/deploy_inframework_triton.py b/scripts/deploy/multimodal/deploy_inframework_triton.py index fb11b7a4e2..2106b46e58 100644 --- a/scripts/deploy/multimodal/deploy_inframework_triton.py +++ b/scripts/deploy/multimodal/deploy_inframework_triton.py @@ -31,18 +31,18 @@ multimodal_supported = True try: - from nemo_deploy.multimodal.nemo_multimodal_deployable import NeMoMultimodalDeployable + from nemo_deploy.multimodal.megatron_multimodal_deployable import MegatronMultimodalDeployable except Exception as e: - LOGGER.warning(f"Cannot import NeMoMultimodalDeployable, it will not be available. {type(e).__name__}: {e}") + LOGGER.warning(f"Cannot import MegatronMultimodalDeployable, it will not be available. 
{type(e).__name__}: {e}") multimodal_supported = False def get_args(argv): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Deploy nemo multimodal models to Triton", + description="Deploy megatron multimodal models to Triton", ) - parser.add_argument("-nc", "--nemo_checkpoint", type=str, help="Source .nemo file") + parser.add_argument("-mc", "--megatron_checkpoint", type=str, help="Source megatron checkpoint path") parser.add_argument( "-tmn", "--triton_model_name", @@ -88,14 +88,14 @@ def get_args(argv): parser.add_argument( "-tps", - "--tensor_parallel_size", + "--tensor_model_parallel_size", default=1, type=int, help="Tensor parallelism size", ) parser.add_argument( "-pps", - "--pipeline_parallel_size", + "--pipeline_model_parallel_size", default=1, type=int, help="Pipeline parallelism size", @@ -130,6 +130,13 @@ def get_args(argv): type=int, help="Inference batch times sequence length threshold", ) + parser.add_argument( + "-imsl", + "--inference_max_seq_length", + default=8192, + type=int, + help="Maximum sequence length for inference", + ) args = parser.parse_args(argv) return args @@ -147,9 +154,9 @@ def nemo_deploy(argv): LOGGER.info(args) if not multimodal_supported: - raise ValueError("NeMoMultimodalDeployable is not supported in this environment.") + raise ValueError("MegatronMultimodalDeployable is not supported in this environment.") - if args.nemo_checkpoint is None: + if args.megatron_checkpoint is None: raise ValueError("In-Framework deployment requires a checkpoint folder.") # Convert dtype string to torch dtype @@ -160,12 +167,13 @@ def nemo_deploy(argv): } params_dtype = dtype_map[args.params_dtype] - model = NeMoMultimodalDeployable( - nemo_checkpoint_filepath=args.nemo_checkpoint, - tensor_parallel_size=args.tensor_parallel_size, - pipeline_parallel_size=args.pipeline_parallel_size, + model = MegatronMultimodalDeployable( + megatron_checkpoint_filepath=args.megatron_checkpoint, + tensor_model_parallel_size=args.tensor_model_parallel_size, + pipeline_model_parallel_size=args.pipeline_model_parallel_size, params_dtype=params_dtype, inference_batch_times_seqlen_threshold=args.inference_batch_times_seqlen_threshold, + inference_max_seq_length=args.inference_max_seq_length, ) if torch.distributed.is_initialized(): diff --git a/scripts/deploy/multimodal/query_ray_deployment.py b/scripts/deploy/multimodal/query_fastapi_inframework.py similarity index 85% rename from scripts/deploy/multimodal/query_ray_deployment.py rename to scripts/deploy/multimodal/query_fastapi_inframework.py index 971af56559..1ee17fe3d9 100644 --- a/scripts/deploy/multimodal/query_ray_deployment.py +++ b/scripts/deploy/multimodal/query_fastapi_inframework.py @@ -64,19 +64,16 @@ def load_image_from_path(image_path: str) -> str: image_path: Path to local image file or URL Returns: - Base64-encoded image string + Image string - HTTP URL directly or base64-encoded string for local files """ if image_path.startswith(("http://", "https://")): - LOGGER.info(f"Loading image from URL: {image_path}") - response = requests.get(image_path, timeout=30) - response.raise_for_status() - image_content = response.content + LOGGER.info(f"Using image URL directly: {image_path}") + return image_path else: - LOGGER.info(f"Loading image from local path: {image_path}") + LOGGER.info(f"Loading and encoding image from local path: {image_path}") with open(image_path, "rb") as f: image_content = f.read() - - return base64.b64encode(image_content).decode("utf-8") + return 
"data:image;base64," + base64.b64encode(image_content).decode("utf-8") def test_completions_endpoint(base_url: str, model_id: str, prompt: str = None, image_source: str = None) -> None: @@ -114,8 +111,8 @@ def test_completions_endpoint(base_url: str, model_id: str, prompt: str = None, payload["prompt"] = text try: - image_base64 = load_image_from_path(image_source) - payload["image"] = image_base64 + image_data = load_image_from_path(image_source) + payload["image"] = image_data except Exception as e: LOGGER.error(f"Failed to load image: {e}") return @@ -130,7 +127,12 @@ def test_completions_endpoint(base_url: str, model_id: str, prompt: str = None, def test_chat_completions_endpoint(base_url: str, model_id: str, prompt: str = None, image_source: str = None) -> None: - """Test the chat completions endpoint for multimodal models.""" + """Test the chat completions endpoint for multimodal models. + + Supports two image content formats: + 1. {"type": "image", "image": "url_or_base64"} + 2. {"type": "image_url", "image_url": {"url": "url_or_base64"}} (OpenAI-style) + """ url = f"{base_url}/v1/chat/completions/" # Use provided prompt or default @@ -141,8 +143,10 @@ def test_chat_completions_endpoint(base_url: str, model_id: str, prompt: str = N content = [] try: - image_base64 = load_image_from_path(image_source) - content.append({"type": "image", "image": image_base64}) + image_data = load_image_from_path(image_source) + # Using format 1: {"type": "image", "image": "url_or_base64"} + # Alternative format 2: {"type": "image_url", "image_url": {"url": "url_or_base64"}} + content.append({"type": "image", "image": image_data}) except Exception as e: LOGGER.error(f"Failed to load image: {e}") return @@ -167,19 +171,6 @@ def test_chat_completions_endpoint(base_url: str, model_id: str, prompt: str = N LOGGER.error(f"Error: {response.text}") -def test_models_endpoint(base_url: str) -> None: - """Test the models endpoint.""" - url = f"{base_url}/v1/models" - - LOGGER.info(f"Testing models endpoint at {url}") - response = requests.get(url) - LOGGER.info(f"Response status code: {response.status_code}") - if response.status_code == 200: - LOGGER.info(f"Response: {json.dumps(response.json(), indent=2)}") - else: - LOGGER.error(f"Error: {response.text}") - - def test_health_endpoint(base_url: str) -> None: """Test the health endpoint.""" url = f"{base_url}/v1/health" @@ -218,7 +209,6 @@ def main(): test_completions_endpoint(base_url, args.model_id, args.prompt, args.image) test_chat_completions_endpoint(base_url, args.model_id, args.prompt, args.image) test_health_endpoint(base_url) - test_models_endpoint(base_url) if __name__ == "__main__": diff --git a/scripts/deploy/multimodal/query_inframework.py b/scripts/deploy/multimodal/query_inframework.py index a7ddf1cc63..24accc0966 100644 --- a/scripts/deploy/multimodal/query_inframework.py +++ b/scripts/deploy/multimodal/query_inframework.py @@ -17,7 +17,6 @@ import logging import time -import requests from transformers import AutoProcessor from nemo_deploy.multimodal.query_multimodal import NemoQueryMultimodalPytorch @@ -32,19 +31,16 @@ def load_image_from_path(image_path: str) -> str: image_path: Path to local image file or URL Returns: - Base64-encoded image string + Image string - HTTP URL directly or base64-encoded string for local files """ if image_path.startswith(("http://", "https://")): - LOGGER.info(f"Loading image from URL: {image_path}") - response = requests.get(image_path, timeout=30) - response.raise_for_status() - image_content = 
response.content + LOGGER.info(f"Using image URL directly: {image_path}") + return image_path else: - LOGGER.info(f"Loading image from local path: {image_path}") + LOGGER.info(f"Loading and encoding image from local path: {image_path}") with open(image_path, "rb") as f: image_content = f.read() - - return base64.b64encode(image_content).decode("utf-8") + return "data:image;base64," + base64.b64encode(image_content).decode("utf-8") def get_args(): @@ -121,7 +117,7 @@ def query(): with open(args.prompt_file, "r") as f: args.prompt = f.read() - image_base64 = load_image_from_path(args.image) + image_source = load_image_from_path(args.image) if "Qwen" in args.processor_name: processor = AutoProcessor.from_pretrained(args.processor_name) @@ -146,7 +142,7 @@ def query(): nemo_query = NemoQueryMultimodalPytorch(args.url, args.model_name) outputs = nemo_query.query_multimodal( prompts=[args.prompt], - images=[image_base64], + images=[image_source], max_length=args.max_output_len, max_batch_size=args.max_batch_size, top_k=args.top_k, diff --git a/tests/unit_tests/deploy/test_fastapi_interface_to_pytriton_multimodal.py b/tests/unit_tests/deploy/test_fastapi_interface_to_pytriton_multimodal.py index 304f811025..2da62db31e 100644 --- a/tests/unit_tests/deploy/test_fastapi_interface_to_pytriton_multimodal.py +++ b/tests/unit_tests/deploy/test_fastapi_interface_to_pytriton_multimodal.py @@ -89,7 +89,7 @@ def test_base_multimodal_request_defaults(self): assert request.max_tokens == 50 assert request.temperature == 1.0 assert request.top_p == 0.0 - assert request.top_k == 1 + assert request.top_k == 0 assert request.random_seed is None assert request.max_batch_size == 4 @@ -112,11 +112,6 @@ def test_base_multimodal_request_custom_values(self): assert request.random_seed == 42 assert request.max_batch_size == 8 - def test_base_multimodal_request_greedy_validation(self): - """Test BaseMultimodalRequest validator for greedy sampling.""" - request = BaseMultimodalRequest(model="test-model", temperature=0, top_p=0, top_k=5) - assert request.top_k == 1 - def test_multimodal_completion_request(self): """Test MultimodalCompletionRequest.""" request = MultimodalCompletionRequest( @@ -274,7 +269,7 @@ def test_completions_with_image(self, client, mock_triton_settings): request_data = { "model": "test-model", "prompt": "Describe this image", - "image": "base64_encoded_image_data", + "image": "data:image;base64,base64_encoded_image_data", "temperature": 0.7, } @@ -291,7 +286,7 @@ def test_completions_with_image(self, client, mock_triton_settings): mock_query.assert_called_once() call_kwargs = mock_query.call_args[1] - assert call_kwargs["images"] == ["base64_encoded_image_data"] + assert call_kwargs["images"] == ["data:image;base64,base64_encoded_image_data"] assert call_kwargs["temperature"] == 0.7 def test_completions_with_custom_params(self, client, mock_triton_settings): @@ -357,7 +352,7 @@ def test_chat_completions_with_image(self, client, mock_triton_settings): "role": "user", "content": [ {"type": "text", "text": "What's in this image?"}, - {"type": "image", "image": "base64_image_data"}, + {"type": "image", "image": "data:image;base64,base64_image_data"}, ], } ] @@ -376,7 +371,7 @@ def test_chat_completions_with_image(self, client, mock_triton_settings): mock_query.assert_called_once() call_kwargs = mock_query.call_args[1] - assert call_kwargs["images"] == ["base64_image_data"] + assert call_kwargs["images"] == ["data:image;base64,base64_image_data"] def test_chat_completions_multiple_images(self, client, 
mock_triton_settings): """Test /v1/chat/completions/ endpoint with multiple images.""" @@ -385,8 +380,8 @@ def test_chat_completions_multiple_images(self, client, mock_triton_settings): "role": "user", "content": [ {"type": "text", "text": "Compare these images"}, - {"type": "image", "image": "base64_image_1"}, - {"type": "image", "image": "base64_image_2"}, + {"type": "image", "image": "data:image;base64,base64_image_1"}, + {"type": "image", "image": "data:image;base64,base64_image_2"}, ], } ] @@ -403,9 +398,64 @@ def test_chat_completions_multiple_images(self, client, mock_triton_settings): mock_query.assert_called_once() call_kwargs = mock_query.call_args[1] - assert call_kwargs["images"] == ["base64_image_1", "base64_image_2"] + assert call_kwargs["images"] == ["data:image;base64,base64_image_1", "data:image;base64,base64_image_2"] assert call_kwargs["max_length"] == 200 + def test_chat_completions_with_image_url_format(self, client, mock_triton_settings): + """Test /v1/chat/completions/ endpoint with OpenAI-style image_url format.""" + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}, + ], + } + ] + request_data = {"model": "test-model", "messages": messages} + + mock_output = {"choices": [{"text": [["I see a cat"]]}], "model": "test-model"} + + with patch("nemo_deploy.service.fastapi_interface_to_pytriton_multimodal.query_multimodal_async") as mock_query: + mock_query.return_value = mock_output + + response = client.post("/v1/chat/completions/", json=request_data) + + assert response.status_code == 200 + result = response.json() + assert result["choices"][0]["message"]["content"] == "I see a cat" + + mock_query.assert_called_once() + call_kwargs = mock_query.call_args[1] + assert call_kwargs["images"] == ["https://example.com/image.jpg"] + + def test_chat_completions_with_mixed_image_formats(self, client, mock_triton_settings): + """Test /v1/chat/completions/ endpoint with mixed image and image_url formats.""" + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Compare these images"}, + {"type": "image", "image": "data:image;base64,base64_data"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}, + ], + } + ] + request_data = {"model": "test-model", "messages": messages} + + mock_output = {"choices": [{"text": [["Comparison"]]}], "model": "test-model"} + + with patch("nemo_deploy.service.fastapi_interface_to_pytriton_multimodal.query_multimodal_async") as mock_query: + mock_query.return_value = mock_output + + response = client.post("/v1/chat/completions/", json=request_data) + + assert response.status_code == 200 + + mock_query.assert_called_once() + call_kwargs = mock_query.call_args[1] + assert call_kwargs["images"] == ["data:image;base64,base64_data", "https://example.com/image.jpg"] + def test_chat_completions_with_params(self, client, mock_triton_settings): """Test /v1/chat/completions/ endpoint with custom parameters.""" messages = [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}] @@ -452,7 +502,7 @@ def test_helper_fun(self): url="http://localhost:8000", model="test-model", prompts=["test prompt"], - images=["image_data"], + images=["data:image;base64,image_data"], temperature=0.7, top_k=10, top_p=0.9, @@ -465,7 +515,7 @@ def test_helper_fun(self): mock_nq_class.assert_called_once_with(url="http://localhost:8000", model_name="test-model") 
mock_nq.query_multimodal.assert_called_once_with( prompts=["test prompt"], - images=["image_data"], + images=["data:image;base64,image_data"], temperature=0.7, top_k=10, top_p=0.9, diff --git a/tests/unit_tests/deploy/test_nemo_multimodal_deployable.py b/tests/unit_tests/deploy/test_megatron_multimodal_deployable.py similarity index 71% rename from tests/unit_tests/deploy/test_nemo_multimodal_deployable.py rename to tests/unit_tests/deploy/test_megatron_multimodal_deployable.py index 538fe06653..e20e8ee30a 100644 --- a/tests/unit_tests/deploy/test_nemo_multimodal_deployable.py +++ b/tests/unit_tests/deploy/test_megatron_multimodal_deployable.py @@ -21,7 +21,7 @@ from megatron.core.inference.common_inference_params import CommonInferenceParams from PIL import Image -from nemo_deploy.multimodal.nemo_multimodal_deployable import NeMoMultimodalDeployable +from nemo_deploy.multimodal.megatron_multimodal_deployable import MegatronMultimodalDeployable from nemo_export_deploy_common.import_utils import UnavailableError @@ -43,18 +43,18 @@ def __init__(self, generated_text): @pytest.fixture def mock_setup_model_and_tokenizer(): - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.setup_model_and_tokenizer") as mock: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.setup_model_and_tokenizer") as mock: mock.return_value = (MockInferenceWrappedModel(), MockProcessor()) yield mock @pytest.fixture def mock_triton_imports(): - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.HAVE_TRITON", True): - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.HAVE_NEMO", True): - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.batch") as mock_batch: - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.first_value") as mock_first_value: - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.Tensor") as mock_tensor: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.HAVE_TRITON", True): + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.HAVE_MBRIDGE", True): + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.batch") as mock_batch: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.first_value") as mock_first_value: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.Tensor") as mock_tensor: mock_batch.return_value = lambda x: x mock_first_value.return_value = lambda x: x @@ -74,8 +74,8 @@ def create_tensor(**kwargs): @pytest.fixture def mock_utils(): - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.str_ndarray2list") as mock_str2list: - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.cast_output") as mock_cast: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.str_ndarray2list") as mock_str2list: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.cast_output") as mock_cast: mock_str2list.return_value = ["test prompt 1", "test prompt 2"] mock_cast.return_value = np.array([b"Generated text 1", b"Generated text 2"]) yield mock_str2list, mock_cast @@ -94,23 +94,23 @@ def sample_image_base64(): @pytest.fixture def deployable(mock_setup_model_and_tokenizer, mock_triton_imports): - return NeMoMultimodalDeployable( - nemo_checkpoint_filepath="test_checkpoint.nemo", - tensor_parallel_size=1, - pipeline_parallel_size=1, + return MegatronMultimodalDeployable( + megatron_checkpoint_filepath="test_checkpoint.nemo", + tensor_model_parallel_size=1, + pipeline_model_parallel_size=1, 
params_dtype=torch.bfloat16, inference_batch_times_seqlen_threshold=1000, ) -class TestNeMoMultimodalDeployable: +class TestMegatronMultimodalDeployable: def test_initialization_success(self, mock_setup_model_and_tokenizer, mock_triton_imports): - """Test successful initialization of NeMoMultimodalDeployable.""" - deployable = NeMoMultimodalDeployable(nemo_checkpoint_filepath="test_checkpoint.nemo") + """Test successful initialization of MegatronMultimodalDeployable.""" + deployable = MegatronMultimodalDeployable(megatron_checkpoint_filepath="test_checkpoint.nemo") - assert deployable.nemo_checkpoint_filepath == "test_checkpoint.nemo" - assert deployable.tensor_parallel_size == 1 - assert deployable.pipeline_parallel_size == 1 + assert deployable.megatron_checkpoint_filepath == "test_checkpoint.nemo" + assert deployable.tensor_model_parallel_size == 1 + assert deployable.pipeline_model_parallel_size == 1 assert deployable.params_dtype == torch.bfloat16 assert deployable.inference_batch_times_seqlen_threshold == 1000 assert deployable.inference_wrapped_model is not None @@ -118,35 +118,37 @@ def test_initialization_success(self, mock_setup_model_and_tokenizer, mock_trito def test_initialization_with_custom_params(self, mock_setup_model_and_tokenizer, mock_triton_imports): """Test initialization with custom parameters.""" - deployable = NeMoMultimodalDeployable( - nemo_checkpoint_filepath="custom_checkpoint.nemo", - tensor_parallel_size=2, - pipeline_parallel_size=2, + deployable = MegatronMultimodalDeployable( + megatron_checkpoint_filepath="custom_checkpoint.nemo", + tensor_model_parallel_size=2, + pipeline_model_parallel_size=2, params_dtype=torch.float16, inference_batch_times_seqlen_threshold=2000, ) - assert deployable.tensor_parallel_size == 2 - assert deployable.pipeline_parallel_size == 2 + assert deployable.tensor_model_parallel_size == 2 + assert deployable.pipeline_model_parallel_size == 2 assert deployable.params_dtype == torch.float16 assert deployable.inference_batch_times_seqlen_threshold == 2000 def test_initialization_calls_setup_model(self, mock_setup_model_and_tokenizer, mock_triton_imports): """Test that initialization calls setup_model_and_tokenizer with correct parameters.""" - NeMoMultimodalDeployable( - nemo_checkpoint_filepath="test_checkpoint.nemo", - tensor_parallel_size=2, - pipeline_parallel_size=2, + MegatronMultimodalDeployable( + megatron_checkpoint_filepath="test_checkpoint.nemo", + tensor_model_parallel_size=2, + pipeline_model_parallel_size=2, params_dtype=torch.float16, inference_batch_times_seqlen_threshold=1500, + inference_max_seq_length=4096, ) mock_setup_model_and_tokenizer.assert_called_once_with( - path="test_checkpoint.nemo", - tp_size=2, - pp_size=2, + megatron_model_path="test_checkpoint.nemo", + tp=2, + pp=2, params_dtype=torch.float16, inference_batch_times_seqlen_threshold=1500, + inference_max_seq_length=4096, ) def test_generate_method(self, deployable, sample_image): @@ -155,7 +157,7 @@ def test_generate_method(self, deployable, sample_image): images = [sample_image, sample_image] inference_params = CommonInferenceParams(temperature=0.7, top_k=10, top_p=0.9, num_tokens_to_generate=100) - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.generate") as mock_generate: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.generate") as mock_generate: with patch.object(deployable, "apply_chat_template", side_effect=lambda x: x): mock_generate.return_value = [MockResult("Generated text 1"), MockResult("Generated text 
2")] @@ -189,7 +191,7 @@ def test_generate_method_default_params(self, deployable, sample_image): prompts = ["Test prompt"] images = [sample_image] - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.generate") as mock_generate: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.generate") as mock_generate: mock_generate.return_value = [MockResult("Generated text")] deployable.generate(prompts=prompts, images=images) @@ -255,10 +257,10 @@ def test_infer_fn(self, deployable, sample_image_base64, sample_image): prompts = ["Test prompt 1", "Test prompt 2"] images = [sample_image_base64, sample_image_base64] - with patch.object(deployable, "base64_to_image") as mock_base64_to_image: + with patch.object(deployable, "process_image_input") as mock_process_image_input: with patch.object(deployable, "generate") as mock_generate: - # Mock base64_to_image to return PIL Images - mock_base64_to_image.return_value = sample_image + # Mock process_image_input to return PIL Images + mock_process_image_input.return_value = sample_image mock_generate.return_value = [MockResult("Generated text 1"), MockResult("Generated text 2")] result = deployable._infer_fn( @@ -272,8 +274,8 @@ def test_infer_fn(self, deployable, sample_image_base64, sample_image): max_batch_size=3, ) - # Check that base64_to_image was called for each image - assert mock_base64_to_image.call_count == 2 + # Check that process_image_input was called for each image + assert mock_process_image_input.call_count == 2 # Check that generate was called with the right parameters assert mock_generate.call_count == 1 @@ -301,16 +303,16 @@ def test_infer_fn_default_params(self, deployable, sample_image_base64, sample_i prompts = ["Test prompt"] images = [sample_image_base64] - with patch.object(deployable, "base64_to_image") as mock_base64_to_image: + with patch.object(deployable, "process_image_input") as mock_process_image_input: with patch.object(deployable, "generate") as mock_generate: - # Mock base64_to_image to return PIL Images - mock_base64_to_image.return_value = sample_image + # Mock process_image_input to return PIL Images + mock_process_image_input.return_value = sample_image mock_generate.return_value = [MockResult("Generated text 1")] result = deployable._infer_fn(prompts=prompts, images=images) - # Check that base64_to_image was called - assert mock_base64_to_image.call_count == 1 + # Check that process_image_input was called + assert mock_process_image_input.call_count == 1 # Check that generate was called with the right parameters assert mock_generate.call_count == 1 @@ -331,9 +333,45 @@ def test_infer_fn_default_params(self, deployable, sample_image_base64, sample_i assert result["sentences"] == ["Generated text 1"] + def test_infer_fn_with_temperature_zero(self, deployable): + """Test _infer_fn with temperature=0.0 for greedy decoding.""" + sample_image = Image.new("RGB", (100, 100)) + sample_image_base64 = "data:image;base64,test_base64_string" + + prompts = ["Test prompt"] + images = [sample_image_base64] + + with patch.object(deployable, "process_image_input") as mock_process_image: + with patch.object(deployable, "generate") as mock_generate: + # Mock process_image_input to return PIL Images + mock_process_image.return_value = sample_image + mock_generate.return_value = [MockResult("Generated text")] + + result = deployable._infer_fn( + prompts=prompts, + images=images, + temperature=0.0, # Should trigger greedy sampling handling + top_k=5, # Should be overridden to 1 + top_p=0.5, # Should be 
overridden to 0.0 + num_tokens_to_generate=100, + ) + + # Check that generate was called with the right parameters + assert mock_generate.call_count == 1 + call_args = mock_generate.call_args + + # Check that inference_params has greedy sampling parameters + assert isinstance(call_args[0][2], CommonInferenceParams) + assert call_args[0][2].temperature == 0.0 # Kept as 0.0 + assert call_args[0][2].top_k == 1 # Overridden for greedy sampling + assert call_args[0][2].top_p == 0.0 # Overridden for greedy sampling + assert call_args[0][2].num_tokens_to_generate == 100 + + assert result["sentences"] == ["Generated text"] + def test_dict_to_str_function(self): """Test the dict_to_str utility function.""" - from nemo_deploy.multimodal.nemo_multimodal_deployable import dict_to_str + from nemo_deploy.multimodal.megatron_multimodal_deployable import dict_to_str test_dict = {"key1": "value1", "key2": "value2"} result = dict_to_str(test_dict) @@ -341,26 +379,29 @@ def test_dict_to_str_function(self): assert isinstance(result, str) assert json.loads(result) == test_dict - @patch("nemo_deploy.multimodal.nemo_multimodal_deployable.HAVE_TRITON", False) + @patch("nemo_deploy.multimodal.megatron_multimodal_deployable.HAVE_TRITON", False) def test_initialization_no_triton(self): """Test that initialization fails when Triton is not available.""" with pytest.raises(UnavailableError): - NeMoMultimodalDeployable(nemo_checkpoint_filepath="test_checkpoint.nemo") - - @patch("nemo_deploy.multimodal.nemo_multimodal_deployable.HAVE_NEMO", False) - def test_initialization_no_nemo(self): - """Test that initialization fails when NeMo is not available.""" - with pytest.raises(UnavailableError, match="nemo is not available. Please install it with `pip install nemo`."): - NeMoMultimodalDeployable(nemo_checkpoint_filepath="test_checkpoint.nemo") - - def test_initialization_missing_checkpoint(self, mock_triton_imports): + MegatronMultimodalDeployable(megatron_checkpoint_filepath="test_checkpoint.nemo") + + @patch("nemo_deploy.multimodal.megatron_multimodal_deployable.HAVE_MBRIDGE", False) + def test_initialization_no_mbridge(self): + """Test that initialization fails when Megatron Bridge is not available.""" + with pytest.raises( + UnavailableError, + match="megatron.bridge is not available. 
Please install it from https://github.com/NVIDIA-NeMo/Megatron-Bridge", + ): + MegatronMultimodalDeployable(megatron_checkpoint_filepath="test_checkpoint.nemo") + + def test_initialization_missing_checkpoint(self, mock_setup_model_and_tokenizer, mock_triton_imports): """Test initialization with missing checkpoint filepath.""" with pytest.raises(TypeError): - NeMoMultimodalDeployable() + MegatronMultimodalDeployable() def test_generate_empty_inputs(self, deployable): """Test generate method with empty inputs.""" - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.generate") as mock_generate: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.generate") as mock_generate: mock_generate.return_value = [] results = deployable.generate(prompts=[], images=[]) @@ -371,7 +412,7 @@ def test_generate_mismatched_inputs(self, deployable, sample_image): prompts = ["prompt1", "prompt2"] images = [sample_image] # Only one image for two prompts - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.generate") as mock_generate: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.generate") as mock_generate: mock_generate.return_value = [MockResult("Generated text 1"), MockResult("Generated text 2")] # This should work as the mock handles it, but in real scenario it might fail @@ -393,8 +434,8 @@ def test_triton_infer_fn_without_decorators(self, deployable, sample_image_base6 "apply_chat_template": np.array([False]), } - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.str_ndarray2list") as mock_str2list: - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.cast_output") as mock_cast: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.str_ndarray2list") as mock_str2list: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.cast_output") as mock_cast: with patch.object(deployable, "_infer_fn") as mock_infer: # Setup mocks mock_str2list.side_effect = [["test prompt 1", "test prompt 2"], ["mock_base64_1", "mock_base64_2"]] @@ -484,8 +525,8 @@ def test_apply_chat_template_without_generation_prompt(self, deployable): ) assert result == expected_text - def test_base64_to_image_with_qwenvl_wrapper(self, deployable): - """Test base64_to_image with QwenVLInferenceWrapper.""" + def test_process_image_input_with_qwenvl_wrapper(self, deployable): + """Test process_image_input with QwenVLInferenceWrapper using base64 image.""" # Create a mock QwenVLInferenceWrapper class mock_qwenvl_class = MagicMock() @@ -493,20 +534,19 @@ def test_base64_to_image_with_qwenvl_wrapper(self, deployable): # Use isinstance check to return True for QwenVLInferenceWrapper deployable.inference_wrapped_model = MagicMock() - image_base64 = ( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" - ) + # Image source with data URI prefix (new format) + image_source = "data:image;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" expected_image = Image.new("RGB", (100, 100)) - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.QwenVLInferenceWrapper", mock_qwenvl_class): + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.QwenVLInferenceWrapper", mock_qwenvl_class): # Make isinstance return True for our mock - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.isinstance") as mock_isinstance: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.isinstance") as mock_isinstance: 
mock_isinstance.return_value = True with patch("qwen_vl_utils.process_vision_info") as mock_process: mock_process.return_value = (expected_image, None) - result = deployable.base64_to_image(image_base64) + result = deployable.process_image_input(image_source) # Verify isinstance was called to check the model type mock_isinstance.assert_called_once_with(deployable.inference_wrapped_model, mock_qwenvl_class) @@ -516,27 +556,58 @@ def test_base64_to_image_with_qwenvl_wrapper(self, deployable): assert len(call_args) == 1 assert call_args[0]["role"] == "user" assert call_args[0]["content"][0]["type"] == "image" - assert call_args[0]["content"][0]["image"] == f"data:image;base64,{image_base64}" + assert call_args[0]["content"][0]["image"] == image_source + + assert result == expected_image + + def test_process_image_input_with_http_url(self, deployable): + """Test process_image_input with HTTP URL.""" + # Create a mock QwenVLInferenceWrapper class + mock_qwenvl_class = MagicMock() + + # Make deployable.inference_wrapped_model an instance of the mock class + deployable.inference_wrapped_model = MagicMock() + + # HTTP URL as image source + image_source = "https://example.com/image.jpg" + expected_image = Image.new("RGB", (100, 100)) + + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.QwenVLInferenceWrapper", mock_qwenvl_class): + # Make isinstance return True for our mock + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.isinstance") as mock_isinstance: + mock_isinstance.return_value = True + + with patch("qwen_vl_utils.process_vision_info") as mock_process: + mock_process.return_value = (expected_image, None) + + result = deployable.process_image_input(image_source) + + # Verify process_vision_info was called with URL + call_args = mock_process.call_args[0][0] + assert len(call_args) == 1 + assert call_args[0]["role"] == "user" + assert call_args[0]["content"][0]["type"] == "image" + assert call_args[0]["content"][0]["image"] == image_source assert result == expected_image - def test_base64_to_image_with_unsupported_model(self, deployable): - """Test base64_to_image with unsupported model raises ValueError.""" + def test_process_image_input_with_unsupported_model(self, deployable): + """Test process_image_input with unsupported model raises ValueError.""" # Create a mock QwenVLInferenceWrapper class mock_qwenvl_class = MagicMock() # Make sure the wrapped model is NOT a QwenVLInferenceWrapper deployable.inference_wrapped_model = MagicMock() - image_base64 = "test_base64_string" + image_source = "data:image;base64,test_base64_string" - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.QwenVLInferenceWrapper", mock_qwenvl_class): + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.QwenVLInferenceWrapper", mock_qwenvl_class): # Make isinstance return False for our mock (not a QwenVLInferenceWrapper) - with patch("nemo_deploy.multimodal.nemo_multimodal_deployable.isinstance") as mock_isinstance: + with patch("nemo_deploy.multimodal.megatron_multimodal_deployable.isinstance") as mock_isinstance: mock_isinstance.return_value = False with pytest.raises(ValueError, match="not supported"): - deployable.base64_to_image(image_base64) + deployable.process_image_input(image_source) def test_ray_infer_fn(self, deployable): """Test ray_infer_fn method.""" diff --git a/tests/unit_tests/deploy/test_query_multimodal.py b/tests/unit_tests/deploy/test_query_multimodal.py index e383d05dc0..1a5fa5b249 100644 --- 
a/tests/unit_tests/deploy/test_query_multimodal.py +++ b/tests/unit_tests/deploy/test_query_multimodal.py @@ -138,7 +138,7 @@ def query_multimodal_pytorch(self): @pytest.fixture def mock_images(self): # Create sample base64-encoded image strings for testing - return ["mock_base64_image_1", "mock_base64_image_2"] + return ["data:image;base64,mock_base64_image_1", "data:image;base64,mock_base64_image_2"] @pytest.fixture def mock_prompts(self): @@ -305,7 +305,7 @@ def test_query_multimodal_single_prompt_single_image(self, mock_model_client, qu mock_model_client.return_value.__enter__.return_value = mock_client_instance # Use mock base64 image string - base64_image = "mock_base64_single_image" + base64_image = "data:image;base64,mock_base64_single_image" result = query_multimodal_pytorch.query_multimodal(prompts=["Single prompt"], images=[base64_image])
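
For reference, a minimal client sketch for the `/v1/chat/completions/` endpoint touched by this patch, exercising both image content formats it now accepts (the direct `image` entry and the OpenAI-style `image_url` entry that the server normalizes to the first form). The host, port, model name, and local image path below are placeholders, not values taken from this patch; adjust them to wherever the FastAPI service from `fastapi_interface_to_pytriton_multimodal.py` is actually running.

```python
import base64

import requests

# Assumption: the FastAPI multimodal service is reachable at this address.
BASE_URL = "http://localhost:8080"

# Local files are sent base64-encoded with the data URI prefix; URLs are passed through as-is.
with open("local_image.jpg", "rb") as f:  # placeholder path
    image_b64 = "data:image;base64," + base64.b64encode(f.read()).decode("utf-8")

payload = {
    "model": "qwen",  # assumption: name used when deploying the model
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Compare these two images"},
                # Format 1: direct "image" entry (data URI base64 or HTTP/HTTPS URL)
                {"type": "image", "image": image_b64},
                # Format 2: OpenAI-style "image_url" entry, converted server-side to format 1
                {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
            ],
        }
    ],
    "max_tokens": 100,
}

response = requests.post(f"{BASE_URL}/v1/chat/completions/", json=payload)
response.raise_for_status()
print(response.json())
```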