Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions requirements/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,4 @@ torchvision==0.24.1 # Required for phi3v processor. See https://github.com/pytor
# FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.5.3
# FA4
flash-attn-cute @ git+https://github.com/Dao-AILab/flash-attention.git@2580b5a4882562640f3cfbffd2bb8d2de9268f9f#subdirectory=flash_attn/cute
# nvimgcodec
nvidia-nvjpeg==13.0.3.75
nvidia-nvimgcodec-cu13==0.7.0.11
flash-attn-cute @ git+https://github.com/Dao-AILab/flash-attention.git@2580b5a4882562640f3cfbffd2bb8d2de9268f9f#subdirectory=flash_attn/cute
83 changes: 0 additions & 83 deletions tests/v1/test_serial_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,86 +283,3 @@ def test_custom_class_serialization_disallowed_without_pickle():
with pytest.raises(TypeError):
# Attempt to encode the custom class
encoder.encode(obj)


@dataclass
class RequestWithTensor:
"""Mock request with non-multimodal tensor field like EngineCoreRequest."""

prompt_embeds: torch.Tensor | None
data: str


def test_non_multimodal_tensor_with_ipc():
    """Non-multimodal tensor fields must round-trip when IPC is enabled.

    Regression test: a field such as ``prompt_embeds: torch.Tensor | None``
    previously failed to decode under IPC because ``_decode_tensor`` expected
    a tuple but was handed a TensorIpcHandle dict.
    """
    import torch.multiprocessing as torch_mp

    # A single IPC queue shared by the encoder and the decoder.
    queues = [torch_mp.Queue()]

    # IPC-enabled encoder targeting engine 0.
    enc = MsgpackEncoder(tensor_queues=queues, multimodal_tensor_ipc="torch_shm")
    enc.set_target_engine(0)
    enc.set_request_context("test_request_123")

    # Decoder wired to the same IPC queue.
    dec = MsgpackDecoder(RequestWithTensor, tensor_queue=queues[0])

    # A request whose tensor payload is NOT multimodal data.
    payload = torch.randn(5, 10, dtype=torch.float32)
    req = RequestWithTensor(prompt_embeds=payload, data="test_data")

    # Encoding should push the tensor through the IPC queue.
    wire = enc.encode(req)
    assert len(wire) > 0

    # Decoding should pull it back; this used to raise
    # "TypeError: cannot unpack non-iterable dict object".
    result = dec.decode(wire)

    # Round-trip must reproduce the original request exactly.
    assert isinstance(result, RequestWithTensor)
    assert result.data == "test_data"
    assert result.prompt_embeds is not None
    assert torch.allclose(result.prompt_embeds, payload), (
        "Decoded tensor does not match the original tensor."
    )


def test_non_multimodal_tensor_with_ipc_none_value():
    """A ``None`` tensor field must survive the IPC encode/decode round-trip."""
    import torch.multiprocessing as torch_mp

    # One IPC queue shared between both ends.
    queues = [torch_mp.Queue()]

    # Encoder configured exactly as in the tensor round-trip test.
    enc = MsgpackEncoder(tensor_queues=queues, multimodal_tensor_ipc="torch_shm")
    enc.set_target_engine(0)
    enc.set_request_context("test_request_456")

    dec = MsgpackDecoder(RequestWithTensor, tensor_queue=queues[0])

    # Tensor field deliberately absent.
    req = RequestWithTensor(prompt_embeds=None, data="test_data_with_none")

    # Round-trip through the serializer.
    result = dec.decode(enc.encode(req))

    # None must come back as None, other fields untouched.
    assert isinstance(result, RequestWithTensor)
    assert result.data == "test_data_with_none"
    assert result.prompt_embeds is None
Loading