I have the following script:
import os

# Expose only GPUs 0-3 to this process. This assignment is kept ahead of the
# vllm import, preserving the original statement order.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

from vllm import LLM, SamplingParams

# Create the vLLM engine for the Gemma 3 27B instruction-tuned checkpoint.
# tensor_parallel_size matches the four GPUs made visible above.
llm = LLM(
    model="google/gemma-3-27b-it",
    tensor_parallel_size=4,
    gpu_memory_utilization=0.9,
    max_model_len=8192,
    trust_remote_code=True,
)
The environment I use is:
vLLM version: 0.12.0
torch version: 2.9.0+cu129
cuda available: True
cuda version (torch): 12.9
cudnn version: 91002
GPU: NVIDIA H100 80GB HBM3
Running the script produces the following error:
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] WorkerProc hit an exception.
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] Traceback (most recent call last):
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py”, line 817, in worker_busy_loop
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] output = func(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/utils/_contextlib.py”, line 120, in decorate_context
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return func(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py”, line 324, in determine_available_memory
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] self.model_runner.profile_run()
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py”, line 4322, in profile_run
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] dummy_encoder_outputs = self.model.embed_multimodal(
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/gemma3_mm.py”, line 604, in embed_multimodal
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return self._process_image_input(image_input)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/gemma3_mm.py”, line 588, in _process_image_input
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] image_features = self._image_pixels_to_features(
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/gemma3_mm.py”, line 577, in _image_pixels_to_features
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return vision_tower(pixel_values)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1786, in _call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/siglip.py”, line 856, in forward
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return self.vision_model(
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1786, in _call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/siglip.py”, line 754, in forward
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] encoder_outputs = self.encoder(
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1786, in _call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/siglip.py”, line 562, in forward
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] hidden_states, _ = encoder_layer(hidden_states)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1775, in _wrapped_call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return self._call_impl(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1786, in _call_impl
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] return forward_call(*args, **kwargs)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] File “/data/rli15/venvs/vllm-py312/lib/python3.12/site-packages/vllm/model_executor/models/siglip.py”, line 511, in forward
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] hidden_states, _ = self.self_attn(hidden_states=hidden_states)
(EngineCore_DP0 pid=2936360) (Worker_TP0 pid=2936367) ERROR 12-17 20:12:04 [multiproc_executor.py:822] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^