启动命令:
CUDA_VISIBLE_DEVICES=0,1,2,5 vllm serve /mnt/afs/share_models/git_models/Qwen/Qwen3-32B \
    --served-model-name Qwen3-32B \
    --port 6669 \
    --host 0.0.0.0 \
    --tensor-parallel-size 4 \
    --dtype half \
    --enable-chunked-prefill \
    --enable-prefix-caching
日志:
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] WorkerProc hit an exception.
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] Traceback (most recent call last):
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 517, in worker_busy_loop
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] output = func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/worker/gpu_worker.py", line 185, in determine_available_memory
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] self.model_runner.profile_run()
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1899, in profile_run
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] sampler_output = self._dummy_sampler_run(hidden_states)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1760, in _dummy_sampler_run
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self.model.compute_logits(hidden_states, None)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.py", line 309, in compute_logits
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self.logits_processor(self.lm_head, hidden_states,
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return self._call_impl(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return forward_call(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 70, in forward
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self._get_logits(hidden_states, lm_head, embedding_bias)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 113, in _get_logits
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self._gather_logits(logits)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 95, in _gather_logits
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = tensor_model_parallel_all_gather(logits)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/communication_op.py", line 19, in tensor_model_parallel_all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return get_tp_group().all_gather(input_, dim)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/parallel_state.py", line 372, in all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return torch.ops.vllm.all_gather(input_,
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/_ops.py", line 1158, in __call__
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return self._op(*args, **(kwargs or {}))
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/parallel_state.py", line 138, in all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return group._all_gather_out_place(tensor, dim)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/parallel_state.py", line 381, in _all_gather_out_place
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return self.device_communicator.all_gather(input_, dim)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/device_communicators/base_device_communicator.py", line 129, in all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] dist.all_gather_into_tensor(output_tensor,
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", line 3836, in all_gather_into_tensor
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] work = group._allgather_base(output_tensor, input_tensor, opts)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/NCCLUtils.cpp:77, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ncclUnhandledCudaError: Call to CUDA function failed.
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] Last error:
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] Failed to CUDA calloc 2097152 bytes
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] Traceback (most recent call last):
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 517, in worker_busy_loop
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] output = func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/worker/gpu_worker.py", line 185, in determine_available_memory
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] self.model_runner.profile_run()
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1899, in profile_run
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] sampler_output = self._dummy_sampler_run(hidden_states)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1760, in _dummy_sampler_run
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self.model.compute_logits(hidden_states, None)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/models/qwen3.py", line 309, in compute_logits
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self.logits_processor(self.lm_head, hidden_states,
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return self._call_impl(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return forward_call(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 70, in forward
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self._get_logits(hidden_states, lm_head, embedding_bias)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 113, in _get_logits
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = self._gather_logits(logits)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/model_executor/layers/logits_processor.py", line 95, in _gather_logits
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] logits = tensor_model_parallel_all_gather(logits)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/communication_op.py", line 19, in tensor_model_parallel_all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return get_tp_group().all_gather(input_, dim)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/parallel_state.py", line 372, in all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return torch.ops.vllm.all_gather(input_,
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/_ops.py", line 1158, in __call__
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return self._op(*args, **(kwargs or {}))
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/parallel_state.py", line 138, in all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return group._all_gather_out_place(tensor, dim)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/parallel_state.py", line 381, in _all_gather_out_place
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return self.device_communicator.all_gather(input_, dim)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/vllm/distributed/device_communicators/base_device_communicator.py", line 129, in all_gather
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] dist.all_gather_into_tensor(output_tensor,
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] File "/usr/local/lib/miniconda3/envs/SenseRL/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py", line 3836, in all_gather_into_tensor
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] work = group._allgather_base(output_tensor, input_tensor, opts)
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/NCCLUtils.cpp:77, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] ncclUnhandledCudaError: Call to CUDA function failed.
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] Last error:
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522] Failed to CUDA calloc 2097152 bytes
(VllmWorker rank=3 pid=457669) ERROR 07-30 17:25:21 [multiproc_executor.py:522]