我的配置：

llm = LLM(
model=MODEL_PATH,
tensor_parallel_size=2,
pipeline_parallel_size=3,
dtype="bfloat16",
gpu_memory_utilization=0.9,
enable_expert_parallel=True,
mm_encoder_tp_mode="data",
trust_remote_code=True,
max_model_len=131072,
enable_chunked_prefill=True,
enable_prefix_caching=True,
enforce_eager=True,
disable_custom_all_reduce=True
)

下面是我的报错，换成 30B 小模型也报错：

(EngineCore_DP0 pid=171207) File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/core.py", line 699, in run_engine_core
(EngineCore_DP0 pid=171207) engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=171207) File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/core.py", line 498, in __init__
(EngineCore_DP0 pid=171207) super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_DP0 pid=171207) File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/core.py", line 83, in __init__
(EngineCore_DP0 pid=171207) self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=171207) File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/executor/executor_base.py", line 54, in __init__
(EngineCore_DP0 pid=171207) self._init_executor()
(EngineCore_DP0 pid=171207) File “/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/executor/multiproc_executor.py”, line 106, in _init_executor
(EngineCore_DP0 pid=171207) self.workers = WorkerProc.wait_for_ready(unready_workers)
(EngineCore_DP0 pid=171207) File “/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/executor/multiproc_executor.py”, line 509, in wait_for_ready
(EngineCore_DP0 pid=171207) raise e from None
(EngineCore_DP0 pid=171207) Exception: WorkerProc initialization failed due to an exception in a background process. See stack trace for root cause.
Traceback (most recent call last):
File "/data/juicefs_sharing_data/11188211/xss_task2_asethetic_syn/qwenvl_3_238B/test_qwvl_3_238B_1W_quick.py", line 225, in <module>
run_vllm_inference()
File “/data/juicefs_sharing_data/11188211/xss_task2_asethetic_syn/qwenvl_3_238B/test_qwvl_3_238B_1W_quick.py”, line 92, in run_vllm_inference
llm = LLM(
File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/entrypoints/llm.py", line 297, in __init__
self.llm_engine = LLMEngine.from_engine_args(
File “/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/llm_engine.py”, line 177, in from_engine_args
return cls(vllm_config=vllm_config,
File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/llm_engine.py", line 114, in __init__
self.engine_core = EngineCoreClient.make_client(
File “/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/core_client.py”, line 80, in make_client
return SyncMPClient(vllm_config, executor_class, log_stats)
File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/core_client.py", line 602, in __init__
super().__init__(
File "/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/core_client.py", line 448, in __init__
with launch_core_engines(vllm_config, executor_class,
File "/usr/lib/python3.10/contextlib.py", line 142, in __exit__
next(self.gen)
File “/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/utils.py”, line 732, in launch_core_engines
wait_for_engine_startup(
File “/data/juicefs_sharing_data/11188211/project2025/tool/vllm/vllm/v1/engine/utils.py”, line 785, in wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
/usr/lib/python3.10/multiprocessing/resource_tracker.py:224: UserWarning: resource_tracker: There appear to be 3 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d ’