基本环境:
vLLM 0.9.1(自行从源码编译的版本),PyTorch 2.7.1,CUDA 12.8(cu128)。机器信息:HP Z6 G5,192 GB 内存,显卡 RTX PRO 6000。操作系统:Ubuntu 24.04。
启动命令:
VLLM_ATTENTION_BACKEND=FLASHINFER vllm serve ~/models/Qwen3-32B-FP8 \
  --served-model-name Qwen3-32B \
  --api-key htxt-ai \
  --seed 3407 \
  --disable-log-requests \
  --gpu-memory-utilization 0.90 \
  --host 0.0.0.0 --port 6006 \
  --max-model-len 64000 \
  --dtype bfloat16 \
  --trust-remote-code \
  --max-num-seqs 32
错误信息如下:
ERROR 07-04 13:17:38 [core.py:515] EngineCore failed to start.
ERROR 07-04 13:17:38 [core.py:515] Traceback (most recent call last):
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 506, in run_engine_core
ERROR 07-04 13:17:38 [core.py:515]     engine_core = EngineCoreProc(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 390, in __init__
ERROR 07-04 13:17:38 [core.py:515]     super().__init__(vllm_config, executor_class, log_stats,
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 83, in __init__
ERROR 07-04 13:17:38 [core.py:515]     self._initialize_kv_caches(vllm_config)
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 141, in _initialize_kv_caches
ERROR 07-04 13:17:38 [core.py:515]     available_gpu_memory = self.model_executor.determine_available_memory()
ERROR 07-04 13:17:38 [core.py:515]                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/executor/abstract.py”, line 76, in determine_available_memory
ERROR 07-04 13:17:38 [core.py:515]     output = self.collective_rpc(“determine_available_memory”)
ERROR 07-04 13:17:38 [core.py:515]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py”, line 57, in collective_rpc
ERROR 07-04 13:17:38 [core.py:515]     answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 07-04 13:17:38 [core.py:515]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/utils.py”, line 2671, in run_method
ERROR 07-04 13:17:38 [core.py:515]     return func(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/utils/_contextlib.py”, line 116, in decorate_context
ERROR 07-04 13:17:38 [core.py:515]     return func(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py”, line 205, in determine_available_memory
ERROR 07-04 13:17:38 [core.py:515]     self.model_runner.profile_run()
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py”, line 2012, in profile_run
ERROR 07-04 13:17:38 [core.py:515]     hidden_states = self._dummy_run(self.max_num_tokens)
ERROR 07-04 13:17:38 [core.py:515]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File "/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 07-04 13:17:38 [core.py:515]     return func(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py”, line 1847, in _dummy_run
ERROR 07-04 13:17:38 [core.py:515]     outputs = model(
ERROR 07-04 13:17:38 [core.py:515]               ^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1751, in _wrapped_call_impl
ERROR 07-04 13:17:38 [core.py:515]     return self._call_impl(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1762, in _call_impl
ERROR 07-04 13:17:38 [core.py:515]     return forward_call(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen3.py”, line 301, in forward
ERROR 07-04 13:17:38 [core.py:515]     hidden_states = self.model(input_ids, positions, intermediate_tensors,
ERROR 07-04 13:17:38 [core.py:515]                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/compilation/decorators.py”, line 239, in __call__
ERROR 07-04 13:17:38 [core.py:515]     output = self.compiled_callable(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File "/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 655, in _fn
ERROR 07-04 13:17:38 [core.py:515]     return fn(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py”, line 336, in forward
ERROR 07-04 13:17:38 [core.py:515]     def forward(
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1751, in _wrapped_call_impl
ERROR 07-04 13:17:38 [core.py:515]     return self._call_impl(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1762, in _call_impl
ERROR 07-04 13:17:38 [core.py:515]     return forward_call(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File "/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 838, in _fn
ERROR 07-04 13:17:38 [core.py:515]     return fn(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/fx/graph_module.py”, line 830, in call_wrapped
ERROR 07-04 13:17:38 [core.py:515]     return self._wrapped_call(self, *args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/fx/graph_module.py”, line 406, in __call__
ERROR 07-04 13:17:38 [core.py:515]     raise e
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/fx/graph_module.py”, line 393, in __call__
ERROR 07-04 13:17:38 [core.py:515]     return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1751, in _wrapped_call_impl
ERROR 07-04 13:17:38 [core.py:515]     return self._call_impl(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1762, in _call_impl
ERROR 07-04 13:17:38 [core.py:515]     return forward_call(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “<eval_with_key>.130”, line 778, in forward
ERROR 07-04 13:17:38 [core.py:515]     submod_0 = self.submod_0(l_input_ids, s0, l_self_modules_embed_tokens_parameters_weight, l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_scale_inv, l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight, l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight, l_positions, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache);  l_input_ids = l_self_modules_embed_tokens_parameters_weight = l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_scale_inv = l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight = l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight = None
ERROR 07-04 13:17:38 [core.py:515]                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/compilation/cuda_piecewise_backend.py”, line 111, in __call__
ERROR 07-04 13:17:38 [core.py:515]     return self.compiled_graph_for_general_shape(*args)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py”, line 838, in _fn
ERROR 07-04 13:17:38 [core.py:515]     return fn(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/aot_autograd.py”, line 1209, in forward
ERROR 07-04 13:17:38 [core.py:515]     return compiled_fn(full_args)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py”, line 328, in runtime_wrapper
ERROR 07-04 13:17:38 [core.py:515]     all_outs = call_func_at_runtime_with_args(
ERROR 07-04 13:17:38 [core.py:515]                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/utils.py”, line 126, in call_func_at_runtime_with_args
ERROR 07-04 13:17:38 [core.py:515]     out = normalize_as_list(f(args))
ERROR 07-04 13:17:38 [core.py:515]                             ^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py”, line 689, in inner_fn
ERROR 07-04 13:17:38 [core.py:515]     outs = compiled_fn(args)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py”, line 495, in wrapper
ERROR 07-04 13:17:38 [core.py:515]     return compiled_fn(runtime_args)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_inductor/output_code.py”, line 460, in __call__
ERROR 07-04 13:17:38 [core.py:515]     return self.current_callable(inputs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_inductor/utils.py”, line 2404, in run
ERROR 07-04 13:17:38 [core.py:515]     return model(new_inputs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/.cache/vllm/torch_compile_cache/24fba47138/rank_0_0/inductor_cache/qv/cqvtomjpfvuxt5j6wimhfjiujwga4fgjjnth3npvhuixtbsqjpkg.py”, line 435, in call
ERROR 07-04 13:17:38 [core.py:515]     buf3 = torch.ops.vllm.apply_w8a8_block_fp8_linear.default(buf2, arg4_1, [128, 128], arg5_1)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_ops.py”, line 756, in __call__
ERROR 07-04 13:17:38 [core.py:515]     return self._op(*args, **kwargs)
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/quantization/utils/fp8_utils.py”, line 141, in apply_w8a8_block_fp8_linear
ERROR 07-04 13:17:38 [core.py:515]     output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale,
ERROR 07-04 13:17:38 [core.py:515]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/quantization/utils/fp8_utils.py”, line 39, in cutlass_scaled_mm
ERROR 07-04 13:17:38 [core.py:515]     return ops.cutlass_scaled_mm(A,
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/_custom_ops.py”, line 717, in cutlass_scaled_mm
ERROR 07-04 13:17:38 [core.py:515]     torch.ops._C.cutlass_scaled_mm(out, a, b, scale_a, scale_b, bias)
ERROR 07-04 13:17:38 [core.py:515]   File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_ops.py”, line 1158, in __call__
ERROR 07-04 13:17:38 [core.py:515]     return self._op(*args, **(kwargs or {}))
ERROR 07-04 13:17:38 [core.py:515]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 07-04 13:17:38 [core.py:515] RuntimeError: Error Internal
Process EngineCore_0:
Traceback (most recent call last):
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/multiprocessing/process.py”, line 314, in _bootstrap
self.run()
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/multiprocessing/process.py”, line 108, in run
self._target(*self._args, **self._kwargs)
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 519, in run_engine_core
raise e
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 506, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 390, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 83, in __init__
self._initialize_kv_caches(vllm_config)
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core.py”, line 141, in _initialize_kv_caches
available_gpu_memory = self.model_executor.determine_available_memory()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/executor/abstract.py”, line 76, in determine_available_memory
output = self.collective_rpc(“determine_available_memory”)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py”, line 57, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/utils.py”, line 2671, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/utils/_contextlib.py”, line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py”, line 205, in determine_available_memory
self.model_runner.profile_run()
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py”, line 2012, in profile_run
hidden_states = self._dummy_run(self.max_num_tokens)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py”, line 1847, in _dummy_run
outputs = model(
^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1762, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen3.py”, line 301, in forward
hidden_states = self.model(input_ids, positions, intermediate_tensors,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/compilation/decorators.py”, line 239, in __call__
output = self.compiled_callable(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 655, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py”, line 336, in forward
def forward(
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1762, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 838, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/fx/graph_module.py”, line 830, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/fx/graph_module.py”, line 406, in __call__
raise e
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/fx/graph_module.py”, line 393, in __call__
return super(self.cls, obj).__call__(*args, **kwargs)  # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1751, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/nn/modules/module.py”, line 1762, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “<eval_with_key>.130”, line 778, in forward
submod_0 = self.submod_0(l_input_ids, s0, l_self_modules_embed_tokens_parameters_weight, l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_scale_inv, l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight, l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight, l_positions, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache);  l_input_ids = l_self_modules_embed_tokens_parameters_weight = l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_scale_inv = l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight = l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight = None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/compilation/cuda_piecewise_backend.py”, line 111, in __call__
return self.compiled_graph_for_general_shape(*args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py”, line 838, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/aot_autograd.py”, line 1209, in forward
return compiled_fn(full_args)
^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py”, line 328, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/utils.py”, line 126, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py”, line 689, in inner_fn
outs = compiled_fn(args)
^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py”, line 495, in wrapper
return compiled_fn(runtime_args)
^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_inductor/output_code.py”, line 460, in __call__
return self.current_callable(inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_inductor/utils.py”, line 2404, in run
return model(new_inputs)
^^^^^^^^^^^^^^^^^
File “/home/blackfog/.cache/vllm/torch_compile_cache/24fba47138/rank_0_0/inductor_cache/qv/cqvtomjpfvuxt5j6wimhfjiujwga4fgjjnth3npvhuixtbsqjpkg.py”, line 435, in call
buf3 = torch.ops.vllm.apply_w8a8_block_fp8_linear.default(buf2, arg4_1, [128, 128], arg5_1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_ops.py”, line 756, in __call__
return self._op(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/quantization/utils/fp8_utils.py”, line 141, in apply_w8a8_block_fp8_linear
output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/quantization/utils/fp8_utils.py”, line 39, in cutlass_scaled_mm
return ops.cutlass_scaled_mm(A,
^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/_custom_ops.py”, line 717, in cutlass_scaled_mm
torch.ops._C.cutlass_scaled_mm(out, a, b, scale_a, scale_b, bias)
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/torch/_ops.py”, line 1158, in __call__
return self._op(*args, **(kwargs or {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Error Internal
[rank0]:[W704 13:17:40.856694377 ProcessGroupNCCL.cpp:1479] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
Traceback (most recent call last):
File “/home/blackfog/miniconda3/envs/vllm/bin/vllm”, line 8, in <module>
sys.exit(main())
^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/cli/main.py”, line 59, in main
args.dispatch_function(args)
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/cli/serve.py”, line 58, in cmd
uvloop.run(run_server(args))
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/uvloop/__init__.py”, line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/asyncio/runners.py”, line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/asyncio/runners.py”, line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “uvloop/loop.pyx”, line 1518, in uvloop.loop.Loop.run_until_complete
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/uvloop/__init__.py”, line 61, in wrapper
return await main
^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py”, line 1323, in run_server
await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py”, line 1343, in run_server_worker
async with build_async_engine_client(args, client_config) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/contextlib.py”, line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py”, line 155, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/contextlib.py”, line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py”, line 191, in build_async_engine_client_from_engine_args
async_llm = AsyncLLM.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py”, line 162, in from_vllm_config
return cls(
^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py”, line 124, in __init__
self.engine_core = EngineCoreClient.make_async_mp_client(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py”, line 93, in make_async_mp_client
return AsyncMPClient(vllm_config, executor_class, log_stats,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py”, line 716, in __init__
super().__init__(
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py”, line 422, in __init__
self._init_engines_direct(vllm_config, local_only,
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py”, line 491, in _init_engines_direct
self._wait_for_engine_startup(handshake_socket, input_address,
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/engine/core_client.py”, line 511, in _wait_for_engine_startup
wait_for_engine_startup(
File “/home/blackfog/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/v1/utils.py”, line 494, in wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}