Command
HIP_VISIBLE_DEVICES=0,1,2,3 python3 \
-m vllm.entrypoints.openai.api_server \
--model $model \
--port 8100 \
--max-model-len 10000 \
--quantization fp8 \
--dtype float16 \
--gpu-memory-utilization 0.9 \
--max-num-batched-token 10000 \
--tensor-parallel-size 4 \
--trust-remote-code \
--kv-transfer-config \
'{"kv_connector":"PyNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
HIP_VISIBLE_DEVICES=4,5,6,7 python3 \
-m vllm.entrypoints.openai.api_server \
--model $model \
--port 8200 \
--max-model-len 10000 \
--quantization fp8 \
--dtype float16 \
--gpu-memory-utilization 0.9 \
--max-num-batched-token 10000 \
--tensor-parallel-size 4 \
--trust-remote-code \
--kv-transfer-config \
'{"kv_connector":"PyNcclConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2,"kv_buffer_size":5e9}' &
Benchmark settings: qps = 10, input len = 256, output len = 256
Error
Traceback (most recent call last):
[rank0]: File "<string>", line 1, in <module>
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/multiprocessing/spawn.py”, line 122, in spawn_main
[rank0]: exitcode = _main(fd, parent_sentinel)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/multiprocessing/spawn.py”, line 135, in _main
[rank0]: return self._bootstrap(parent_sentinel)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/multiprocessing/process.py”, line 332, in _bootstrap
[rank0]: threading._shutdown()
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/threading.py”, line 1594, in _shutdown
[rank0]: atexit_call()
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/concurrent/futures/thread.py”, line 31, in _python_exit
[rank0]: t.join()
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/threading.py”, line 1149, in join
[rank0]: self._wait_for_tstate_lock()
[rank0]: File “/opt/conda/envs/py_3.12/lib/python3.12/threading.py”, line 1169, in _wait_for_tstate_lock
[rank0]: if lock.acquire(block, timeout):
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File “/var/lib/jenkins/vllm/vllm/engine/multiprocessing/engine.py”, line 426, in signal_handler
[rank0]: raise KeyboardInterrupt(“MQLLMEngine terminated”)
[rank0]: KeyboardInterrupt: MQLLMEngine terminated
[2025-07-10 23:19:50,661] ERROR in app: Exception on request POST /v1/chat/completions
Traceback (most recent call last):
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/connector.py”, line 1115, in _wrap_create_connection
sock = await aiohappyeyeballs.start_connection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohappyeyeballs/impl.py”, line 122, in start_connection
raise first_exception
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohappyeyeballs/impl.py”, line 73, in start_connection
sock = await _connect_sock(
^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohappyeyeballs/impl.py”, line 208, in _connect_sock
await loop.sock_connect(sock, address)
File “/opt/conda/envs/py_3.12/lib/python3.12/asyncio/selector_events.py”, line 651, in sock_connect
return await fut
^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/asyncio/selector_events.py”, line 691, in _sock_connect_cb
raise OSError(err, f'Connect call failed {address}')
ConnectionRefusedError: [Errno 111] Connect call failed ('127.0.0.1', 8100)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/quart/app.py”, line 1464, in handle_request
return await self.full_dispatch_request(request_context)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/quart/app.py”, line 1502, in full_dispatch_request
result = await self.handle_user_exception(error)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/quart/app.py”, line 1059, in handle_user_exception
raise error
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/quart/app.py”, line 1500, in full_dispatch_request
result = await self.dispatch_request(request_context)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/quart/app.py”, line 1597, in dispatch_request
return await self.ensure_async(handler)(**request_.view_args) # type: ignore[return-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/var/lib/jenkins/vllm/benchmarks/disagg_benchmarks/disagg_prefill_proxy_server.py”, line 87, in catch_all
async with session.request(method, upstream, headers=request.headers, json=data) as resp:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/client.py", line 1425, in __aenter__
self._resp: _RetType = await self._coro
^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/client.py”, line 703, in _request
conn = await self._connector.connect(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/connector.py”, line 548, in connect
proto = await self._create_connection(req, traces, timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/connector.py”, line 1056, in _create_connection
_, proto = await self._create_direct_connection(req, traces, timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/connector.py”, line 1406, in _create_direct_connection
raise last_exc
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/connector.py”, line 1375, in _create_direct_connection
transp, proto = await self._wrap_create_connection(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/opt/conda/envs/py_3.12/lib/python3.12/site-packages/aiohttp/connector.py”, line 1130, in _wrap_create_connection
raise client_error(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host localhost:8100 ssl:default [Connect call failed ('127.0.0.1', 8100)]