Here is the code, it’s very short. Using vllm v0.9.2
import asyncio
from typing import Optional
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.outputs import RequestOutput
from vllm.sampling_params import SamplingParams
from vllm.usage.usage_lib import UsageContext
async def main():
engine_args = AsyncEngineArgs(
model="Qwen/Qwen2.5-0.5B",
dtype="auto",
max_model_len=2048,
enforce_eager=True,
)
from vllm.v1.engine.async_llm import AsyncLLM
# engine_client = AsyncLLM.from_engine_args(engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
usage_context = UsageContext.OPENAI_API_SERVER
vllm_config = engine_args.create_engine_config(usage_context=usage_context)
engine_client = AsyncLLM.from_vllm_config(
vllm_config=vllm_config,
usage_context=usage_context,
disable_log_requests=engine_args.disable_log_requests,
disable_log_stats=engine_args.disable_log_stats)
sampling_params = SamplingParams(
max_tokens=256,
n=2,
)
prompt = "output a sentence with 10 words"
final_output: Optional[RequestOutput] = None
async for output in engine_client.generate(
prompt=prompt,
sampling_params=sampling_params,
request_id="abcdef",
):
if final_output:
print(len(final_output.outputs))
else:
print("none")
final_output = output
if final_output:
print(final_output.outputs)
if __name__ == "__main__":
asyncio.run(main())
only got one output in the end.
output:
2
2
...
2
1
...
1
[CompletionOutput(index=0, text='. deadly language is deleting to record unimaginy dishing untouchable utilityfulness it cuchous inc ritims on the indian boarding using everything that is a queer across unnerving possessions. darming is waiting for all the wrong ends are with the length skull, kidding once again in greed neo nation.', token_ids=[13, 24142, 4128, 374, 33011, 311, 3255, 86112, 88, 294, 10976, 13065, 3026, 480, 15549, 30414, 432, 272, 1387, 782, 3625, 21198, 5742, 389, 279, 42225, 48969, 1667, 4297, 429, 374, 264, 54541, 3941, 16950, 19505, 52535, 13, 294, 32902, 374, 8580, 369, 678, 279, 4969, 10335, 525, 448, 279, 3084, 34013, 11, 55725, 3055, 1549, 304, 55826, 35082, 6995, 13, 151643], cumulative_logprob=None, logprobs=None, finish_reason=stop, stop_reason=None)]