我使用的 vLLM 版本是 v0.4.0。
我启动 vLLM 的脚本如下:
#!/bin/bash
# vLLM LoRA launch script.
# Usage:   ./start_vllm_lora.sh <lora_count>
# Example: ./start_vllm_lora.sh 100
#
# Starts the vLLM OpenAI-compatible API server on port 8090 and registers
# <lora_count> LoRA modules named llama-lora0 .. llama-lora<N-1>, all of which
# point at the same adapter directory (LORA_MODEL_PATH).

set -euo pipefail

# Restrict the server to GPU index 3.
export CUDA_VISIBLE_DEVICES=3

# Check arguments: exactly the LoRA count is required.
if [[ $# -eq 0 ]]; then
  echo "错误: 请提供LoRA数量参数" >&2
  echo "用法: $0 <lora_count>" >&2
  echo "示例: $0 100" >&2
  exit 1
fi

LORA_COUNT=$1

# Validate that the argument is a positive integer (no leading zero).
# The pattern must be anchored at both ends and left unquoted so that
# [[ =~ ]] treats it as a regex.
if ! [[ "$LORA_COUNT" =~ ^[1-9][0-9]*$ ]]; then
  echo "错误: LoRA数量必须是正整数" >&2
  exit 1
fi

echo "启动vLLM服务器,LoRA数量: $LORA_COUNT"

# Base configuration.
PYTHON_PATH="python"
SCRIPT_PATH="/root/vllm/vllm/vllm/entrypoints/openai/api_server.py"
MODEL_NAME="meta-llama/Meta-Llama-3-8B"
LORA_MODEL_PATH="/root/data/huggingface/models--yspkm--Meta-Llama-3-8B-lora-math/snapshots/c4f4a7e5b889e3caaab2a222bad4a368a14302b0"

# Build the list of name=path LoRA module specs as an array so every entry is
# passed as exactly one argument, regardless of characters in the path.
lora_modules=()
for ((i = 0; i < LORA_COUNT; i++)); do
  lora_modules+=("llama-lora${i}=${LORA_MODEL_PATH}")
done

# Launch the server, replacing this shell process.
# All module specs follow ONE --lora-modules flag: the option accepts multiple
# values. NOTE(review): repeating the flag once per module may keep only the
# last occurrence, depending on the vLLM version's argparse action -- confirm
# against the installed version.
exec "$PYTHON_PATH" "$SCRIPT_PATH" \
  --model "$MODEL_NAME" \
  --enable-lora \
  --lora-modules "${lora_modules[@]}" \
  --port 8090 \
  --max-lora-rank 32 \
  --block-size 32 \
  --enable-prefix-caching \
  --max-loras "$LORA_COUNT"
当我尝试使用 LoRA 模块进行问答时,出现了下面的错误:
# Send a completion request that targets the first registered LoRA module.
# The JSON body is single-quoted so the shell passes it through verbatim.
curl -X POST http://0.0.0.0:8090/v1/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama-lora0",
    "prompt": "hello",
    "max_tokens": 100,
    "temperature": 0.0,
    "stream": false,
    "ignore_eos": true
  }'
{“object”:“error”,“message”:“Unrecognized model in /root/data/huggingface/models–yspkm–Meta-Llama-3-8B-lora-math/snapshots/c4f4a7e5b889e3caaab2a222bad4a368a14302b0. Should have a model_type key in its config.json, or contain one of the following strings in its name: albert, align, altclip, arcee, aria, aria_text, audio-spectrogram-transformer, autoformer, aya_vision, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, bitnet, blenderbot, blenderbot-small, blip, blip-2, blip_2_qformer, bloom, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, colpali, colqwen2, conditional_detr, convbert, convnext, convnextv2, cpmant, csm, ctrl, cvt, d_fine, dab-detr, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deepseek_v3, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dia, diffllama, dinat, dinov2, dinov2_with_registers, distilbert, donut-swin, dots1, dpr, dpt, efficientformer, efficientnet, electra, emu3, encodec, encoder-decoder, ernie, ernie_m, esm, falcon, falcon_h1, falcon_mamba, fastspeech2_conformer, flaubert, flava, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, gemma3, gemma3_text, gemma3n, gemma3n_audio, gemma3n_text, gemma3n_vision, git, glm, glm4, glm4v, glm4v_text, glpn, got_ocr2, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gptj, gptsan-japanese, granite, granite_speech, granitemoe, granitemoehybrid, granitemoeshared, granitevision, graphormer, grounding-dino, groupvit, helium, hgnet_v2, hiera, hubert, ibert, idefics, idefics2, idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, internvl, internvl_vision, jamba, janus, jetmoe, jukebox, kosmos-2, kyutai_speech_to_text, layoutlm, layoutlmv2, layoutlmv3, led, levit, lightglue, lilt, llama, llama4, llama4_text, llava, 
llava_next, llava_next_video, llava_onevision, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, mgp-str, mimi, minimax, mistral, mistral3, mixtral, mlcd, mllama, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmoe, omdet-turbo, oneformer, open-llama, openai-gpt, opt, owlv2, owlvit, paligemma, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, persimmon, phi, phi3, phi4_multimodal, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prompt_depth_anything, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_5_omni, qwen2_5_vl, qwen2_5_vl_text, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, qwen2_vl_text, qwen3, qwen3_moe, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rt_detr_v2, rwkv, sam, sam_hq, sam_hq_vision_model, sam_vision_model, seamless_m4t, seamless_m4t_v2, segformer, seggpt, sew, sew-d, shieldgemma2, siglip, siglip2, siglip_vision_model, smollm3, smolvlm, smolvlm_vision, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superglue, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, t5gemma, table-transformer, tapas, textnet, time_series_transformer, timesfm, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, vjepa2, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xglm, xlm, 
xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xmod, yolos, yoso, zamba, zamba2, zoedepth”,“type”:“BadRequestError”,“param”:null,“code”:400}%
可是,报错信息中提到的路径 /root/data/huggingface/models--yspkm--Meta-Llama-3-8B-lora-math/snapshots/c4f4a7e5b889e3caaab2a222bad4a368a14302b0 并不是一个基础模型(model),而是一个 LoRA 适配器目录。
1-9 ↩︎