Spaces:
Sleeping
Sleeping
File size: 545 Bytes
f189c3b b99c6a9 f189c3b 872a46f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
#!/bin/bash
#
# Start a vLLM server in the background, then run the Gradio chatbot
# (app.py) in the foreground.
#
# Environment:
#   VLLM_STARTUP_WAIT  Seconds to wait for the server before launching the
#                      app (default: 10).
set -euo pipefail

readonly STARTUP_WAIT="${VLLM_STARTUP_WAIT:-10}"

# Container-level options such as `--runtime nvidia --gpus all --ipc=host`
# belong to `docker run`, not to the vLLM CLI, so they are not passed here;
# configure them on the container that runs this script instead.
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
  --enable-auto-tool-choice \
  --tool-call-parser llama3_json \
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
  --quantization bitsandbytes \
  --load-format bitsandbytes \
  --dtype half \
  --max-model-len 8192 &
vllm_pid=$!

# Stop the background server on any exit path so it is not left orphaned.
# Errors are ignored on purpose: the server may already have exited.
trap 'kill "$vllm_pid" 2>/dev/null || true' EXIT

# Allow some time for the vLLM server to start up (adjust if needed).
sleep "$STARTUP_WAIT"

# A server that died during startup (bad flag, missing GPU, failed download)
# would leave the chatbot with nothing to talk to, so fail loudly instead.
if ! kill -0 "$vllm_pid" 2>/dev/null; then
  printf 'error: vLLM server exited during startup\n' >&2
  exit 1
fi

# Launch the Gradio chatbot application.
python3 app.py
|