File size: 545 Bytes
f189c3b
 
 
b99c6a9
f189c3b
 
 
 
 
 
 
 
 
 
 
 
 
872a46f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/bin/bash
#
# Start a vLLM server in the background, wait until it accepts connections,
# then run the Gradio chatbot (app.py) in the foreground. The background
# server is stopped again when this script exits.
#
# Optional environment overrides:
#   VLLM_PORT             TCP port for the vLLM server (default: 8000)
#   VLLM_STARTUP_TIMEOUT  seconds to wait for the server before launching
#                         the app anyway (default: 300)
set -euo pipefail

readonly VLLM_PORT="${VLLM_PORT:-8000}"
readonly VLLM_STARTUP_TIMEOUT="${VLLM_STARTUP_TIMEOUT:-300}"

#######################################
# Check whether something accepts TCP connections on a local port.
# Arguments: $1 - port number
# Returns:   0 if a connection could be opened, non-zero otherwise
#######################################
port_is_open() {
  # /dev/tcp is handled by bash itself, so no curl/nc is needed. The
  # subshell closes the descriptor again as soon as it exits.
  (exec 3<>"/dev/tcp/127.0.0.1/$1") 2>/dev/null
}

#######################################
# Stop the background vLLM server if it is still running.
# Globals:   vllm_pid (read)
#######################################
cleanup() {
  if kill -0 "${vllm_pid}" 2>/dev/null; then
    # Intentional "|| true": the exit status of a server we are killing on
    # purpose must not replace the script's own exit status.
    kill "${vllm_pid}" 2>/dev/null || true
    wait "${vllm_pid}" 2>/dev/null || true
  fi
}

# Start the vLLM server in the background with the custom flags.
# NOTE: the model is the positional argument of `vllm serve`. The former
# `--runtime nvidia --gpus all --ipc=host` options belong to `docker run`
# (GPU access / shared memory for the container), not to vLLM, and made the
# command fail during argument parsing; set them on the container instead.
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
  --port "${VLLM_PORT}" \
  --enable-auto-tool-choice \
  --tool-call-parser llama3_json \
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
  --quantization bitsandbytes \
  --load-format bitsandbytes \
  --dtype half \
  --max-model-len 8192 &
vllm_pid=$!

trap cleanup EXIT
# Turn INT/TERM into a normal exit so the EXIT trap above runs.
trap 'exit 130' INT
trap 'exit 143' TERM

# Wait for the server instead of sleeping a fixed time: model loading can
# take far longer than a few seconds, and a crashed server should be
# reported immediately rather than discovered later by the app.
waited=0
until port_is_open "${VLLM_PORT}"; do
  if ! kill -0 "${vllm_pid}" 2>/dev/null; then
    printf 'error: vLLM server exited during startup\n' >&2
    exit 1
  fi
  if (( waited >= VLLM_STARTUP_TIMEOUT )); then
    # The server is still alive, so keep the original behavior of launching
    # the app anyway; it may become reachable later.
    printf 'warning: vLLM not reachable on port %s after %ss; starting app anyway\n' \
      "${VLLM_PORT}" "${VLLM_STARTUP_TIMEOUT}" >&2
    break
  fi
  sleep 1
  waited=$(( waited + 1 ))
done

# Launch the Gradio chatbot application; its exit status becomes the
# script's exit status.
python3 app.py