Sumkh committed on
Commit
edd1153
·
verified ·
1 Parent(s): 6d838bd

Update start.sh

Browse files
Files changed (1) hide show
  1. start.sh +8 -6
start.sh CHANGED
@@ -3,9 +3,11 @@
3
  export HF_HOME=/app/.cache
4
  mkdir -p /app/.cache
5
 
6
- # Launch the vLLM server using the proper subcommand (e.g., 'serve')
7
- vllm serve \
8
- --model unsloth/llama-3-8b-Instruct-bnb-4bit \
 
 
9
  --enable-auto-tool-choice \
10
  --tool-call-parser llama3_json \
11
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
@@ -14,8 +16,8 @@ vllm serve \
14
  --dtype half \
15
  --max-model-len 8192 &
16
 
17
- # Allow some time for vLLM to initialize
18
  sleep 10
19
 
20
- # Start the Gradio app (use python3 if python isn’t found)
21
- python3 app.py
 
3
  export HF_HOME=/app/.cache
4
  mkdir -p /app/.cache
5
 
6
+ # Optionally set a USER_AGENT to identify your requests
7
+ export USER_AGENT="vllm_huggingface_space"
8
+
9
+ # Launch the vLLM server with the model tag as a positional argument
10
+ vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
11
  --enable-auto-tool-choice \
12
  --tool-call-parser llama3_json \
13
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
 
16
  --dtype half \
17
  --max-model-len 8192 &
18
 
19
+ # Wait to ensure the vLLM server is fully started (adjust if needed)
20
  sleep 10
21
 
22
+ # Start the Gradio application using python3
23
+ python3 app.py