Update start.sh
start.sh
CHANGED
@@ -3,9 +3,11 @@
 export HF_HOME=/app/.cache
 mkdir -p /app/.cache
 
-#
-
-vllm serve \
+# Optionally set a USER_AGENT to identify your requests
+export USER_AGENT="vllm_huggingface_space"
+
+# Launch the vLLM server with the model tag as a positional argument
+vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
 --enable-auto-tool-choice \
 --tool-call-parser llama3_json \
 --chat-template examples/tool_chat_template_llama3.1_json.jinja \
@@ -14,8 +16,8 @@ vllm serve \
 --dtype half \
 --max-model-len 8192 &
 
-#
+# Wait to ensure the vLLM server is fully started (adjust if needed)
 sleep 10
 
-# Start the Gradio
-python3 app.py
+# Start the Gradio application using python3
+python3 app.py
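The fixed "sleep 10" before launching the app is only a guess at how long model loading takes. A more robust variant polls the server until it answers; a minimal sketch, assuming vLLM's default port 8000 (the script passes no --port) and the /health endpoint exposed by vLLM's OpenAI-compatible server:

# Poll the vLLM server instead of sleeping a fixed 10 seconds.
# Assumes the default port 8000; curl -sf fails until the server responds.
until curl -sf http://localhost:8000/health > /dev/null; do
    echo "Waiting for vLLM server to start..."
    sleep 2
done
echo "vLLM server is ready."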
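With --enable-auto-tool-choice, --tool-call-parser llama3_json, and the matching chat template, the server can return OpenAI-style tool calls. A quick smoke test once it is up, sketched with a hypothetical get_weather tool (the tool definition is illustrative only; the model name and port follow the script above):

# Hypothetical tool-calling request against the OpenAI-compatible endpoint.
curl -s http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "unsloth/llama-3-8b-Instruct-bnb-4bit",
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"]
                }
            }
        }]
    }'

If tool calling is wired up correctly, the response should contain a tool_calls entry naming get_weather rather than a plain-text answer.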