Spaces:
Sleeping
#!/usr/bin/env bash
# Startup script for a Hugging Face Space: launch a vLLM OpenAI-compatible
# server in the background, wait until it is ready, then start the Gradio app.
set -euo pipefail

# Set writable cache/config directories for Hugging Face Hub — the Space's
# default HOME may not be writable.
export HF_HOME=/app/.cache
export XDG_CONFIG_HOME=/app/.config
# Create both directories (the original only created the cache dir even
# though XDG_CONFIG_HOME was also pointed at /app/.config).
mkdir -p /app/.cache /app/.config

# Optionally set a USER_AGENT to identify your requests
export USER_AGENT="vllm_huggingface_space"

# Launch the vLLM server with the model tag as a positional argument.
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
  --enable-auto-tool-choice \
  --tool-call-parser llama3_json \
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
  --quantization bitsandbytes \
  --load-format bitsandbytes \
  --dtype half \
  --enforce-eager \
  --max-model-len 8192 &
vllm_pid=$!

# Kill the background server on any exit path so the Space shuts down cleanly.
cleanup() { kill "$vllm_pid" 2>/dev/null || true; }
trap cleanup EXIT

# Wait for the vLLM server to become ready instead of a fixed `sleep 10`
# (loading an 8B bnb-4bit model usually takes much longer than 10 s).
# Polls the health endpoint for up to ~4 minutes, and fails fast if the
# server process dies first.
# NOTE(review): assumes vLLM's default port 8000 — confirm for this Space.
for _ in {1..120}; do
  if ! kill -0 "$vllm_pid" 2>/dev/null; then
    printf 'vLLM server exited before becoming ready\n' >&2
    exit 1
  fi
  if curl -fsS http://localhost:8000/health >/dev/null 2>&1; then
    break
  fi
  sleep 2
done

# Start the Gradio application using python3
python3 app.py