Spaces:
Sleeping
#!/usr/bin/env bash
# Startup script for a Hugging Face Space: launch a vLLM OpenAI-compatible
# server in the background, wait until it is ready, then start the Gradio app.
set -euo pipefail

# Set writable cache/config directories for Hugging Face Hub — the Space's
# default HOME may not be writable.
export HF_HOME=/app/.cache
export XDG_CONFIG_HOME=/app/.config
# Create both directories (the original only created the cache dir even
# though XDG_CONFIG_HOME was also pointed at /app/.config).
mkdir -p /app/.cache /app/.config

# Optionally set a USER_AGENT to identify your requests
export USER_AGENT="vllm_huggingface_space"

# Launch the vLLM server with the model tag as a positional argument.
vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
  --enable-auto-tool-choice \
  --tool-call-parser llama3_json \
  --chat-template examples/tool_chat_template_llama3.1_json.jinja \
  --quantization bitsandbytes \
  --load-format bitsandbytes \
  --dtype half \
  --enforce-eager \
  --max-model-len 8192 &
vllm_pid=$!

# Kill the background server on any exit path so the Space shuts down cleanly.
cleanup() { kill "$vllm_pid" 2>/dev/null || true; }
trap cleanup EXIT

# Wait for the vLLM server to become ready instead of a fixed `sleep 10`
# (loading an 8B bnb-4bit model usually takes much longer than 10 s).
# Polls the health endpoint for up to ~4 minutes, and fails fast if the
# server process dies first.
# NOTE(review): assumes vLLM's default port 8000 — confirm for this Space.
for _ in {1..120}; do
  if ! kill -0 "$vllm_pid" 2>/dev/null; then
    printf 'vLLM server exited before becoming ready\n' >&2
    exit 1
  fi
  if curl -fsS http://localhost:8000/health >/dev/null 2>&1; then
    break
  fi
  sleep 2
done

# Start the Gradio application using python3
python3 app.py