Sumkh committed (verified)
Commit dfd3125 · Parent: ad4e9f2

Upload 3 files

Files changed (2)
  1. Dockerfile +13 -15
  2. app.py +1 -1
Dockerfile CHANGED
@@ -1,38 +1,36 @@
-# Use Python 3.11 slim image
-FROM python:3.11-slim
+# Use the official vLLM Docker image as the base image
+FROM vllm/vllm-openai:latest
 
-# Ensure we run as root (the default) so we can set permissions
-USER root
+# Set the working directory
+WORKDIR /app
 
-# Install system dependencies
+# Install system dependencies if needed (e.g., wget)
 RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
 
-# Set working directory
-WORKDIR /app
-
-# Create and set permissions for cache directories
+# Set up writable cache directories (for Hugging Face, matplotlib, etc.)
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
     && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
 
-# Set environment variables for cache directories
+# Set environment variables for cache directories and user agent
 ENV HF_HOME=/app/.cache
 ENV XDG_CACHE_HOME=/app/.cache
 ENV MPLCONFIGDIR=/app/.config/matplotlib
 ENV USER_AGENT="my-gradio-app"
 
-# Copy the requirements file and install dependencies
+# Copy the requirements file and install additional Python dependencies (e.g., gradio)
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the rest of the application code
+# Copy your application code (including app.py and any other needed files)
 COPY . .
 
 # Expose the port for Gradio (Spaces expects the app on port 7860)
 EXPOSE 7860
 
-# Start vLLM in the background and then the Gradio app
+# Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
+# Replace 'new-model-name:latest' with your new model's identifier.
 CMD bash -c "vllm.entrypoints.openai.api_server \
-    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
+    --model new-model-name:latest \
     --enable-auto-tool-choice \
     --tool-call-parser llama3_json \
     --chat-template examples/tool_chat_template_llama3.1_json.jinja \
@@ -40,4 +38,4 @@ CMD bash -c "vllm.entrypoints.openai.api_server \
     --load-format bitsandbytes \
     --dtype half \
     --max-model-len 8192 \
-    python app.py"
+    > vllm.log 2>&1 & python app.py"
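A side note on the new CMD: `vllm.entrypoints.openai.api_server` is a Python module, not a shell executable, so the string passed to `bash -c` would likely fail as written; the server is normally started with `python3 -m vllm.entrypoints.openai.api_server` (or the `vllm serve` CLI). The `vllm/vllm-openai` base image also sets the API server itself as its ENTRYPOINT, which a bare CMD does not replace. A minimal sketch of what the final instructions would plausibly need to look like, keeping the diff's own placeholder model name, flags, and log path (the ENTRYPOINT reset is an assumption, not part of the commit):

# Assumption: clear the base image's ENTRYPOINT so CMD runs as a shell command
ENTRYPOINT []

# Start the vLLM OpenAI-compatible server in the background (logging to
# vllm.log), then run the Gradio app as the container's foreground process.
CMD bash -c "python3 -m vllm.entrypoints.openai.api_server \
    --model new-model-name:latest \
    --enable-auto-tool-choice \
    --tool-call-parser llama3_json \
    --chat-template examples/tool_chat_template_llama3.1_json.jinja \
    --load-format bitsandbytes \
    --dtype half \
    --max-model-len 8192 \
    > vllm.log 2>&1 & python app.py"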
app.py CHANGED
@@ -1008,4 +1008,4 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
 
 
 if __name__ == "__main__":
-    AI_Tutor.launch(inline=True, debug=True)
+    AI_Tutor.launch(server_name="0.0.0.0", server_port=7860)
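The app.py change matters because `inline=True` targets notebook environments; inside a container the app must instead bind to all interfaces on the port the Space routes traffic to (7860), which is what the new launch call does. For context, a minimal sketch of how such an app would typically reach the vLLM server started in the background, assuming vLLM's default port 8000 and the `openai` client package in requirements.txt (the `ask` helper and UI are hypothetical; the model name mirrors the Dockerfile's placeholder):

import gradio as gr
from openai import OpenAI

# vLLM exposes an OpenAI-compatible API; an api_key is required but unused.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

def ask(question: str) -> str:
    # "new-model-name:latest" mirrors the placeholder in the Dockerfile.
    response = client.chat.completions.create(
        model="new-model-name:latest",
        messages=[{"role": "user", "content": question}],
    )
    return response.choices[0].message.content

with gr.Blocks(theme="ocean") as AI_Tutor:
    box = gr.Textbox(label="Question")
    out = gr.Textbox(label="Answer")
    box.submit(ask, box, out)

if __name__ == "__main__":
    # Bind to all interfaces on 7860 so the Space routes traffic in.
    AI_Tutor.launch(server_name="0.0.0.0", server_port=7860)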