Sumkh committed on
Commit
b29e1e3
·
verified ·
1 Parent(s): 4a1735e

Upload 3 files

Browse files
Files changed (2) hide show
  1. Dockerfile +33 -14
  2. app.py +16 -7
Dockerfile CHANGED
@@ -1,26 +1,45 @@
1
- FROM python:3.9-slim
 
2
 
3
- # Install vLLM dependencies
4
- RUN pip install vllm gradio bitsandbytes transformers accelerate wget
5
 
6
- # Copy your Gradio app files
7
- COPY app.py .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  COPY requirements.txt .
9
- RUN pip install -r requirements.txt
 
 
 
10
 
11
- # Expose Gradio port
12
  EXPOSE 7860
13
 
14
- # Start vLLM and Gradio
15
- CMD vllm.entrypoints.openai.api_server \
16
- --model /app/models \
 
17
  --enable-auto-tool-choice \
18
  --tool-call-parser llama3_json \
19
- --chat-template examples/tool_chat_template_llama3.1_json.jinja \
20
  --quantization bitsandbytes \
21
  --load-format bitsandbytes \
22
  --dtype half \
23
  --max-model-len 8192 \
24
- --download-dir models/vllm \
25
- --host 0.0.0.0 \
26
- --port 8000 & python app.py
 
1
# Use Python 3.11 slim image
FROM python:3.11-slim

# Ensure we run as root (the default) so we can set permissions
USER root

# Install system dependencies (wget is needed at runtime to fetch the chat template)
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Create and set permissions for cache directories
# (Spaces runs the container as a non-root user; 777 lets that user write caches)
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
 && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib

# Point HF/matplotlib caches at the writable directories created above
ENV HF_HOME=/app/.cache
ENV XDG_CACHE_HOME=/app/.cache
ENV MPLCONFIGDIR=/app/.config/matplotlib
ENV USER_AGENT="my-gradio-app"

# Copy the requirements file and install dependencies first (better layer caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Expose the port for Gradio (Spaces expects the app on port 7860)
EXPOSE 7860

# Start vLLM's OpenAI-compatible server in the background, then the Gradio app.
# NOTE: the module must be run with `python -m` — `vllm.entrypoints.openai.api_server`
# is a Python module path, not a shell executable, and would fail as a bare command.
CMD bash -c "wget -O /tmp/tool_chat_template_llama3.1_json.jinja https://github.com/vllm-project/vllm/raw/refs/heads/main/examples/tool_chat_template_llama3.1_json.jinja && \
    python -m vllm.entrypoints.openai.api_server \
    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
    --enable-auto-tool-choice \
    --tool-call-parser llama3_json \
    --chat-template /tmp/tool_chat_template_llama3.1_json.jinja \
    --quantization bitsandbytes \
    --load-format bitsandbytes \
    --dtype half \
    --max-model-len 8192 \
    --download-dir models/vllm > vllm.log 2>&1 & \
    python app.py"
 
app.py CHANGED
@@ -2,7 +2,7 @@ from io import StringIO
2
  import sys
3
 
4
  import os
5
- #from huggingface_hub import login
6
  import gradio as gr
7
  import json
8
  import csv
@@ -31,7 +31,7 @@ from docling.chunking import HybridChunker
31
  from langchain_community.document_loaders import WebBaseLoader
32
  from urllib.parse import urlparse
33
 
34
- #from langchain_groq import ChatGroq
35
  from langchain_openai import ChatOpenAI
36
  from langgraph.prebuilt import InjectedStore
37
  from langgraph.store.base import BaseStore
@@ -56,6 +56,16 @@ logger = logging.getLogger(__name__)
56
  logging.disable(logging.WARNING)
57
 
58
 
 
 
 
 
 
 
 
 
 
 
59
  EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
60
 
61
  # =============================================================================
@@ -538,8 +548,7 @@ model = ChatOpenAI(
538
  timeout=None,
539
  max_retries=2,
540
  api_key="not_required",
541
- base_url="http://localhost:8000", # Use the VLLM instance URL
542
- verbose=True
543
  )
544
 
545
  # model = ChatGroq(
@@ -866,7 +875,7 @@ graph = graph_builder.compile(checkpointer=checkpointer, store=in_memory_store)
866
  import gradio as gr
867
  from gradio import ChatMessage
868
 
869
- system_prompt = "You are a helpful Assistant. Always use the tools {tools}."
870
 
871
  ########################################
872
  # Upload_documents
@@ -987,7 +996,7 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
987
  textbox=gr.MultimodalTextbox(
988
  file_count="multiple",
989
  file_types=None,
990
- sources="upload",
991
  label="Type your query here:",
992
  placeholder="Enter your question...",
993
  ),
@@ -999,4 +1008,4 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
999
 
1000
 
1001
  if __name__ == "__main__":
1002
- AI_Tutor.launch()
 
2
  import sys
3
 
4
  import os
5
+ from huggingface_hub import login
6
  import gradio as gr
7
  import json
8
  import csv
 
31
  from langchain_community.document_loaders import WebBaseLoader
32
  from urllib.parse import urlparse
33
 
34
+ from langchain_groq import ChatGroq
35
  from langchain_openai import ChatOpenAI
36
  from langgraph.prebuilt import InjectedStore
37
  from langgraph.store.base import BaseStore
 
56
  logging.disable(logging.WARNING)
57
 
58
 
59
+ HF_TOKEN = os.getenv("HF_TOKEN") # Read from environment variable
60
+ if HF_TOKEN:
61
+ login(token=HF_TOKEN) # Log in to Hugging Face Hub
62
+ else:
63
+ print("Warning: HF_TOKEN not found in environment variables.")
64
+
65
+ # GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Read from environment variable
66
+ # if not GROQ_API_KEY:
67
+ # print("Warning: GROQ_API_KEY not found in environment variables.")
68
+
69
  EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
70
 
71
  # =============================================================================
 
548
  timeout=None,
549
  max_retries=2,
550
  api_key="not_required",
551
+ base_url="http://localhost:8000/v1", # Use the VLLM instance URL
 
552
  )
553
 
554
  # model = ChatGroq(
 
875
  import gradio as gr
876
  from gradio import ChatMessage
877
 
878
+ system_prompt = "You are a helpful Assistant. You will always use the tools available to you from {tools} to address user queries."
879
 
880
  ########################################
881
  # Upload_documents
 
996
  textbox=gr.MultimodalTextbox(
997
  file_count="multiple",
998
  file_types=None,
999
+ sources=["upload"],
1000
  label="Type your query here:",
1001
  placeholder="Enter your question...",
1002
  ),
 
1008
 
1009
 
1010
  if __name__ == "__main__":
1011
+ AI_Tutor.launch(inline=True, debug=True)