Upload 3 files

- Dockerfile +33 -14
- app.py +16 -7
Dockerfile
CHANGED
@@ -1,26 +1,45 @@
-
+# Use Python 3.11 slim image
+FROM python:3.11-slim
 
-#
-
+# Ensure we run as root (the default) so we can set permissions
+USER root
 
-#
-
+# Install system dependencies
+RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Create and set permissions for cache directories
+RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
+    && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
+
+# Set environment variables for cache directories
+ENV HF_HOME=/app/.cache
+ENV XDG_CACHE_HOME=/app/.cache
+ENV MPLCONFIGDIR=/app/.config/matplotlib
+ENV USER_AGENT="my-gradio-app"
+
+# Copy the requirements file and install dependencies
 COPY requirements.txt .
-RUN pip install -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code
+COPY . .
 
-# Expose Gradio port
+# Expose the port for Gradio (Spaces expects the app on port 7860)
 EXPOSE 7860
 
-# Start vLLM and Gradio
-CMD vllm.
-
+# Start vLLM in the background and then the Gradio app
+CMD bash -c "wget -O /tmp/tool_chat_template_llama3.1_json.jinja https://github.com/vllm-project/vllm/raw/refs/heads/main/examples/tool_chat_template_llama3.1_json.jinja && \
+    vllm.entrypoints.openai.api_server \
+    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
     --enable-auto-tool-choice \
     --tool-call-parser llama3_json \
-    --chat-template
+    --chat-template /tmp/tool_chat_template_llama3.1_json.jinja \
     --quantization bitsandbytes \
     --load-format bitsandbytes \
     --dtype half \
     --max-model-len 8192 \
-    --download-dir models/vllm \
-
-    --port 8000 & python app.py
+    --download-dir models/vllm > vllm.log 2>&1 & \
+    python app.py"

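One note on the new CMD: vllm.entrypoints.openai.api_server is a Python module, not a shell executable, so as committed the command would normally be launched through the interpreter. A minimal sketch of that invocation, keeping the flags from the Dockerfile above and assuming vLLM's default port 8000 (which is what app.py's base_url expects):

    # Sketch only: same flags as the Dockerfile CMD, launched as a Python module
    python -m vllm.entrypoints.openai.api_server \
        --model unsloth/llama-3-8b-Instruct-bnb-4bit \
        --enable-auto-tool-choice \
        --tool-call-parser llama3_json \
        --chat-template /tmp/tool_chat_template_llama3.1_json.jinja \
        --quantization bitsandbytes \
        --load-format bitsandbytes \
        --dtype half \
        --max-model-len 8192 \
        --download-dir models/vllm > vllm.log 2>&1 &
    python app.py
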
app.py
CHANGED
@@ -2,7 +2,7 @@ from io import StringIO
 import sys
 
 import os
-
+from huggingface_hub import login
 import gradio as gr
 import json
 import csv
@@ -31,7 +31,7 @@ from docling.chunking import HybridChunker
 from langchain_community.document_loaders import WebBaseLoader
 from urllib.parse import urlparse
 
-
+from langchain_groq import ChatGroq
 from langchain_openai import ChatOpenAI
 from langgraph.prebuilt import InjectedStore
 from langgraph.store.base import BaseStore
@@ -56,6 +56,16 @@ logger = logging.getLogger(__name__)
 logging.disable(logging.WARNING)
 
 
+HF_TOKEN = os.getenv("HF_TOKEN")  # Read from environment variable
+if HF_TOKEN:
+    login(token=HF_TOKEN)  # Log in to Hugging Face Hub
+else:
+    print("Warning: HF_TOKEN not found in environment variables.")
+
+# GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Read from environment variable
+# if not GROQ_API_KEY:
+#     print("Warning: GROQ_API_KEY not found in environment variables.")
+
 EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
 
 # =============================================================================
@@ -538,8 +548,7 @@ model = ChatOpenAI(
     timeout=None,
     max_retries=2,
     api_key="not_required",
-    base_url="http://localhost:8000",  # Use the VLLM instance URL
-    verbose=True
+    base_url="http://localhost:8000/v1",  # Use the VLLM instance URL
 )
 
 # model = ChatGroq(
@@ -866,7 +875,7 @@ graph = graph_builder.compile(checkpointer=checkpointer, store=in_memory_store)
 import gradio as gr
 from gradio import ChatMessage
 
-system_prompt = "You are a helpful Assistant.
+system_prompt = "You are a helpful Assistant. You will always use the tools available to you from {tools} to address user queries."
 
 ########################################
 # Upload_documents
@@ -987,7 +996,7 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
     textbox=gr.MultimodalTextbox(
         file_count="multiple",
         file_types=None,
-        sources="upload",
+        sources=["upload"],
         label="Type your query here:",
         placeholder="Enter your question...",
     ),
@@ -999,4 +1008,4 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
 
 
 if __name__ == "__main__":
-    AI_Tutor.launch()
+    AI_Tutor.launch(inline=True, debug=True)
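For reference, the /v1 suffix added to base_url matches the route prefix of the OpenAI-compatible vLLM server started in the Dockerfile. Once that server is up, reachability can be sanity-checked with a plain HTTP call (sketch, localhost and port 8000 assumed from the Dockerfile):

    # Sketch: list the models served by the local vLLM OpenAI-compatible endpoint
    curl http://localhost:8000/v1/models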