Sumkh committed on
Commit
b29e1e3
·
verified ·
1 Parent(s): 4a1735e

Upload 3 files

Browse files
Files changed (2) hide show
  1. Dockerfile +33 -14
  2. app.py +16 -7
Dockerfile CHANGED
@@ -1,26 +1,45 @@
1
- FROM python:3.9-slim
 
2
 
3
- # Install vLLM dependencies
4
- RUN pip install vllm gradio bitsandbytes transformers accelerate wget
5
 
6
- # Copy your Gradio app files
7
- COPY app.py .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  COPY requirements.txt .
9
- RUN pip install -r requirements.txt
 
 
 
10
 
11
- # Expose Gradio port
12
  EXPOSE 7860
13
 
14
- # Start vLLM and Gradio
15
- CMD vllm.entrypoints.openai.api_server \
16
- --model /app/models \
 
17
  --enable-auto-tool-choice \
18
  --tool-call-parser llama3_json \
19
- --chat-template examples/tool_chat_template_llama3.1_json.jinja \
20
  --quantization bitsandbytes \
21
  --load-format bitsandbytes \
22
  --dtype half \
23
  --max-model-len 8192 \
24
- --download-dir models/vllm \
25
- --host 0.0.0.0 \
26
- --port 8000 & python app.py
 
1
# Use Python 3.11 slim image
FROM python:3.11-slim

# Ensure we run as root (the default) so we can set permissions
USER root

# Install system dependencies (wget is needed at runtime to fetch the chat template)
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Create and set permissions for cache directories
# (Spaces runs the container as a non-root user; 777 lets that user write caches)
RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
 && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib

# Point HF/matplotlib caches at the writable directories created above
ENV HF_HOME=/app/.cache
ENV XDG_CACHE_HOME=/app/.cache
ENV MPLCONFIGDIR=/app/.config/matplotlib
ENV USER_AGENT="my-gradio-app"

# Copy the requirements file and install dependencies first (better layer caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Expose the port for Gradio (Spaces expects the app on port 7860)
EXPOSE 7860

# Start vLLM's OpenAI-compatible server in the background, then the Gradio app.
# NOTE: the module must be run with `python -m` — `vllm.entrypoints.openai.api_server`
# is a Python module path, not a shell executable, and would fail as a bare command.
CMD bash -c "wget -O /tmp/tool_chat_template_llama3.1_json.jinja https://github.com/vllm-project/vllm/raw/refs/heads/main/examples/tool_chat_template_llama3.1_json.jinja && \
    python -m vllm.entrypoints.openai.api_server \
    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
    --enable-auto-tool-choice \
    --tool-call-parser llama3_json \
    --chat-template /tmp/tool_chat_template_llama3.1_json.jinja \
    --quantization bitsandbytes \
    --load-format bitsandbytes \
    --dtype half \
    --max-model-len 8192 \
    --download-dir models/vllm > vllm.log 2>&1 & \
    python app.py"
 
app.py CHANGED
@@ -2,7 +2,7 @@ from io import StringIO
2
  import sys
3
 
4
  import os
5
- #from huggingface_hub import login
6
  import gradio as gr
7
  import json
8
  import csv
@@ -31,7 +31,7 @@ from docling.chunking import HybridChunker
31
  from langchain_community.document_loaders import WebBaseLoader
32
  from urllib.parse import urlparse
33
 
34
- #from langchain_groq import ChatGroq
35
  from langchain_openai import ChatOpenAI
36
  from langgraph.prebuilt import InjectedStore
37
  from langgraph.store.base import BaseStore
@@ -56,6 +56,16 @@ logger = logging.getLogger(__name__)
56
  logging.disable(logging.WARNING)
57
 
58
 
 
 
 
 
 
 
 
 
 
 
59
  EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
60
 
61
  # =============================================================================
@@ -538,8 +548,7 @@ model = ChatOpenAI(
538
  timeout=None,
539
  max_retries=2,
540
  api_key="not_required",
541
- base_url="http://localhost:8000", # Use the VLLM instance URL
542
- verbose=True
543
  )
544
 
545
  # model = ChatGroq(
@@ -866,7 +875,7 @@ graph = graph_builder.compile(checkpointer=checkpointer, store=in_memory_store)
866
  import gradio as gr
867
  from gradio import ChatMessage
868
 
869
- system_prompt = "You are a helpful Assistant. Always use the tools {tools}."
870
 
871
  ########################################
872
  # Upload_documents
@@ -987,7 +996,7 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
987
  textbox=gr.MultimodalTextbox(
988
  file_count="multiple",
989
  file_types=None,
990
- sources="upload",
991
  label="Type your query here:",
992
  placeholder="Enter your question...",
993
  ),
@@ -999,4 +1008,4 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
999
 
1000
 
1001
  if __name__ == "__main__":
1002
- AI_Tutor.launch()
 
2
  import sys
3
 
4
  import os
5
+ from huggingface_hub import login
6
  import gradio as gr
7
  import json
8
  import csv
 
31
  from langchain_community.document_loaders import WebBaseLoader
32
  from urllib.parse import urlparse
33
 
34
+ from langchain_groq import ChatGroq
35
  from langchain_openai import ChatOpenAI
36
  from langgraph.prebuilt import InjectedStore
37
  from langgraph.store.base import BaseStore
 
56
  logging.disable(logging.WARNING)
57
 
58
 
59
+ HF_TOKEN = os.getenv("HF_TOKEN") # Read from environment variable
60
+ if HF_TOKEN:
61
+ login(token=HF_TOKEN) # Log in to Hugging Face Hub
62
+ else:
63
+ print("Warning: HF_TOKEN not found in environment variables.")
64
+
65
+ # GROQ_API_KEY = os.getenv("GROQ_API_KEY") # Read from environment variable
66
+ # if not GROQ_API_KEY:
67
+ # print("Warning: GROQ_API_KEY not found in environment variables.")
68
+
69
  EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
70
 
71
  # =============================================================================
 
548
  timeout=None,
549
  max_retries=2,
550
  api_key="not_required",
551
+ base_url="http://localhost:8000/v1", # Use the VLLM instance URL
 
552
  )
553
 
554
  # model = ChatGroq(
 
875
  import gradio as gr
876
  from gradio import ChatMessage
877
 
878
+ system_prompt = "You are a helpful Assistant. You will always use the tools available to you from {tools} to address user queries."
879
 
880
  ########################################
881
  # Upload_documents
 
996
  textbox=gr.MultimodalTextbox(
997
  file_count="multiple",
998
  file_types=None,
999
+ sources=["upload"],
1000
  label="Type your query here:",
1001
  placeholder="Enter your question...",
1002
  ),
 
1008
 
1009
 
1010
  if __name__ == "__main__":
1011
+ AI_Tutor.launch(inline=True, debug=True)