Sumkh committed (verified)
Commit dfd3125 · Parent: ad4e9f2

Upload 3 files

Files changed (2)
  1. Dockerfile +13 -15
  2. app.py +1 -1
Dockerfile CHANGED
@@ -1,38 +1,36 @@
-# Use Python 3.11 slim image
-FROM python:3.11-slim
+# Use the official vLLM Docker image as the base image
+FROM vllm/vllm-openai:latest
 
-# Ensure we run as root (the default) so we can set permissions
-USER root
+# Set the working directory
+WORKDIR /app
 
-# Install system dependencies
+# Install system dependencies if needed (e.g., wget)
 RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
 
-# Set working directory
-WORKDIR /app
-
-# Create and set permissions for cache directories
+# Set up writable cache directories (for Hugging Face, matplotlib, etc.)
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache \
     && mkdir -p /app/.config/matplotlib && chmod -R 777 /app/.config/matplotlib
 
-# Set environment variables for cache directories
+# Set environment variables for cache directories and user agent
 ENV HF_HOME=/app/.cache
 ENV XDG_CACHE_HOME=/app/.cache
 ENV MPLCONFIGDIR=/app/.config/matplotlib
 ENV USER_AGENT="my-gradio-app"
 
-# Copy the requirements file and install dependencies
+# Copy the requirements file and install additional Python dependencies (e.g., gradio)
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the rest of the application code
+# Copy your application code (including app.py and any other needed files)
 COPY . .
 
 # Expose the port for Gradio (Spaces expects the app on port 7860)
 EXPOSE 7860
 
-# Start vLLM in the background and then the Gradio app
+# Set the CMD to launch the vLLM server (for your new model) in the background and then start the Gradio app.
+# Replace 'new-model-name:latest' with your new model's identifier.
 CMD bash -c "vllm.entrypoints.openai.api_server \
-    --model unsloth/llama-3-8b-Instruct-bnb-4bit \
+    --model new-model-name:latest \
     --enable-auto-tool-choice \
     --tool-call-parser llama3_json \
     --chat-template examples/tool_chat_template_llama3.1_json.jinja \
@@ -40,4 +38,4 @@ CMD bash -c "vllm.entrypoints.openai.api_server \
     --load-format bitsandbytes \
     --dtype half \
     --max-model-len 8192 \
-    python app.py"
+    > vllm.log 2>&1 & python app.py"
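A side note on the new CMD: `vllm.entrypoints.openai.api_server` is a Python module, not a shell executable, so the string passed to `bash -c` would likely fail as written; the server is normally started with `python3 -m vllm.entrypoints.openai.api_server` (or the `vllm serve` CLI). The `vllm/vllm-openai` base image also sets the API server itself as its ENTRYPOINT, which a bare CMD does not replace. A minimal sketch of what the final instructions would plausibly need to look like, keeping the diff's own placeholder model name, flags, and log path (the ENTRYPOINT reset is an assumption, not part of the commit):

# Assumption: clear the base image's ENTRYPOINT so CMD runs as a shell command
ENTRYPOINT []

# Start the vLLM OpenAI-compatible server in the background (logging to
# vllm.log), then run the Gradio app as the container's foreground process.
CMD bash -c "python3 -m vllm.entrypoints.openai.api_server \
    --model new-model-name:latest \
    --enable-auto-tool-choice \
    --tool-call-parser llama3_json \
    --chat-template examples/tool_chat_template_llama3.1_json.jinja \
    --load-format bitsandbytes \
    --dtype half \
    --max-model-len 8192 \
    > vllm.log 2>&1 & python app.py"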
app.py CHANGED
@@ -1008,4 +1008,4 @@ with gr.Blocks(theme="ocean") as AI_Tutor:
 
 
 if __name__ == "__main__":
-    AI_Tutor.launch(inline=True, debug=True)
+    AI_Tutor.launch(server_name="0.0.0.0", server_port=7860)
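The app.py change matters because `inline=True` targets notebook environments; inside a container the app must instead bind to all interfaces on the port the Space routes traffic to (7860), which is what the new launch call does. For context, a minimal sketch of how such an app would typically reach the vLLM server started in the background, assuming vLLM's default port 8000 and the `openai` client package in requirements.txt (the `ask` helper and UI are hypothetical; the model name mirrors the Dockerfile's placeholder):

import gradio as gr
from openai import OpenAI

# vLLM exposes an OpenAI-compatible API; an api_key is required but unused.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

def ask(question: str) -> str:
    # "new-model-name:latest" mirrors the placeholder in the Dockerfile.
    response = client.chat.completions.create(
        model="new-model-name:latest",
        messages=[{"role": "user", "content": question}],
    )
    return response.choices[0].message.content

with gr.Blocks(theme="ocean") as AI_Tutor:
    box = gr.Textbox(label="Question")
    out = gr.Textbox(label="Answer")
    box.submit(ask, box, out)

if __name__ == "__main__":
    # Bind to all interfaces on 7860 so the Space routes traffic in.
    AI_Tutor.launch(server_name="0.0.0.0", server_port=7860)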