# Use a slim Python base image for smaller size
FROM python:3.9-slim-buster

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download NLTK data during the image build process
RUN python -c "import nltk; nltk.download('punkt')"

# --- NEW STEP: Pre-download the Hugging Face model ---
# Create a directory for the model cache
RUN mkdir -p /app/model_cache

# Download the model and tokenizer to the cache directory
# so the model is available locally at runtime
RUN python -c "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer; \
    model_name = 'valhalla/t5-small-e2e-qg'; \
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir='/app/model_cache'); \
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir='/app/model_cache'); \
    print(f'Model {model_name} downloaded successfully to /app/model_cache')"

# Set environment variables to force Hugging Face libraries into offline mode.
# These must come AFTER the download step above: with TRANSFORMERS_OFFLINE=1
# already in effect, from_pretrained would refuse to reach the network and
# the build would fail before the model ever reached the cache.
ENV HF_DATASETS_OFFLINE=1
ENV TRANSFORMERS_OFFLINE=1

# Copy the main application file
COPY app.py .

# Expose the port Gradio runs on
EXPOSE 7860

# Command to run the application when the container starts
CMD ["python", "app.py"]
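For reference, here is a minimal sketch of the kind of `app.py` entrypoint this image expects. Only the model name, the `/app/model_cache` directory, and port 7860 come from the Dockerfile above; the Gradio interface, the prompt format, and the generation parameters are assumptions, and the real application presumably also uses the NLTK `punkt` data downloaded at build time for sentence splitting, which this sketch omits.

```python
# app.py -- minimal sketch (assumed layout; only the model name, cache
# directory, and port are taken from the Dockerfile above)
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_NAME = "valhalla/t5-small-e2e-qg"
CACHE_DIR = "/app/model_cache"  # the cache populated during the image build

# With TRANSFORMERS_OFFLINE=1 set in the image, from_pretrained resolves
# these files from the local cache instead of the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

def generate_questions(text: str) -> str:
    # Prompt prefix assumed for this end-to-end question-generation model.
    inputs = tokenizer("generate questions: " + text, return_tensors="pt",
                       truncation=True, max_length=512)
    output_ids = model.generate(**inputs, max_length=128, num_beams=4)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate_questions, inputs="text", outputs="text",
                    title="Question Generation")

if __name__ == "__main__":
    # Bind to 0.0.0.0 so the EXPOSEd port 7860 is reachable from the host.
    demo.launch(server_name="0.0.0.0", server_port=7860)
```

The image can then be built and run in the usual way, e.g. `docker build -t qg-app .` followed by `docker run -p 7860:7860 qg-app` (the image name is arbitrary), after which the Gradio UI is reachable on http://localhost:7860 with no runtime access to the Hugging Face Hub required.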