#!/bin/bash
# --- Final Definitive Startup Script (v24 - Adds git lfs pull to fix num_samples=0) ---

# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
echo "--- Startup Script Initialized ---"

# Directory the model files are downloaded into, and the directory handed to
# the trainer as its output location.
MODELS_DIR="/data/models"
OUTPUT_DIR="/data/output"

# Create both directories in one call; -p makes this a no-op when they already
# exist, and the quotes keep each path a single argument.
mkdir -p -- "$MODELS_DIR" "$OUTPUT_DIR"

# Hugging Face repository that every model file below is downloaded from.
HF_MODEL_REPO="jujutechnology/WANfortraining"

# Expected on-disk location of each model file. The basename of each path is
# also the file name requested from the repository.
DIT_PATH="$MODELS_DIR/wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors"
VAE_PATH="$MODELS_DIR/Wan2.1_VAE.pth"
CLIP_PATH="$MODELS_DIR/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
T5_PATH="$MODELS_DIR/models_t5_umt5-xxl-enc-bf16.pth"

#######################################
# Download one model file into MODELS_DIR unless it is already on disk.
# Globals:   HF_MODEL_REPO (read), MODELS_DIR (read)
# Arguments: $1 - expected local path of the file; its basename is the
#                 file name requested from the repository
# Outputs:   an error message on stderr if the file is still missing
# Returns:   0 if the file exists afterwards, non-zero otherwise
#######################################
fetch_model_if_missing() {
    local target_path=$1
    local file_name=${target_path##*/}

    if [[ ! -f "$target_path" ]]; then
        # NOTE(review): --local-dir-use-symlinks is kept exactly as before;
        # presumably it forces real files instead of cache symlinks — confirm
        # it is still honoured by the installed huggingface_hub version.
        huggingface-cli download "$HF_MODEL_REPO" "$file_name" \
            --local-dir "$MODELS_DIR" --local-dir-use-symlinks False || return
    fi

    # Fail here, with a clear message, rather than later inside the trainer.
    if [[ ! -f "$target_path" ]]; then
        printf 'ERROR: model file still missing after download: %s\n' "$target_path" >&2
        return 1
    fi
}

echo "--- Checking for model files... ---"
for model_path in "$DIT_PATH" "$VAE_PATH" "$CLIP_PATH" "$T5_PATH"; do
    fetch_model_if_missing "$model_path"
done
echo "--- Models are present. ---"

# --- Git LFS: fetch the real content of LFS-tracked files in this repo ---
# Runs in the current working directory. The script header ties this step to
# a previous "num_samples=0" problem.
dataset_listing_dir="/code/dataset/ebPhotos-001/"

printf '%s\n' "--- Ensuring all dataset images are fully downloaded (git lfs pull)... ---"
git lfs pull
printf '%s\n' "--- LFS checkout complete. Verifying file sizes: ---"
# Human-readable listing for the startup log; the files are expected to show
# megabyte-scale sizes once the pull has completed.
ls -lh "$dataset_listing_dir"

printf '%s\n' "--- Starting training... ---"

# --- Training invocation ---
# Arguments are collected in an array so that each option sits on its own
# line without backslash continuations; the resulting argv is unchanged.
train_args=(
    --task="i2v-14B"
    # Model files under MODELS_DIR.
    --dit="$DIT_PATH"
    --vae="$VAE_PATH"
    --clip="$CLIP_PATH"
    --t5="$T5_PATH"
    # Dataset config path is relative to the current working directory.
    --dataset_config="dataset/huggingfacetoml.toml"
    --output_dir="$OUTPUT_DIR"
    --output_name="my-I2V-Lora"
    --network_module="networks.lora_wan"
    --network_dim="32"
    --network_alpha="4"
    --max_train_epochs="70"
    --learning_rate="1e-5"
    --optimizer_type="adamw"
    --mixed_precision="bf16"
    --gradient_checkpointing
    --sdpa
)

accelerate launch wan_train_network.py "${train_args[@]}"