#!/bin/bash
#
# Startup script (v24 - adds `git lfs pull` to fix num_samples=0).
#
# Steps, in order:
#   1. Create the model and output directories.
#   2. Download any missing model file from the Hugging Face repo.
#   3. Run `git lfs pull` in the current working directory and list the
#      dataset directory so file sizes can be checked in the log.
#   4. Launch LoRA training through `accelerate launch`.
#
# NOTE(review): `git lfs pull`, wan_train_network.py and the dataset config are
# all resolved relative to the current working directory; the `ls` below uses
# /code/dataset, so the script is presumably started from /code - confirm.

set -euo pipefail

readonly MODELS_DIR="/data/models"
readonly OUTPUT_DIR="/data/output"
readonly HF_REPO="jujutechnology/WANfortraining"

readonly DIT_FILE="wan2.1_i2v_720p_14B_fp8_e4m3fn.safetensors"
readonly VAE_FILE="Wan2.1_VAE.pth"
readonly CLIP_FILE="models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth"
readonly T5_FILE="models_t5_umt5-xxl-enc-bf16.pth"

readonly DIT_PATH="$MODELS_DIR/$DIT_FILE"
readonly VAE_PATH="$MODELS_DIR/$VAE_FILE"
readonly CLIP_PATH="$MODELS_DIR/$CLIP_FILE"
readonly T5_PATH="$MODELS_DIR/$T5_FILE"

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Download one model file into MODELS_DIR unless it is already there.
# Globals:   MODELS_DIR (read), HF_REPO (read)
# Arguments: $1 - file name inside the Hugging Face repo
# Returns:   0 if the file exists afterwards; exits the script otherwise
#######################################
fetch_model() {
  local filename=$1
  local target="$MODELS_DIR/$filename"

  if [[ ! -f "$target" ]]; then
    huggingface-cli download "$HF_REPO" "$filename" \
      --local-dir "$MODELS_DIR" \
      --local-dir-use-symlinks False
  fi

  # Do not claim the model is present unless the file really is on disk.
  [[ -f "$target" ]] || die "model file missing after download: $target"
}

echo "--- Startup Script Initialized ---"

mkdir -p "$MODELS_DIR"
mkdir -p "$OUTPUT_DIR"

echo "--- Checking for model files... ---"
fetch_model "$DIT_FILE"
fetch_model "$VAE_FILE"
fetch_model "$CLIP_FILE"
fetch_model "$T5_FILE"
echo "--- Models are present. ---"

# --- CRITICAL STEP: force checkout of Git LFS files in the repo ---
echo "--- Ensuring all dataset images are fully downloaded (git lfs pull)... ---"
git lfs pull

echo "--- LFS checkout complete. Verifying file sizes: ---"
# After a successful pull this is expected to list megabyte-sized files.
ls -lh /code/dataset/ebPhotos-001/

echo "--- Starting training... ---"

# --- Run the training command ---
train_args=(
  --task="i2v-14B"
  --dit="$DIT_PATH"
  --vae="$VAE_PATH"
  --clip="$CLIP_PATH"
  --t5="$T5_PATH"
  --dataset_config="dataset/huggingfacetoml.toml"
  --output_dir="$OUTPUT_DIR"
  --output_name="my-I2V-Lora"
  --network_module="networks.lora_wan"
  --network_dim="32"
  --network_alpha="4"
  --max_train_epochs="70"
  --learning_rate="1e-5"
  --optimizer_type="adamw"
  --mixed_precision="bf16"
  --gradient_checkpointing
  --sdpa
)

accelerate launch wan_train_network.py "${train_args[@]}"