#!/usr/bin/env python3
"""
Helper script to download the int4 model files at build time for Hugging Face Spaces
"""

import logging
import os
import shutil
import subprocess
import sys
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
LOCAL_MODEL_PATH = "./int4"

# Sub-folder of the repository that holds the int4 files.
REPO_SUBFOLDER = "int4"

# Files that must be present locally for the model to be considered usable.
REQUIRED_FILES = (
    "config.json",
    "pytorch_model.bin",
    "tokenizer.json",
    "tokenizer_config.json",
)

# Files that are downloaded when the repository has them but are not checked for.
OPTIONAL_FILES = (
    "special_tokens_map.json",
    "generation_config.json",
)


def _place_in_model_dir(downloaded_path, file_name):
    """Put a downloaded file directly under ``LOCAL_MODEL_PATH``.

    ``hf_hub_download`` reproduces the repository layout below ``local_dir``,
    so ``int4/config.json`` ends up in ``<local_dir>/int4/config.json``. The
    rest of this script expects ``<LOCAL_MODEL_PATH>/config.json``, so the file
    is relocated here.

    Args:
        downloaded_path: Path returned by the download call.
        file_name: Bare file name the file must have inside ``LOCAL_MODEL_PATH``.

    Returns:
        The final ``Path`` of the file inside ``LOCAL_MODEL_PATH``.
    """
    source = Path(downloaded_path)
    target = Path(LOCAL_MODEL_PATH) / file_name

    # Already where it belongs: nothing to do.
    if source.resolve() == target.resolve():
        return target

    target.parent.mkdir(parents=True, exist_ok=True)
    if source.is_symlink():
        # Moving a symlink could leave a dangling link behind (or break the
        # place it came from), so materialise the link target instead.
        shutil.copyfile(str(source.resolve()), str(target))
    else:
        shutil.move(str(source), str(target))
    return target


def download_model() -> bool:
    """Download the int4 model files to the local directory.

    Every file listed in ``REQUIRED_FILES`` and ``OPTIONAL_FILES`` that exists
    in the ``int4/`` sub-folder of ``MAIN_MODEL_ID`` is downloaded and placed
    directly under ``LOCAL_MODEL_PATH``.

    Returns:
        True when all ``REQUIRED_FILES`` were downloaded; False when one of
        them is absent from the repository or any error occurred.
    """
    try:
        logger.info("Downloading int4 model from %s/%s", MAIN_MODEL_ID, REPO_SUBFOLDER)

        # Create local directory if it doesn't exist
        os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

        # Imported lazily so the module can be imported without huggingface_hub.
        from huggingface_hub import hf_hub_download, list_repo_files

        # A set gives O(1) membership tests in the loop below.
        repo_files = set(list_repo_files(MAIN_MODEL_ID))
        prefix = f"{REPO_SUBFOLDER}/"

        int4_files = [name for name in repo_files if name.startswith(prefix)]
        logger.info("Found %d files in int4 subfolder", len(int4_files))

        wanted_files = REQUIRED_FILES + OPTIONAL_FILES
        downloaded = []
        for file_name in wanted_files:
            repo_path = prefix + file_name
            if repo_path not in repo_files:
                logger.warning("File %s not found in int4 subfolder", file_name)
                continue

            logger.info("Downloading %s...", file_name)
            downloaded_path = hf_hub_download(
                repo_id=MAIN_MODEL_ID,
                filename=repo_path,
                local_dir=LOCAL_MODEL_PATH,
                # NOTE(review): recent huggingface_hub releases appear to
                # deprecate this flag -- confirm against the pinned version.
                local_dir_use_symlinks=False,
            )
            # The download lands in <LOCAL_MODEL_PATH>/int4/<file_name>;
            # flatten it to <LOCAL_MODEL_PATH>/<file_name>.
            _place_in_model_dir(downloaded_path, file_name)
            logger.info("Downloaded %s", file_name)
            downloaded.append(file_name)

        logger.info(
            "Downloaded %d out of %d required files",
            len(downloaded),
            len(wanted_files),
        )

        # Do not report success when files the later check insists on are missing.
        unavailable = [name for name in REQUIRED_FILES if name not in downloaded]
        if unavailable:
            logger.error("Required files unavailable in repository: %s", unavailable)
            return False

        logger.info("Model downloaded successfully to %s", LOCAL_MODEL_PATH)
        return True

    except Exception as e:
        # Top-level boundary of a build script: log with traceback and report failure.
        logger.exception("Error downloading model: %s", e)
        return False


def check_model_files() -> bool:
    """Check if required model files exist.

    Returns:
        True when every name in ``REQUIRED_FILES`` exists under
        ``LOCAL_MODEL_PATH``, False otherwise.
    """
    missing_files = [
        name
        for name in REQUIRED_FILES
        if not os.path.exists(os.path.join(LOCAL_MODEL_PATH, name))
    ]

    if missing_files:
        logger.error(f"Missing model files: {missing_files}")
        return False

    logger.info("All required model files found")
    return True


def verify_model_integrity() -> bool:
    """Verify that the downloaded model files are valid.

    Loads the tokenizer and the model config from ``LOCAL_MODEL_PATH`` with
    ``transformers``; any exception raised while doing so counts as a failure.

    Returns:
        True when both loads succeed, False otherwise.
    """
    try:
        # Try to load the tokenizer to verify it's working
        from transformers import AutoTokenizer

        AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Tokenizer loaded successfully from local files")

        # Try to load the model config
        from transformers import AutoConfig

        AutoConfig.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Model config loaded successfully from local files")

        return True

    except Exception as e:
        logger.error(f"Error verifying model integrity: {e}")
        return False


def main() -> bool:
    """Main function to download model at build time.

    Re-uses files that are already present and loadable; otherwise downloads
    them and verifies the result.

    Returns:
        True when usable model files are available locally, False otherwise.
    """
    logger.info("Starting model download for Hugging Face Space...")

    # Check if model files already exist
    if check_model_files():
        logger.info("Model files already exist, verifying integrity...")
        if verify_model_integrity():
            logger.info("Model files verified successfully")
            return True
        logger.warning("Model files exist but failed integrity check, re-downloading...")

    # Download the model
    if not download_model():
        logger.error("Model download failed")
        return False
    logger.info("Model download completed successfully")

    # Verify the downloaded files
    if check_model_files() and verify_model_integrity():
        logger.info("Model download and verification completed successfully")
        return True

    logger.error("Model download completed but verification failed")
    return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)