Spaces:

kvn420
/

Train

Sleeping

App Files Files Community

kvn420 committed on May 28

Commit

5bba009

verified ·

1 Parent(s): 794b299

Update app.py

Browse files

Files changed (1) hide show

app.py +174 -95

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import Dict, List, Optional, Union
 import time
 import tempfile
 import shutil
 # Configuration du logging
 logging.basicConfig(level=logging.INFO)
@@ -18,80 +19,107 @@ logger = logging.getLogger(__name__)
 def install_package(package_name):
     """Installe un package Python"""
     try:
-        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
         return True
     except subprocess.CalledProcessError as e:
-        logger.error(f"Erreur installation {package_name}: {e}")
         return False
-# Imports conditionnels avec tentative d'installation
-def safe_import(module_name, package_name=None):
-    """Import sécurisé avec possibilité d'installation"""
-    if package_name is None:
-        package_name = module_name
     try:
-        return __import__(module_name), True
     except ImportError:
-        logger.warning(f"{module_name} non trouvé")
-        return None, False
-# Tentative d'imports
-numpy, NUMPY_AVAILABLE = safe_import('numpy')
-torch_module, TORCH_AVAILABLE = safe_import('torch')
-if torch_module:
-    torch = torch_module
-else:
-    torch = None
-# Import transformers
-try:
-    from transformers import (
-        AutoTokenizer, AutoModel, AutoProcessor,
-        AutoModelForCausalLM, TrainingArguments, Trainer,
-        DataCollatorForLanguageModeling
-    )
-    TRANSFORMERS_AVAILABLE = True
-except ImportError:
-    TRANSFORMERS_AVAILABLE = False
-    logger.warning("Transformers non disponible")
-# Import datasets
-try:
-    from datasets import Dataset, load_dataset, concatenate_datasets
-    DATASETS_AVAILABLE = True
-except ImportError:
-    DATASETS_AVAILABLE = False
-    logger.warning("Datasets non disponible")
-# Import HuggingFace Hub
-try:
-    from huggingface_hub import HfApi
-    HF_HUB_AVAILABLE = True
-except ImportError:
-    HF_HUB_AVAILABLE = False
-    logger.warning("HuggingFace Hub non disponible")
-# Import PIL
-try:
-    from PIL import Image
-    PIL_AVAILABLE = True
-except ImportError:
-    PIL_AVAILABLE = False
-# Import librosa
-try:
-    import librosa
-    LIBROSA_AVAILABLE = True
-except ImportError:
-    LIBROSA_AVAILABLE = False
-# Import OpenCV
-try:
-    import cv2
-    CV2_AVAILABLE = True
-except ImportError:
-    CV2_AVAILABLE = False
 class MultimodalTrainer:
     def __init__(self):
@@ -101,43 +129,81 @@ class MultimodalTrainer:
         self.training_data = []
         # Device selection
-        if TORCH_AVAILABLE and torch.cuda.is_available():
             self.device = torch.device("cuda")
         else:
             self.device = "cpu"
         # HF API
-        if HF_HUB_AVAILABLE:
             self.hf_api = HfApi()
         else:
             self.hf_api = None
     def install_dependencies(self, packages_to_install):
         """Installe les dépendances manquantes"""
         installation_results = []
         for package in packages_to_install:
             installation_results.append(f"📦 Installation de {package}...")
-            success = install_package(package)
             if success:
                 installation_results.append(f"✅ {package} installé avec succès!")
             else:
                 installation_results.append(f"❌ Échec installation {package}")
-        installation_results.append("\n🔄 Redémarrage requis pour prendre effet")
         return "\n".join(installation_results)
     def check_dependencies(self):
         """Vérifie et affiche l'état des dépendances"""
         deps = {
-            "NumPy": NUMPY_AVAILABLE,
             "PyTorch": TORCH_AVAILABLE,
             "Transformers": TRANSFORMERS_AVAILABLE,
             "Datasets": DATASETS_AVAILABLE,
             "HuggingFace Hub": HF_HUB_AVAILABLE,
-            "PIL (Images)": PIL_AVAILABLE,
-            "Librosa (Audio)": LIBROSA_AVAILABLE,
-            "OpenCV (Vidéo)": CV2_AVAILABLE
         }
         status = "📦 État des dépendances:\n\n"
@@ -146,16 +212,13 @@ class MultimodalTrainer:
         critical_deps = ["PyTorch", "Transformers", "Datasets"]
         status += "🔥 CRITIQUES:\n"
         for dep in critical_deps:
-            icon = "✅" if deps.get(dep.replace(" ", "").replace("(", "").replace(")", "")) else "❌"
             status += f"{icon} {dep}\n"
         status += "\n🔧 OPTIONNELLES:\n"
-        optional_deps = ["NumPy", "HuggingFace Hub", "PIL (Images)", "Librosa (Audio)", "OpenCV (Vidéo)"]
         for dep in optional_deps:
-            key = dep.replace(" ", "").replace("(", "").replace(")", "").replace("Images", "").replace("Audio", "").replace("Vidéo", "")
-            if key == "HuggingFaceHub":
-                key = "HuggingFace Hub"
-            icon = "✅" if deps.get(key) else "⚠️"
             status += f"{icon} {dep}\n"
         # Système info
@@ -163,7 +226,7 @@ class MultimodalTrainer:
         status += f"🐍 Python: {sys.version.split()[0]}\n"
         status += f"💾 Device: {self.device}\n"
-        if TORCH_AVAILABLE and torch.cuda.is_available():
             status += f"🚀 GPU: {torch.cuda.get_device_name()}\n"
             status += f"🔋 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB\n"
@@ -174,7 +237,7 @@ class MultimodalTrainer:
         if not TRANSFORMERS_AVAILABLE:
             return "❌ Transformers non installé! Utilisez l'outil d'installation."
-        if not TORCH_AVAILABLE:
             return "❌ PyTorch non installé! Utilisez l'outil d'installation."
         if not model_name.strip():
@@ -225,7 +288,7 @@ class MultimodalTrainer:
     def load_single_dataset(self, dataset_name: str, split: str = "train"):
         """Charge un dataset individuel"""
-        if not DATASETS_AVAILABLE:
             return "❌ Datasets non installé! Utilisez l'outil d'installation."
         if not dataset_name.strip():
@@ -262,7 +325,10 @@ class MultimodalTrainer:
         for i, step in enumerate(steps):
             result += f"Étape {i+1}: {step} ✅\n"
-        result += "\n⚠️ MODE DÉMO - Pour un vrai entraînement, installez PyTorch + Transformers"
         return result
     def get_model_info(self):
@@ -272,10 +338,11 @@ class MultimodalTrainer:
         info = f"📋 INFORMATIONS DU MODÈLE:\n\n"
         info += f"🏷️ Type: {type(self.current_model).__name__}\n"
-        info += f"💾 Device: {next(self.current_model.parameters()).device}\n"
-        # Compte les paramètres
-        if TORCH_AVAILABLE:
             total_params = sum(p.numel() for p in self.current_model.parameters())
             trainable_params = sum(p.numel() for p in self.current_model.parameters() if p.requires_grad)
@@ -323,7 +390,7 @@ def create_interface():
             install_status = gr.Textbox(
                 label="Status d'installation",
-                lines=5,
                 interactive=False
             )
@@ -335,7 +402,7 @@ def create_interface():
                 outputs=install_status
             )
             install_torch_btn.click(
-                lambda: trainer.install_dependencies(["torch", "torchvision"]),
                 outputs=install_status
             )
             install_datasets_btn.click(
@@ -352,8 +419,8 @@ def create_interface():
                 with gr.Column():
                     model_input = gr.Textbox(
                         label="Nom du modèle HuggingFace",
-                        placeholder="kvn420/Tenro_V4.1",
-                        value="kvn420/Tenro_V4.1"
                     )
                     model_type = gr.Dropdown(
                         label="Type de modèle",
@@ -390,7 +457,12 @@ def create_interface():
                     gr.Markdown("### 📝 Dataset individuel")
                     dataset_input = gr.Textbox(
                         label="Nom du dataset",
-                        placeholder="microsoft/coco"
                     )
                     dataset_split = gr.Textbox(
                         label="Split",
@@ -405,9 +477,16 @@ def create_interface():
                         lines=12
                     )
             load_dataset_btn.click(
-                trainer.load_single_dataset,
-                inputs=[dataset_input, dataset_split],
                 outputs=data_status
             )

 import time
 import tempfile
 import shutil
+import importlib
 # Configuration du logging
 logging.basicConfig(level=logging.INFO)
 def install_package(package_name):
     """Installe un package Python"""
     try:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name, "--quiet"])
+        logger.info(f"✅ {package_name} installé avec succès")
         return True
     except subprocess.CalledProcessError as e:
+        logger.error(f"❌ Erreur installation {package_name}: {e}")
         return False
+# Fonction pour recharger les modules après installation
+def reload_module(module_name):
+    """Recharge un module après installation"""
+    try:
+        if module_name in sys.modules:
+            importlib.reload(sys.modules[module_name])
+        else:
+            __import__(module_name)
+        return True
+    except Exception as e:
+        logger.error(f"Erreur rechargement {module_name}: {e}")
+        return False
+# Imports conditionnels avec vérification
+def check_and_import_dependencies():
+    """Vérifie et importe toutes les dépendances"""
+    global numpy, torch, NUMPY_AVAILABLE, TORCH_AVAILABLE, TRANSFORMERS_AVAILABLE
+    global DATASETS_AVAILABLE, HF_HUB_AVAILABLE, PIL_AVAILABLE, LIBROSA_AVAILABLE, CV2_AVAILABLE
+    global AutoTokenizer, AutoModel, AutoProcessor, AutoModelForCausalLM
+    global TrainingArguments, Trainer, DataCollatorForLanguageModeling
+    global Dataset, load_dataset, concatenate_datasets, HfApi, Image, librosa, cv2
+    # NumPy
     try:
+        import numpy
+        NUMPY_AVAILABLE = True
     except ImportError:
+        numpy = None
+        NUMPY_AVAILABLE = False
+    # PyTorch
+    try:
+        import torch
+        TORCH_AVAILABLE = True
+    except ImportError:
+        torch = None
+        TORCH_AVAILABLE = False
+    # Transformers
+    try:
+        from transformers import (
+            AutoTokenizer, AutoModel, AutoProcessor,
+            AutoModelForCausalLM, TrainingArguments, Trainer,
+            DataCollatorForLanguageModeling
+        )
+        TRANSFORMERS_AVAILABLE = True
+    except ImportError:
+        TRANSFORMERS_AVAILABLE = False
+        AutoTokenizer = AutoModel = AutoProcessor = None
+        AutoModelForCausalLM = TrainingArguments = Trainer = None
+        DataCollatorForLanguageModeling = None
+    # Datasets
+    try:
+        from datasets import Dataset, load_dataset, concatenate_datasets
+        DATASETS_AVAILABLE = True
+    except ImportError:
+        DATASETS_AVAILABLE = False
+        Dataset = load_dataset = concatenate_datasets = None
+    # HuggingFace Hub
+    try:
+        from huggingface_hub import HfApi
+        HF_HUB_AVAILABLE = True
+    except ImportError:
+        HF_HUB_AVAILABLE = False
+        HfApi = None
+    # PIL
+    try:
+        from PIL import Image
+        PIL_AVAILABLE = True
+    except ImportError:
+        PIL_AVAILABLE = False
+        Image = None
+    # Librosa
+    try:
+        import librosa
+        LIBROSA_AVAILABLE = True
+    except ImportError:
+        LIBROSA_AVAILABLE = False
+        librosa = None
+    # OpenCV
+    try:
+        import cv2
+        CV2_AVAILABLE = True
+    except ImportError:
+        CV2_AVAILABLE = False
+        cv2 = None
+# Initialisation des imports
+check_and_import_dependencies()
 class MultimodalTrainer:
     def __init__(self):
         self.training_data = []
         # Device selection
+        if TORCH_AVAILABLE and torch and torch.cuda.is_available():
             self.device = torch.device("cuda")
         else:
             self.device = "cpu"
         # HF API
+        if HF_HUB_AVAILABLE and HfApi:
             self.hf_api = HfApi()
         else:
             self.hf_api = None
     def install_dependencies(self, packages_to_install):
         """Installe les dépendances manquantes"""
         installation_results = []
+        # Mapping des packages
+        package_mapping = {
+            "torch": "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu",
+            "transformers": "transformers",
+            "datasets": "datasets",
+            "accelerate": "accelerate",
+            "pillow": "pillow",
+            "librosa": "librosa",
+            "opencv": "opencv-python",
+            "huggingface_hub": "huggingface_hub"
+        }
         for package in packages_to_install:
             installation_results.append(f"📦 Installation de {package}...")
+            # Utilise le mapping si disponible
+            install_cmd = package_mapping.get(package.lower(), package)
+            if package.lower() == "torch":
+                # Installation spéciale pour PyTorch
+                try:
+                    subprocess.check_call([
+                        sys.executable, "-m", "pip", "install",
+                        "torch", "torchvision", "torchaudio",
+                        "--index-url", "https://download.pytorch.org/whl/cpu",
+                        "--quiet"
+                    ])
+                    success = True
+                except subprocess.CalledProcessError:
+                    success = False
+            else:
+                success = install_package(install_cmd)
             if success:
                 installation_results.append(f"✅ {package} installé avec succès!")
             else:
                 installation_results.append(f"❌ Échec installation {package}")
+        # Recharge les dépendances après installation
+        installation_results.append("\n🔄 Rechargement des modules...")
+        check_and_import_dependencies()
+        self.__init__()  # Réinitialise l'instance
+        installation_results.append("✅ Modules rechargés!")
         return "\n".join(installation_results)
     def check_dependencies(self):
         """Vérifie et affiche l'état des dépendances"""
+        # Force la vérification
+        check_and_import_dependencies()
         deps = {
             "PyTorch": TORCH_AVAILABLE,
             "Transformers": TRANSFORMERS_AVAILABLE,
             "Datasets": DATASETS_AVAILABLE,
+            "NumPy": NUMPY_AVAILABLE,
             "HuggingFace Hub": HF_HUB_AVAILABLE,
+            "PIL": PIL_AVAILABLE,
+            "Librosa": LIBROSA_AVAILABLE,
+            "OpenCV": CV2_AVAILABLE
         }
         status = "📦 État des dépendances:\n\n"
         critical_deps = ["PyTorch", "Transformers", "Datasets"]
         status += "🔥 CRITIQUES:\n"
         for dep in critical_deps:
+            icon = "✅" if deps.get(dep) else "❌"
             status += f"{icon} {dep}\n"
         status += "\n🔧 OPTIONNELLES:\n"
+        optional_deps = ["NumPy", "HuggingFace Hub", "PIL", "Librosa", "OpenCV"]
         for dep in optional_deps:
+            icon = "✅" if deps.get(dep) else "⚠️"
             status += f"{icon} {dep}\n"
         # Système info
         status += f"🐍 Python: {sys.version.split()[0]}\n"
         status += f"💾 Device: {self.device}\n"
+        if TORCH_AVAILABLE and torch and torch.cuda.is_available():
             status += f"🚀 GPU: {torch.cuda.get_device_name()}\n"
             status += f"🔋 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB\n"
         if not TRANSFORMERS_AVAILABLE:
             return "❌ Transformers non installé! Utilisez l'outil d'installation."
+        if not TORCH_AVAILABLE or not torch:
             return "❌ PyTorch non installé! Utilisez l'outil d'installation."
         if not model_name.strip():
     def load_single_dataset(self, dataset_name: str, split: str = "train"):
         """Charge un dataset individuel"""
+        if not DATASETS_AVAILABLE or not load_dataset:
             return "❌ Datasets non installé! Utilisez l'outil d'installation."
         if not dataset_name.strip():
         for i, step in enumerate(steps):
             result += f"Étape {i+1}: {step} ✅\n"
+        if TORCH_AVAILABLE and TRANSFORMERS_AVAILABLE:
+            result += "\n✅ Prêt pour un vrai entraînement!"
+        else:
+            result += "\n⚠️ MODE DÉMO - Installez PyTorch + Transformers pour un vrai entraînement"
         return result
     def get_model_info(self):
         info = f"📋 INFORMATIONS DU MODÈLE:\n\n"
         info += f"🏷️ Type: {type(self.current_model).__name__}\n"
+        if TORCH_AVAILABLE and torch:
+            info += f"💾 Device: {next(self.current_model.parameters()).device}\n"
+            # Compte les paramètres
             total_params = sum(p.numel() for p in self.current_model.parameters())
             trainable_params = sum(p.numel() for p in self.current_model.parameters() if p.requires_grad)
             install_status = gr.Textbox(
                 label="Status d'installation",
+                lines=8,
                 interactive=False
             )
                 outputs=install_status
             )
             install_torch_btn.click(
+                lambda: trainer.install_dependencies(["torch"]),
                 outputs=install_status
             )
             install_datasets_btn.click(
                 with gr.Column():
                     model_input = gr.Textbox(
                         label="Nom du modèle HuggingFace",
+                        placeholder="microsoft/DialoGPT-medium",
+                        value="microsoft/DialoGPT-medium"
                     )
                     model_type = gr.Dropdown(
                         label="Type de modèle",
                     gr.Markdown("### 📝 Dataset individuel")
                     dataset_input = gr.Textbox(
                         label="Nom du dataset",
+                        placeholder="wikitext",
+                        value="wikitext"
+                    )
+                    dataset_config = gr.Textbox(
+                        label="Configuration (optionnel)",
+                        placeholder="wikitext-2-raw-v1"
                     )
                     dataset_split = gr.Textbox(
                         label="Split",
                         lines=12
                     )
+            def load_dataset_with_config(dataset_name, config_name, split):
+                if config_name.strip():
+                    full_name = f"{dataset_name}/{config_name}" if "/" not in config_name else config_name
+                else:
+                    full_name = dataset_name
+                return trainer.load_single_dataset(full_name, split)
             load_dataset_btn.click(
+                load_dataset_with_config,
+                inputs=[dataset_input, dataset_config, dataset_split],
                 outputs=data_status
             )