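"""Multimodal Training Hub: a Gradio app for loading Hugging Face models and
datasets, checking and installing dependencies, and simulating training runs."""
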
import gradio as gr
import subprocess
import sys
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def install_package(package_name):
    """Install a Python package with pip."""
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
        return True
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to install {package_name}: {e}")
        return False


def safe_import(module_name, package_name=None):
    """Safely import a module; returns (module, available) so callers can
    degrade gracefully when a dependency is missing."""
    if package_name is None:
        package_name = module_name
    try:
        return __import__(module_name), True
    except ImportError:
        logger.warning(f"{module_name} not found")
        return None, False


numpy, NUMPY_AVAILABLE = safe_import('numpy')
torch, TORCH_AVAILABLE = safe_import('torch')

try:
    from transformers import (
        AutoTokenizer, AutoModel, AutoProcessor,
        AutoModelForCausalLM, TrainingArguments, Trainer,
        DataCollatorForLanguageModeling
    )
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False
    logger.warning("Transformers not available")

try:
    from datasets import load_dataset, concatenate_datasets
    DATASETS_AVAILABLE = True
except ImportError:
    DATASETS_AVAILABLE = False
    logger.warning("Datasets not available")

try:
    from huggingface_hub import HfApi
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("HuggingFace Hub not available")

try:
    from PIL import Image
    PIL_AVAILABLE = True
except ImportError:
    PIL_AVAILABLE = False

try:
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False

try:
    import cv2
    CV2_AVAILABLE = True
except ImportError:
    CV2_AVAILABLE = False

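# The *_AVAILABLE flags above let the app start, and offer in-app installs,
# even when heavy optional dependencies are missing.
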

class MultimodalTrainer:
    def __init__(self):
        self.current_model = None
        self.current_tokenizer = None
        self.current_processor = None
        self.training_data = None  # becomes a datasets.Dataset once data is loaded

        # Keep the device type consistent: a torch.device when PyTorch is
        # installed, a plain string fallback otherwise.
        if TORCH_AVAILABLE and torch.cuda.is_available():
            self.device = torch.device("cuda")
        elif TORCH_AVAILABLE:
            self.device = torch.device("cpu")
        else:
            self.device = "cpu"

        self.hf_api = HfApi() if HF_HUB_AVAILABLE else None

    def install_dependencies(self, packages_to_install):
        """Install missing dependencies with pip."""
        installation_results = []

        for package in packages_to_install:
            installation_results.append(f"📦 Installing {package}...")
            if install_package(package):
                installation_results.append(f"✅ {package} installed successfully!")
            else:
                installation_results.append(f"❌ Failed to install {package}")

        installation_results.append("\n🔄 Restart required for the changes to take effect")
        return "\n".join(installation_results)

    def check_dependencies(self):
        """Check and report the status of all dependencies."""
        deps = {
            "NumPy": NUMPY_AVAILABLE,
            "PyTorch": TORCH_AVAILABLE,
            "Transformers": TRANSFORMERS_AVAILABLE,
            "Datasets": DATASETS_AVAILABLE,
            "HuggingFace Hub": HF_HUB_AVAILABLE,
            "PIL (Images)": PIL_AVAILABLE,
            "Librosa (Audio)": LIBROSA_AVAILABLE,
            "OpenCV (Video)": CV2_AVAILABLE
        }
        critical_deps = ["PyTorch", "Transformers", "Datasets"]

        status = "📦 Dependency status:\n\n"
        status += "🔥 CRITICAL:\n"
        for dep in critical_deps:
            icon = "✅" if deps[dep] else "❌"
            status += f"{icon} {dep}\n"

        status += "\n🔧 OPTIONAL:\n"
        for dep, available in deps.items():
            if dep in critical_deps:
                continue
            icon = "✅" if available else "⚠️"
            status += f"{icon} {dep}\n"

        status += "\n💻 SYSTEM:\n"
        status += f"🐍 Python: {sys.version.split()[0]}\n"
        status += f"💾 Device: {self.device}\n"

        if TORCH_AVAILABLE and torch.cuda.is_available():
            status += f"🚀 GPU: {torch.cuda.get_device_name()}\n"
            status += f"🔋 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB\n"

        return status

    def load_model(self, model_name: str, model_type: str = "causal"):
        """Load a model from the Hugging Face Hub."""
        if not TRANSFORMERS_AVAILABLE:
            return "❌ Transformers is not installed! Use the installation tool."

        if not TORCH_AVAILABLE:
            return "❌ PyTorch is not installed! Use the installation tool."

        if not model_name.strip():
            return "❌ Please enter a model name"

        try:
            logger.info(f"Loading model: {model_name}")

            model_class = AutoModelForCausalLM if model_type == "causal" else AutoModel
            # trust_remote_code=True executes code shipped with the model repo;
            # only load repositories you trust.
            self.current_model = model_class.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True
            )

            # Not every model ships a tokenizer or a processor, so load each
            # best-effort.
            try:
                self.current_tokenizer = AutoTokenizer.from_pretrained(
                    model_name, trust_remote_code=True
                )
                if self.current_tokenizer.pad_token is None:
                    self.current_tokenizer.pad_token = self.current_tokenizer.eos_token
            except Exception as e:
                logger.warning(f"Tokenizer not found: {e}")

            try:
                self.current_processor = AutoProcessor.from_pretrained(
                    model_name, trust_remote_code=True
                )
            except Exception as e:
                logger.warning(f"Processor not found: {e}")

            return f"✅ Model {model_name} loaded successfully!\nType: {type(self.current_model).__name__}"

        except Exception as e:
            error_msg = f"❌ Error while loading: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def load_single_dataset(self, dataset_name: str, split: str = "train"):
        """Load a single dataset and append it to the training data."""
        if not DATASETS_AVAILABLE:
            return "❌ Datasets is not installed! Use the installation tool."

        if not dataset_name.strip():
            return "❌ Please enter a dataset name"

        try:
            dataset = load_dataset(dataset_name, split=split)

            if self.training_data is not None:
                # Note: concatenate_datasets requires both datasets to share
                # the same features (column names and types).
                self.training_data = concatenate_datasets([self.training_data, dataset])
            else:
                self.training_data = dataset

            return (f"✅ Dataset {dataset_name} added!\n"
                    f"📊 Total: {len(self.training_data)} examples\n"
                    f"🔍 Columns: {list(self.training_data.column_names)}")

        except Exception as e:
            error_msg = f"❌ Dataset error: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def simulate_training(self, output_dir: str, num_epochs: int, learning_rate: float, batch_size: int):
        """Simulate a training run (demo mode)."""
        if not self.current_model and not self.training_data:
            return "❌ No model or data loaded!"

        steps = ["🏗️ Preparing data", "🔧 Configuring model", "🚀 Starting training"]
        result = "📋 TRAINING SIMULATION:\n\n"
        result += f"📂 Output: {output_dir}\n"
        result += f"🔄 Epochs: {num_epochs}\n"
        result += f"📚 Learning rate: {learning_rate}\n"
        result += f"📦 Batch size: {batch_size}\n\n"

        for i, step in enumerate(steps):
            result += f"Step {i + 1}: {step} ✅\n"

        result += "\n⚠️ DEMO MODE - For real training, install PyTorch + Transformers"
        return result

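    # A minimal sketch, not wired into the UI, of what a real fine-tuning run
    # could look like with the Trainer classes imported above. Assumptions
    # (not from the original code): a causal LM and tokenizer are already
    # loaded, self.training_data has a "text" column, and the method name and
    # 512-token cap are illustrative.
    def train_causal_lm_sketch(self, output_dir: str, num_epochs: int, learning_rate: float, batch_size: int):
        def tokenize(batch):
            return self.current_tokenizer(batch["text"], truncation=True, max_length=512)

        # Tokenize the raw text column and drop the original columns.
        tokenized = self.training_data.map(
            tokenize, batched=True, remove_columns=self.training_data.column_names
        )
        args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=num_epochs,
            learning_rate=learning_rate,
            per_device_train_batch_size=batch_size,
        )
        # mlm=False makes the collator build next-token (causal LM) labels.
        collator = DataCollatorForLanguageModeling(self.current_tokenizer, mlm=False)
        Trainer(
            model=self.current_model,
            args=args,
            train_dataset=tokenized,
            data_collator=collator,
        ).train()
        self.current_model.save_pretrained(output_dir)
        return f"✅ Model fine-tuned and saved to {output_dir}"
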
    def get_model_info(self):
        """Return information about the currently loaded model."""
        if not self.current_model:
            return "❌ No model loaded"

        info = "📋 MODEL INFORMATION:\n\n"
        info += f"🏷️ Type: {type(self.current_model).__name__}\n"
        info += f"💾 Device: {next(self.current_model.parameters()).device}\n"

        if TORCH_AVAILABLE:
            total_params = sum(p.numel() for p in self.current_model.parameters())
            trainable_params = sum(p.numel() for p in self.current_model.parameters() if p.requires_grad)
            info += f"🔢 Total parameters: {total_params:,}\n"
            info += f"🎯 Trainable parameters: {trainable_params:,}\n"

        if self.training_data is not None:
            info += "\n📊 DATA:\n"
            info += f"📈 Examples: {len(self.training_data):,}\n"
            info += f"📝 Columns: {list(self.training_data.column_names)}\n"

        return info

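
# Single shared trainer instance; every Gradio callback below closes over it.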
trainer = MultimodalTrainer()


def create_interface():
    with gr.Blocks(title="🔥 Multimodal Training Hub", theme=gr.themes.Soft()) as app:

        gr.Markdown("""
        # 🔥 Multimodal Training Hub
        ### A training platform for multimodal models

        🤖 Models • 📊 Datasets • 🏋️ Training • 🛠️ Tools
        """)

with gr.Tab("🔧 Diagnostic"): |
|
|
gr.Markdown("### 🩺 Vérification du système") |
|
|
|
|
|
with gr.Row(): |
|
|
check_deps_btn = gr.Button("🔍 Vérifier dépendances", variant="primary") |
|
|
install_core_btn = gr.Button("📦 Installer packages critiques", variant="secondary") |
|
|
|
|
|
deps_status = gr.Textbox( |
|
|
label="État des dépendances", |
|
|
lines=15, |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
install_transformers_btn = gr.Button("🤗 Installer Transformers") |
|
|
install_torch_btn = gr.Button("🔥 Installer PyTorch") |
|
|
install_datasets_btn = gr.Button("📊 Installer Datasets") |
|
|
|
|
|
install_status = gr.Textbox( |
|
|
label="Status d'installation", |
|
|
lines=5, |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
|
|
|
check_deps_btn.click(trainer.check_dependencies, outputs=deps_status) |
|
|
|
|
|
install_transformers_btn.click( |
|
|
lambda: trainer.install_dependencies(["transformers"]), |
|
|
outputs=install_status |
|
|
) |
|
|
install_torch_btn.click( |
|
|
lambda: trainer.install_dependencies(["torch", "torchvision"]), |
|
|
outputs=install_status |
|
|
) |
|
|
install_datasets_btn.click( |
|
|
lambda: trainer.install_dependencies(["datasets"]), |
|
|
outputs=install_status |
|
|
) |
|
|
install_core_btn.click( |
|
|
lambda: trainer.install_dependencies(["torch", "transformers", "datasets", "accelerate"]), |
|
|
outputs=install_status |
|
|
) |
|
|
|
|
|
with gr.Tab("🤖 Modèle"): |
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
model_input = gr.Textbox( |
|
|
label="Nom du modèle HuggingFace", |
|
|
placeholder="kvn420/Tenro_V4.1", |
|
|
value="kvn420/Tenro_V4.1" |
|
|
) |
|
|
model_type = gr.Dropdown( |
|
|
label="Type de modèle", |
|
|
choices=["causal", "base"], |
|
|
value="causal" |
|
|
) |
|
|
load_model_btn = gr.Button("🔄 Charger le modèle", variant="primary") |
|
|
|
|
|
with gr.Column(): |
|
|
model_status = gr.Textbox( |
|
|
label="Status du modèle", |
|
|
interactive=False, |
|
|
lines=8 |
|
|
) |
|
|
|
|
|
info_btn = gr.Button("ℹ️ Info modèle") |
|
|
model_info = gr.Textbox( |
|
|
label="Informations détaillées", |
|
|
interactive=False, |
|
|
lines=8 |
|
|
) |
|
|
|
|
|
load_model_btn.click( |
|
|
trainer.load_model, |
|
|
inputs=[model_input, model_type], |
|
|
outputs=model_status |
|
|
) |
|
|
|
|
|
info_btn.click(trainer.get_model_info, outputs=model_info) |
|
|
|
|
|
with gr.Tab("📊 Données"): |
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
gr.Markdown("### 📝 Dataset individuel") |
|
|
dataset_input = gr.Textbox( |
|
|
label="Nom du dataset", |
|
|
placeholder="microsoft/coco" |
|
|
) |
|
|
dataset_split = gr.Textbox( |
|
|
label="Split", |
|
|
value="train" |
|
|
) |
|
|
load_dataset_btn = gr.Button("➕ Ajouter dataset", variant="primary") |
|
|
|
|
|
with gr.Column(): |
|
|
data_status = gr.Textbox( |
|
|
label="Status des données", |
|
|
interactive=False, |
|
|
lines=12 |
|
|
) |
|
|
|
|
|
load_dataset_btn.click( |
|
|
trainer.load_single_dataset, |
|
|
inputs=[dataset_input, dataset_split], |
|
|
outputs=data_status |
|
|
) |
|
|
|
|
|
with gr.Tab("🏋️ Entraînement"): |
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
output_dir = gr.Textbox( |
|
|
label="Dossier de sortie", |
|
|
value="./trained_model" |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
num_epochs = gr.Number( |
|
|
label="Époques", |
|
|
value=3, |
|
|
minimum=1 |
|
|
) |
|
|
batch_size = gr.Number( |
|
|
label="Batch size", |
|
|
value=4, |
|
|
minimum=1 |
|
|
) |
|
|
|
|
|
learning_rate = gr.Number( |
|
|
label="Learning rate", |
|
|
value=5e-5, |
|
|
step=1e-6 |
|
|
) |
|
|
|
|
|
train_btn = gr.Button("🚀 Simuler entraînement", variant="primary", size="lg") |
|
|
|
|
|
with gr.Column(): |
|
|
training_status = gr.Textbox( |
|
|
label="Status de l'entraînement", |
|
|
interactive=False, |
|
|
lines=12 |
|
|
) |
|
|
|
|
|
train_btn.click( |
|
|
trainer.simulate_training, |
|
|
inputs=[output_dir, num_epochs, learning_rate, batch_size], |
|
|
outputs=training_status |
|
|
) |
|
|
|
|
|
|
|
|
        # Populate the dependency report as soon as the UI loads.
        app.load(trainer.check_dependencies, outputs=deps_status)

    return app

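
# share=True requests a temporary public gradio.live link; server_name="0.0.0.0"
# listens on all interfaces (needed in containers/Spaces) and 7860 is Gradio's
# default port.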
if __name__ == "__main__":
    app = create_interface()
    app.launch(share=True, server_name="0.0.0.0", server_port=7860)