kvn420 committed on
Commit
5bba009
·
verified ·
1 Parent(s): 794b299

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -95
app.py CHANGED
@@ -9,6 +9,7 @@ from typing import Dict, List, Optional, Union
9
  import time
10
  import tempfile
11
  import shutil
 
12
 
13
  # Configuration du logging
14
  logging.basicConfig(level=logging.INFO)
@@ -18,80 +19,107 @@ logger = logging.getLogger(__name__)
18
  def install_package(package_name):
19
  """Installe un package Python"""
20
  try:
21
- subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
 
22
  return True
23
  except subprocess.CalledProcessError as e:
24
- logger.error(f"Erreur installation {package_name}: {e}")
25
  return False
26
 
27
- # Imports conditionnels avec tentative d'installation
28
- def safe_import(module_name, package_name=None):
29
- """Import sécurisé avec possibilité d'installation"""
30
- if package_name is None:
31
- package_name = module_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
 
33
  try:
34
- return __import__(module_name), True
 
35
  except ImportError:
36
- logger.warning(f"{module_name} non trouvé")
37
- return None, False
38
-
39
- # Tentative d'imports
40
- numpy, NUMPY_AVAILABLE = safe_import('numpy')
41
- torch_module, TORCH_AVAILABLE = safe_import('torch')
42
- if torch_module:
43
- torch = torch_module
44
- else:
45
- torch = None
46
-
47
- # Import transformers
48
- try:
49
- from transformers import (
50
- AutoTokenizer, AutoModel, AutoProcessor,
51
- AutoModelForCausalLM, TrainingArguments, Trainer,
52
- DataCollatorForLanguageModeling
53
- )
54
- TRANSFORMERS_AVAILABLE = True
55
- except ImportError:
56
- TRANSFORMERS_AVAILABLE = False
57
- logger.warning("Transformers non disponible")
58
-
59
- # Import datasets
60
- try:
61
- from datasets import Dataset, load_dataset, concatenate_datasets
62
- DATASETS_AVAILABLE = True
63
- except ImportError:
64
- DATASETS_AVAILABLE = False
65
- logger.warning("Datasets non disponible")
66
-
67
- # Import HuggingFace Hub
68
- try:
69
- from huggingface_hub import HfApi
70
- HF_HUB_AVAILABLE = True
71
- except ImportError:
72
- HF_HUB_AVAILABLE = False
73
- logger.warning("HuggingFace Hub non disponible")
74
-
75
- # Import PIL
76
- try:
77
- from PIL import Image
78
- PIL_AVAILABLE = True
79
- except ImportError:
80
- PIL_AVAILABLE = False
81
-
82
- # Import librosa
83
- try:
84
- import librosa
85
- LIBROSA_AVAILABLE = True
86
- except ImportError:
87
- LIBROSA_AVAILABLE = False
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- # Import OpenCV
90
- try:
91
- import cv2
92
- CV2_AVAILABLE = True
93
- except ImportError:
94
- CV2_AVAILABLE = False
95
 
96
  class MultimodalTrainer:
97
  def __init__(self):
@@ -101,43 +129,81 @@ class MultimodalTrainer:
101
  self.training_data = []
102
 
103
  # Device selection
104
- if TORCH_AVAILABLE and torch.cuda.is_available():
105
  self.device = torch.device("cuda")
106
  else:
107
  self.device = "cpu"
108
 
109
  # HF API
110
- if HF_HUB_AVAILABLE:
111
  self.hf_api = HfApi()
112
  else:
113
  self.hf_api = None
114
-
115
  def install_dependencies(self, packages_to_install):
116
  """Installe les dépendances manquantes"""
117
  installation_results = []
118
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  for package in packages_to_install:
120
  installation_results.append(f"📦 Installation de {package}...")
121
- success = install_package(package)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  if success:
123
  installation_results.append(f"✅ {package} installé avec succès!")
124
  else:
125
  installation_results.append(f"❌ Échec installation {package}")
126
 
127
- installation_results.append("\n🔄 Redémarrage requis pour prendre effet")
 
 
 
 
 
128
  return "\n".join(installation_results)
129
 
130
  def check_dependencies(self):
131
  """Vérifie et affiche l'état des dépendances"""
 
 
 
132
  deps = {
133
- "NumPy": NUMPY_AVAILABLE,
134
  "PyTorch": TORCH_AVAILABLE,
135
  "Transformers": TRANSFORMERS_AVAILABLE,
136
  "Datasets": DATASETS_AVAILABLE,
 
137
  "HuggingFace Hub": HF_HUB_AVAILABLE,
138
- "PIL (Images)": PIL_AVAILABLE,
139
- "Librosa (Audio)": LIBROSA_AVAILABLE,
140
- "OpenCV (Vidéo)": CV2_AVAILABLE
141
  }
142
 
143
  status = "📦 État des dépendances:\n\n"
@@ -146,16 +212,13 @@ class MultimodalTrainer:
146
  critical_deps = ["PyTorch", "Transformers", "Datasets"]
147
  status += "🔥 CRITIQUES:\n"
148
  for dep in critical_deps:
149
- icon = "✅" if deps.get(dep.replace(" ", "").replace("(", "").replace(")", "")) else "❌"
150
  status += f"{icon} {dep}\n"
151
 
152
  status += "\n🔧 OPTIONNELLES:\n"
153
- optional_deps = ["NumPy", "HuggingFace Hub", "PIL (Images)", "Librosa (Audio)", "OpenCV (Vidéo)"]
154
  for dep in optional_deps:
155
- key = dep.replace(" ", "").replace("(", "").replace(")", "").replace("Images", "").replace("Audio", "").replace("Vidéo", "")
156
- if key == "HuggingFaceHub":
157
- key = "HuggingFace Hub"
158
- icon = "✅" if deps.get(key) else "⚠️"
159
  status += f"{icon} {dep}\n"
160
 
161
  # Système info
@@ -163,7 +226,7 @@ class MultimodalTrainer:
163
  status += f"🐍 Python: {sys.version.split()[0]}\n"
164
  status += f"💾 Device: {self.device}\n"
165
 
166
- if TORCH_AVAILABLE and torch.cuda.is_available():
167
  status += f"🚀 GPU: {torch.cuda.get_device_name()}\n"
168
  status += f"🔋 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB\n"
169
 
@@ -174,7 +237,7 @@ class MultimodalTrainer:
174
  if not TRANSFORMERS_AVAILABLE:
175
  return "❌ Transformers non installé! Utilisez l'outil d'installation."
176
 
177
- if not TORCH_AVAILABLE:
178
  return "❌ PyTorch non installé! Utilisez l'outil d'installation."
179
 
180
  if not model_name.strip():
@@ -225,7 +288,7 @@ class MultimodalTrainer:
225
 
226
  def load_single_dataset(self, dataset_name: str, split: str = "train"):
227
  """Charge un dataset individuel"""
228
- if not DATASETS_AVAILABLE:
229
  return "❌ Datasets non installé! Utilisez l'outil d'installation."
230
 
231
  if not dataset_name.strip():
@@ -262,7 +325,10 @@ class MultimodalTrainer:
262
  for i, step in enumerate(steps):
263
  result += f"Étape {i+1}: {step} ✅\n"
264
 
265
- result += "\n⚠️ MODE DÉMO - Pour un vrai entraînement, installez PyTorch + Transformers"
 
 
 
266
  return result
267
 
268
  def get_model_info(self):
@@ -272,10 +338,11 @@ class MultimodalTrainer:
272
 
273
  info = f"📋 INFORMATIONS DU MODÈLE:\n\n"
274
  info += f"🏷️ Type: {type(self.current_model).__name__}\n"
275
- info += f"💾 Device: {next(self.current_model.parameters()).device}\n"
276
 
277
- # Compte les paramètres
278
- if TORCH_AVAILABLE:
 
 
279
  total_params = sum(p.numel() for p in self.current_model.parameters())
280
  trainable_params = sum(p.numel() for p in self.current_model.parameters() if p.requires_grad)
281
 
@@ -323,7 +390,7 @@ def create_interface():
323
 
324
  install_status = gr.Textbox(
325
  label="Status d'installation",
326
- lines=5,
327
  interactive=False
328
  )
329
 
@@ -335,7 +402,7 @@ def create_interface():
335
  outputs=install_status
336
  )
337
  install_torch_btn.click(
338
- lambda: trainer.install_dependencies(["torch", "torchvision"]),
339
  outputs=install_status
340
  )
341
  install_datasets_btn.click(
@@ -352,8 +419,8 @@ def create_interface():
352
  with gr.Column():
353
  model_input = gr.Textbox(
354
  label="Nom du modèle HuggingFace",
355
- placeholder="kvn420/Tenro_V4.1",
356
- value="kvn420/Tenro_V4.1"
357
  )
358
  model_type = gr.Dropdown(
359
  label="Type de modèle",
@@ -390,7 +457,12 @@ def create_interface():
390
  gr.Markdown("### 📝 Dataset individuel")
391
  dataset_input = gr.Textbox(
392
  label="Nom du dataset",
393
- placeholder="microsoft/coco"
 
 
 
 
 
394
  )
395
  dataset_split = gr.Textbox(
396
  label="Split",
@@ -405,9 +477,16 @@ def create_interface():
405
  lines=12
406
  )
407
 
 
 
 
 
 
 
 
408
  load_dataset_btn.click(
409
- trainer.load_single_dataset,
410
- inputs=[dataset_input, dataset_split],
411
  outputs=data_status
412
  )
413
 
 
9
  import time
10
  import tempfile
11
  import shutil
12
+ import importlib
13
 
14
  # Configuration du logging
15
  logging.basicConfig(level=logging.INFO)
 
19
def install_package(package_name):
    """Install a Python package with pip, using the running interpreter.

    Runs ``<sys.executable> -m pip install <package_name> --quiet`` and logs
    the outcome through the module logger.

    Args:
        package_name: Requirement handed to pip as a single command-line
            argument.

    Returns:
        True when pip exits successfully, False when it exits with a
        non-zero status (``subprocess.CalledProcessError``).
    """
    pip_command = [sys.executable, "-m", "pip", "install", package_name, "--quiet"]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as pip_error:
        logger.error(f"Erreur installation {package_name}: {pip_error}")
        return False
    else:
        logger.info(f"✅ {package_name} installé avec succès")
        return True
28
 
29
+ # Fonction pour recharger les modules après installation
30
def reload_module(module_name):
    """Import, or re-import, a module so a fresh installation becomes visible.

    If the module is already present in ``sys.modules`` it is reloaded in
    place; otherwise it is imported for the first time.

    Args:
        module_name: Dotted name of the module to (re)load.

    Returns:
        True when the module was imported or reloaded, False when any
        exception was raised (the error is logged, not propagated).
    """
    try:
        # The import finders cache directory contents; packages installed
        # while the process is running are only guaranteed to be found
        # after the caches are invalidated.
        importlib.invalidate_caches()

        loaded = sys.modules.get(module_name)
        if loaded is not None:
            importlib.reload(loaded)
        else:
            importlib.import_module(module_name)
        return True
    except Exception as e:
        # Deliberately broad: importing third-party code can fail in many
        # ways and callers only need a success flag.
        logger.error(f"Erreur rechargement {module_name}: {e}")
        return False
41
+
42
+ # Imports conditionnels avec vérification
43
def check_and_import_dependencies():
    """Try to import every optional dependency and publish the result.

    For each library the function attempts the import and binds, at module
    level, both the imported name(s) and a ``*_AVAILABLE`` boolean flag.
    When an import fails with ``ImportError`` the flag is set to False and
    the corresponding name(s) are bound to ``None`` so later code can test
    them safely.

    The function may be called repeatedly (it is re-run after packages are
    installed from within the running process), so the import system's
    finder caches are invalidated first; otherwise a package installed a
    moment ago may still be reported as missing.

    Returns:
        None. Results are exposed through module-level globals.
    """
    global numpy, torch, NUMPY_AVAILABLE, TORCH_AVAILABLE, TRANSFORMERS_AVAILABLE
    global DATASETS_AVAILABLE, HF_HUB_AVAILABLE, PIL_AVAILABLE, LIBROSA_AVAILABLE, CV2_AVAILABLE
    global AutoTokenizer, AutoModel, AutoProcessor, AutoModelForCausalLM
    global TrainingArguments, Trainer, DataCollatorForLanguageModeling
    global Dataset, load_dataset, concatenate_datasets, HfApi, Image, librosa, cv2

    # Make packages installed since the last check discoverable.
    importlib.invalidate_caches()

    # NumPy
    try:
        import numpy
        NUMPY_AVAILABLE = True
    except ImportError:
        numpy = None
        NUMPY_AVAILABLE = False

    # PyTorch
    try:
        import torch
        TORCH_AVAILABLE = True
    except ImportError:
        torch = None
        TORCH_AVAILABLE = False

    # Transformers
    try:
        from transformers import (
            AutoTokenizer, AutoModel, AutoProcessor,
            AutoModelForCausalLM, TrainingArguments, Trainer,
            DataCollatorForLanguageModeling
        )
        TRANSFORMERS_AVAILABLE = True
    except ImportError:
        TRANSFORMERS_AVAILABLE = False
        AutoTokenizer = AutoModel = AutoProcessor = None
        AutoModelForCausalLM = TrainingArguments = Trainer = None
        DataCollatorForLanguageModeling = None

    # Datasets
    try:
        from datasets import Dataset, load_dataset, concatenate_datasets
        DATASETS_AVAILABLE = True
    except ImportError:
        DATASETS_AVAILABLE = False
        Dataset = load_dataset = concatenate_datasets = None

    # HuggingFace Hub
    try:
        from huggingface_hub import HfApi
        HF_HUB_AVAILABLE = True
    except ImportError:
        HF_HUB_AVAILABLE = False
        HfApi = None

    # PIL
    try:
        from PIL import Image
        PIL_AVAILABLE = True
    except ImportError:
        PIL_AVAILABLE = False
        Image = None

    # Librosa
    try:
        import librosa
        LIBROSA_AVAILABLE = True
    except ImportError:
        LIBROSA_AVAILABLE = False
        librosa = None

    # OpenCV
    try:
        import cv2
        CV2_AVAILABLE = True
    except ImportError:
        CV2_AVAILABLE = False
        cv2 = None
120
 
121
+ # Initialisation des imports
122
+ check_and_import_dependencies()
 
 
 
 
123
 
124
  class MultimodalTrainer:
125
  def __init__(self):
 
129
  self.training_data = []
130
 
131
  # Device selection
132
+ if TORCH_AVAILABLE and torch and torch.cuda.is_available():
133
  self.device = torch.device("cuda")
134
  else:
135
  self.device = "cpu"
136
 
137
  # HF API
138
+ if HF_HUB_AVAILABLE and HfApi:
139
  self.hf_api = HfApi()
140
  else:
141
  self.hf_api = None
142
+
143
  def install_dependencies(self, packages_to_install):
144
  """Installe les dépendances manquantes"""
145
  installation_results = []
146
 
147
+ # Mapping des packages
148
+ package_mapping = {
149
+ "torch": "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu",
150
+ "transformers": "transformers",
151
+ "datasets": "datasets",
152
+ "accelerate": "accelerate",
153
+ "pillow": "pillow",
154
+ "librosa": "librosa",
155
+ "opencv": "opencv-python",
156
+ "huggingface_hub": "huggingface_hub"
157
+ }
158
+
159
  for package in packages_to_install:
160
  installation_results.append(f"📦 Installation de {package}...")
161
+
162
+ # Utilise le mapping si disponible
163
+ install_cmd = package_mapping.get(package.lower(), package)
164
+
165
+ if package.lower() == "torch":
166
+ # Installation spéciale pour PyTorch
167
+ try:
168
+ subprocess.check_call([
169
+ sys.executable, "-m", "pip", "install",
170
+ "torch", "torchvision", "torchaudio",
171
+ "--index-url", "https://download.pytorch.org/whl/cpu",
172
+ "--quiet"
173
+ ])
174
+ success = True
175
+ except subprocess.CalledProcessError:
176
+ success = False
177
+ else:
178
+ success = install_package(install_cmd)
179
+
180
  if success:
181
  installation_results.append(f"✅ {package} installé avec succès!")
182
  else:
183
  installation_results.append(f"❌ Échec installation {package}")
184
 
185
+ # Recharge les dépendances après installation
186
+ installation_results.append("\n🔄 Rechargement des modules...")
187
+ check_and_import_dependencies()
188
+ self.__init__() # Réinitialise l'instance
189
+
190
+ installation_results.append("✅ Modules rechargés!")
191
  return "\n".join(installation_results)
192
 
193
  def check_dependencies(self):
194
  """Vérifie et affiche l'état des dépendances"""
195
+ # Force la vérification
196
+ check_and_import_dependencies()
197
+
198
  deps = {
 
199
  "PyTorch": TORCH_AVAILABLE,
200
  "Transformers": TRANSFORMERS_AVAILABLE,
201
  "Datasets": DATASETS_AVAILABLE,
202
+ "NumPy": NUMPY_AVAILABLE,
203
  "HuggingFace Hub": HF_HUB_AVAILABLE,
204
+ "PIL": PIL_AVAILABLE,
205
+ "Librosa": LIBROSA_AVAILABLE,
206
+ "OpenCV": CV2_AVAILABLE
207
  }
208
 
209
  status = "📦 État des dépendances:\n\n"
 
212
  critical_deps = ["PyTorch", "Transformers", "Datasets"]
213
  status += "🔥 CRITIQUES:\n"
214
  for dep in critical_deps:
215
+ icon = "✅" if deps.get(dep) else "❌"
216
  status += f"{icon} {dep}\n"
217
 
218
  status += "\n🔧 OPTIONNELLES:\n"
219
+ optional_deps = ["NumPy", "HuggingFace Hub", "PIL", "Librosa", "OpenCV"]
220
  for dep in optional_deps:
221
+ icon = "" if deps.get(dep) else "⚠️"
 
 
 
222
  status += f"{icon} {dep}\n"
223
 
224
  # Système info
 
226
  status += f"🐍 Python: {sys.version.split()[0]}\n"
227
  status += f"💾 Device: {self.device}\n"
228
 
229
+ if TORCH_AVAILABLE and torch and torch.cuda.is_available():
230
  status += f"🚀 GPU: {torch.cuda.get_device_name()}\n"
231
  status += f"🔋 VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB\n"
232
 
 
237
  if not TRANSFORMERS_AVAILABLE:
238
  return "❌ Transformers non installé! Utilisez l'outil d'installation."
239
 
240
+ if not TORCH_AVAILABLE or not torch:
241
  return "❌ PyTorch non installé! Utilisez l'outil d'installation."
242
 
243
  if not model_name.strip():
 
288
 
289
  def load_single_dataset(self, dataset_name: str, split: str = "train"):
290
  """Charge un dataset individuel"""
291
+ if not DATASETS_AVAILABLE or not load_dataset:
292
  return "❌ Datasets non installé! Utilisez l'outil d'installation."
293
 
294
  if not dataset_name.strip():
 
325
  for i, step in enumerate(steps):
326
  result += f"Étape {i+1}: {step} ✅\n"
327
 
328
+ if TORCH_AVAILABLE and TRANSFORMERS_AVAILABLE:
329
+ result += "\n✅ Prêt pour un vrai entraînement!"
330
+ else:
331
+ result += "\n⚠️ MODE DÉMO - Installez PyTorch + Transformers pour un vrai entraînement"
332
  return result
333
 
334
  def get_model_info(self):
 
338
 
339
  info = f"📋 INFORMATIONS DU MODÈLE:\n\n"
340
  info += f"🏷️ Type: {type(self.current_model).__name__}\n"
 
341
 
342
+ if TORCH_AVAILABLE and torch:
343
+ info += f"💾 Device: {next(self.current_model.parameters()).device}\n"
344
+
345
+ # Compte les paramètres
346
  total_params = sum(p.numel() for p in self.current_model.parameters())
347
  trainable_params = sum(p.numel() for p in self.current_model.parameters() if p.requires_grad)
348
 
 
390
 
391
  install_status = gr.Textbox(
392
  label="Status d'installation",
393
+ lines=8,
394
  interactive=False
395
  )
396
 
 
402
  outputs=install_status
403
  )
404
  install_torch_btn.click(
405
+ lambda: trainer.install_dependencies(["torch"]),
406
  outputs=install_status
407
  )
408
  install_datasets_btn.click(
 
419
  with gr.Column():
420
  model_input = gr.Textbox(
421
  label="Nom du modèle HuggingFace",
422
+ placeholder="microsoft/DialoGPT-medium",
423
+ value="microsoft/DialoGPT-medium"
424
  )
425
  model_type = gr.Dropdown(
426
  label="Type de modèle",
 
457
  gr.Markdown("### 📝 Dataset individuel")
458
  dataset_input = gr.Textbox(
459
  label="Nom du dataset",
460
+ placeholder="wikitext",
461
+ value="wikitext"
462
+ )
463
+ dataset_config = gr.Textbox(
464
+ label="Configuration (optionnel)",
465
+ placeholder="wikitext-2-raw-v1"
466
  )
467
  dataset_split = gr.Textbox(
468
  label="Split",
 
477
  lines=12
478
  )
479
 
480
+ def load_dataset_with_config(dataset_name, config_name, split):
481
+ if config_name.strip():
482
+ full_name = f"{dataset_name}/{config_name}" if "/" not in config_name else config_name
483
+ else:
484
+ full_name = dataset_name
485
+ return trainer.load_single_dataset(full_name, split)
486
+
487
  load_dataset_btn.click(
488
+ load_dataset_with_config,
489
+ inputs=[dataset_input, dataset_config, dataset_split],
490
  outputs=data_status
491
  )
492