badrex committed on
Commit
099b786
·
verified ·
1 Parent(s): 4b2ab62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -94
app.py CHANGED
@@ -5,20 +5,18 @@ import numpy as np
5
  import torch
6
  import spaces
7
 
8
- # Load the model
9
  print("Loading model...")
10
  model_id = "badrex/mms-300m-arabic-dialect-identifier"
11
  classifier = pipeline("audio-classification", model=model_id, device='cuda')
12
  print("Model loaded successfully")
13
  print("Model moved to GPU successfully")
14
 
15
-
16
  @spaces.GPU
17
  def predict(audio_segment, sr=16000):
18
  return classifier({"sampling_rate": sr, "raw": audio_segment})
19
 
20
-
21
- # Define dialect mapping
22
  dialect_mapping = {
23
  "MSA": "Modern Standard Arabic (MSA) - العربية الفصحى الحديثة",
24
  "Egyptian": "Egyptian Arabic - اللهجة المصرية العامية",
@@ -31,16 +29,12 @@ def predict_dialect(audio):
31
  if audio is None:
32
  return {"Error": 1.0}
33
 
34
- # The audio input from Gradio is a tuple of (sample_rate, audio_array)
35
  sr, audio_array = audio
36
 
37
- # Process the audio input
38
  if len(audio_array.shape) > 1:
39
- audio_array = audio_array.mean(axis=1) # Convert stereo to mono
40
 
41
- # Convert audio to float32 if it's not already (fix for Chrome recording issue)
42
  if audio_array.dtype != np.float32:
43
- # Normalize to [-1, 1] range as expected by the model
44
  if audio_array.dtype == np.int16:
45
  audio_array = audio_array.astype(np.float32) / 32768.0
46
  else:
@@ -48,10 +42,8 @@ def predict_dialect(audio):
48
 
49
  print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")
50
 
51
- # Classify the dialect
52
  predictions = predict(sr=sr, audio_segment=audio_array)
53
 
54
- # Format results for display
55
  results = {}
56
  for pred in predictions:
57
  dialect_name = dialect_mapping.get(pred['label'], pred['label'])
@@ -59,110 +51,36 @@ def predict_dialect(audio):
59
 
60
  return results
61
 
62
- # Manually prepare example file paths without metadata
63
  examples = []
64
  examples_dir = "examples"
65
  if os.path.exists(examples_dir):
66
  for filename in os.listdir(examples_dir):
67
  if filename.endswith((".wav", ".mp3", ".ogg")):
68
  examples.append([os.path.join(examples_dir, filename)])
69
-
70
  print(f"Found {len(examples)} example files")
71
  else:
72
  print("Examples directory not found")
73
 
 
 
 
74
 
 
 
75
 
76
- # Custom CSS for better styling
77
- custom_css = """
78
- <style>
79
- .centered-content {
80
- text-align: center;
81
- max-width: 800px;
82
- margin: 0 auto;
83
- padding: 20px;
84
- }
85
-
86
- .logo-image {
87
- width: 200px;
88
- height: auto;
89
- margin: 20px auto;
90
- display: block;
91
- }
92
-
93
- .description-text {
94
- font-size: 16px;
95
- line-height: 1.6;
96
- margin-bottom: 20px;
97
- }
98
-
99
- .dialect-list {
100
- font-size: 15px;
101
- line-height: 1.8;
102
- text-align: left;
103
- max-width: 600px;
104
- margin: 0 auto;
105
- }
106
-
107
- .highlight-text {
108
- font-size: 16px;
109
- color: #2563eb;
110
- margin: 20px 0;
111
- }
112
-
113
- .footer-text {
114
- font-size: 13px;
115
- color: #6b7280;
116
- margin-top: 20px;
117
- }
118
- </style>
119
- """
120
-
121
- """
122
- <p style="font-size: 15px; line-height: 1.8;">
123
- <strong>The following Arabic language varieties are supported:</strong>
124
- <br><br>
125
- ✦ <strong>Modern Standard Arabic (MSA)</strong> - The formal language of media and education
126
- <br>
127
- ✦ <strong>Egyptian Arabic</strong> - The dialect of Cairo, Alexandria, and popular Arabic cinema
128
- <br>
129
- ✦ <strong>peninsular Arabic</strong> - Spoken across Saudi Arabia, UAE, Kuwait, Qatar, Bahrain, and Oman
130
- <br>
131
- ✦ <strong>Levantine Arabic</strong> - The dialect of Syria, Lebanon, Jordan, and Palestine
132
- <br>
133
- ✦ <strong>Maghrebi Arabic</strong> - The distinctive varieties of Morocco, Algeria, Tunisia, and Libya
134
- </p>
135
- <br>
136
  """
137
 
138
- # Create the Gradio interface
139
  demo = gr.Interface(
140
  fn=predict_dialect,
141
  inputs=gr.Audio(),
142
  outputs=gr.Label(num_top_classes=5, label="Predicted Dialect"),
143
  title="Tamyïz 🍉 Arabic Dialect Identification in Speech",
144
- description="""
145
- <div class="centered-content">
146
- <div>
147
- <p>
148
- By <a href="https://badrex.github.io/" style="color: #2563eb;">Badr Alabsi</a> with ❤️🤍💚
149
- </p>
150
- <br>
151
- <p style="font-size: 15px; line-height: 1.8;">
152
- This is a demo for the accurate and robust Transformer-based <a href="https://huggingface.co/badrex/mms-300m-arabic-dialect-identifier" style="color: #FF5349;">model</a> for Spoken Arabic Dialect Identification (ADI).
153
- From just a short audio clip (5-10 seconds), the model can identify Modern Standard Arabic (<strong>MSA</strong>) as well as four major regional Arabic varieties: <strong>Egyptian</strong> Arabic, <strong>Peninsular</strong> Arabic (Gulf, Yemeni, and Iraqi), <strong>Levantine</strong> Arabic, and <strong>Maghrebi</strong> Arabic.
154
- <br>
155
- <p style="font-size: 15px; line-height: 1.8;">
156
- Simply <strong>upload an audio file</strong> 📤 or <strong>record yourself speaking</strong> 🎙️⏺️ to try out the model!
157
- </p>
158
- </div>
159
- </div>
160
- """,
161
  examples=examples if examples else None,
162
- cache_examples=False, # Disable caching to avoid issues
163
- #theme=gr.themes.Default(font=[gr.themes.GoogleFont("Amiri"), "Arial", "serif"]),
164
  flagging_mode=None
165
  )
166
 
167
- # Launch the app
168
  demo.launch(share=True)
 
5
  import torch
6
  import spaces
7
 
8
+ # load the model
9
  print("Loading model...")
10
  model_id = "badrex/mms-300m-arabic-dialect-identifier"
11
  classifier = pipeline("audio-classification", model=model_id, device='cuda')
12
  print("Model loaded successfully")
13
  print("Model moved to GPU successfully")
14
 
 
15
  @spaces.GPU
16
  def predict(audio_segment, sr=16000):
17
  return classifier({"sampling_rate": sr, "raw": audio_segment})
18
 
19
+ # define dialect mapping
 
20
  dialect_mapping = {
21
  "MSA": "Modern Standard Arabic (MSA) - العربية الفصحى الحديثة",
22
  "Egyptian": "Egyptian Arabic - اللهجة المصرية العامية",
 
29
  if audio is None:
30
  return {"Error": 1.0}
31
 
 
32
  sr, audio_array = audio
33
 
 
34
  if len(audio_array.shape) > 1:
35
+ audio_array = audio_array.mean(axis=1)
36
 
 
37
  if audio_array.dtype != np.float32:
 
38
  if audio_array.dtype == np.int16:
39
  audio_array = audio_array.astype(np.float32) / 32768.0
40
  else:
 
42
 
43
  print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")
44
 
 
45
  predictions = predict(sr=sr, audio_segment=audio_array)
46
 
 
47
  results = {}
48
  for pred in predictions:
49
  dialect_name = dialect_mapping.get(pred['label'], pred['label'])
 
51
 
52
  return results
53
 
54
+ # prepare examples
55
  examples = []
56
  examples_dir = "examples"
57
  if os.path.exists(examples_dir):
58
  for filename in os.listdir(examples_dir):
59
  if filename.endswith((".wav", ".mp3", ".ogg")):
60
  examples.append([os.path.join(examples_dir, filename)])
 
61
  print(f"Found {len(examples)} example files")
62
  else:
63
  print("Examples directory not found")
64
 
65
+ # clean description without problematic HTML
66
+ description = """
67
+ By <a href="https://badrex.github.io/">Badr Alabsi</a> with ❤️🤍💚
68
 
69
+ This is a demo for the accurate and robust Transformer-based <a href="https://huggingface.co/badrex/mms-300m-arabic-dialect-identifier">model</a> for Spoken Arabic Dialect Identification (ADI).
70
+ From just a short audio clip (5-10 seconds), the model can identify Modern Standard Arabic (MSA) as well as four major regional Arabic varieties: Egyptian Arabic, Peninsular Arabic (Gulf, Yemeni, and Iraqi), Levantine Arabic, and Maghrebi Arabic.
71
 
72
+ Simply **upload an audio file** 📤 or **record yourself speaking** 🎙️⏺️ to try out the model!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  """
74
 
 
75
  demo = gr.Interface(
76
  fn=predict_dialect,
77
  inputs=gr.Audio(),
78
  outputs=gr.Label(num_top_classes=5, label="Predicted Dialect"),
79
  title="Tamyïz 🍉 Arabic Dialect Identification in Speech",
80
+ description=description,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  examples=examples if examples else None,
82
+ cache_examples=False,
 
83
  flagging_mode=None
84
  )
85
 
 
86
  demo.launch(share=True)