aether-raider committed
Commit 4d18ab0 · Parent: 188418c

fix: sample audio issue

README.md CHANGED
@@ -30,12 +30,6 @@ An interactive evaluation interface for rating Air Traffic Control (ATC) Text-to
 5. **Gender Comparison**: Compare male and female voices
 6. **Submit**: Complete the evaluation and submit your responses
 
-## Models Evaluated
-
-- **CSM**: Custom Speech Model
-- **XTTS**: XTTSv2 Model
-- **Orpheus**: Orpheus TTS Model
-
 ## Data Storage
 
 All evaluation responses are stored in the `aether-raid/atc-tts-mos-ratings` dataset for research purposes.
 
backend/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/__init__.cpython-311.pyc and b/backend/__pycache__/__init__.cpython-311.pyc differ
 
backend/__pycache__/config.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-311.pyc and b/backend/__pycache__/config.cpython-311.pyc differ
 
backend/__pycache__/data_manager.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/data_manager.cpython-311.pyc and b/backend/__pycache__/data_manager.cpython-311.pyc differ
 
backend/__pycache__/hf_logging.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/hf_logging.cpython-311.pyc and b/backend/__pycache__/hf_logging.cpython-311.pyc differ
 
backend/__pycache__/models.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/models.cpython-311.pyc and b/backend/__pycache__/models.cpython-311.pyc differ
 
backend/__pycache__/session_manager.cpython-311.pyc CHANGED
Binary files a/backend/__pycache__/session_manager.cpython-311.pyc and b/backend/__pycache__/session_manager.cpython-311.pyc differ
 
backend/data_manager.py CHANGED
@@ -25,34 +25,11 @@ class DataManager:
         self._clips: Optional[List[Clip]] = None
         self._loading = False
 
-    def _audio_to_data_url(self, audio_val) -> Optional[str]:
+    def _get_audio_data(self, audio_val) -> Optional[tuple]:
         """
-        Accepts:
-        - torchcodec AudioDecoder
-        - dict-like with 'path' / 'array' / 'sampling_rate'
-        Returns data:audio/wav;base64,... or None.
+        Extract audio data that Gradio can handle directly.
+        Returns tuple (array, sample_rate) or None.
         """
-        # 1) Try to get a real file path and read it
-        try:
-            path = None
-            if isinstance(audio_val, dict) and "path" in audio_val:
-                path = audio_val["path"]
-            else:
-                # mapping-like: try __getitem__ then attribute
-                try:
-                    path = audio_val["path"]  # works on some decoders
-                except Exception:
-                    path = getattr(audio_val, "path", None)
-
-            if isinstance(path, str) and os.path.exists(path):
-                with open(path, "rb") as f:
-                    audio_bytes = f.read()
-                b64 = base64.b64encode(audio_bytes).decode("ascii")
-                return f"data:audio/wav;base64,{b64}"
-        except Exception as e:
-            print(f"[WARN] Failed to build data URL from path: {e}")
-
-        # 2) Fallback: use array + sampling_rate and render WAV in-memory
         try:
             array = None
             sr = None
@@ -60,6 +37,7 @@ class DataManager:
             if isinstance(audio_val, dict):
                 array = audio_val.get("array")
                 sr = audio_val.get("sampling_rate")
+
             if array is None or sr is None:
                 # try mapping-style then attributes
                 try:
@@ -69,15 +47,13 @@ class DataManager:
                     array = getattr(audio_val, "array", None)
                     sr = getattr(audio_val, "sampling_rate", None)
 
-            if array is not None and sr is not None and sf is not None:
-                buf = io.BytesIO()
-                sf.write(buf, np.array(array), int(sr), format="WAV")
-                b64 = base64.b64encode(buf.getvalue()).decode("ascii")
-                return f"data:audio/wav;base64,{b64}"
+            if array is not None and sr is not None:
+                # Return as tuple that Gradio Audio can handle
+                return (np.array(array), int(sr))
         except Exception as e:
-            print(f"[WARN] Failed to build data URL from array/sr: {e}")
+            print(f"[WARN] Failed to extract audio data: {e}")
 
-        print("[WARN] Could not build audio data URL for this example")
+        print("[WARN] Could not extract audio data for this example")
         return None
 
     def load_clips(self) -> List[Clip]:
@@ -97,9 +73,9 @@ class DataManager:
         for row in dataset:
            audio_val = row.get("audio")
 
-            audio_url = self._audio_to_data_url(audio_val)
-            if audio_url is None:
-                print(f"[WARN] Skipping clip {row.get('exercise_id')} – could not build audio URL")
+            audio_data = self._get_audio_data(audio_val)
+            if audio_data is None:
+                print(f"[WARN] Skipping clip {row.get('exercise_id')} – could not extract audio data")
                 continue
 
             clip = Clip(
@@ -109,7 +85,7 @@ class DataManager:
                 exercise=row["exercise"],
                 exercise_id=row["exercise_id"],
                 transcript=row["rt"],
-                audio_url=audio_url,  # string usable in <audio src="...">
+                audio_url=audio_data,  # tuple (array, sample_rate) for Gradio Audio
            )
            clips.append(clip)
 
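For reference, a minimal sketch of what the reworked helper now hands to each Clip. The datasets-style dict below is an illustrative assumption; only the field names come from the diff above.

import numpy as np

# Illustrative audio row, shaped like a Hugging Face datasets audio dict (assumed values)
audio_val = {"array": [0.0, 0.25, -0.25, 0.0], "sampling_rate": 16000}

# Happy path of the new DataManager._get_audio_data: a tuple, not a base64 data URL
audio_data = (np.array(audio_val["array"]), int(audio_val["sampling_rate"]))

print(type(audio_data[0]).__name__, audio_data[1])  # ndarray 16000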
 
backend/models.py CHANGED
@@ -18,8 +18,29 @@ def get_display_model_name(internal_name: str) -> str:
 
 
 def audio_to_base64_url(audio_data):
-    """Return the audio data URL string as-is (no-op since audio_url is already a base64 data URL)."""
-    return audio_data if isinstance(audio_data, str) else None
+    """Convert audio data to base64 URL for HTML audio elements."""
+    if isinstance(audio_data, str) and audio_data.startswith("data:audio/"):
+        return audio_data
+    elif isinstance(audio_data, tuple) and len(audio_data) == 2:
+        # Convert (array, sample_rate) tuple to base64 URL
+        try:
+            import numpy as np
+            import base64
+            import io
+            try:
+                import soundfile as sf
+            except ImportError:
+                return None
+
+            array, sr = audio_data
+            if sf is not None:
+                buf = io.BytesIO()
+                sf.write(buf, np.array(array), int(sr), format="WAV")
+                b64 = base64.b64encode(buf.getvalue()).decode("ascii")
+                return f"data:audio/wav;base64,{b64}"
+        except Exception as e:
+            print(f"[WARN] Failed to convert audio tuple to base64 URL: {e}")
+    return None
 
 
 # Data models
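A hedged usage sketch of the updated audio_to_base64_url: it accepts either an existing data URL or the (array, sample_rate) tuple now stored on Clip.audio_url and returns a string that can still be embedded in raw HTML. The one-second silent clip and 16 kHz rate are assumptions, and soundfile must be installed for the conversion branch to succeed.

import numpy as np
from backend.models import audio_to_base64_url

# Assumed payload: one second of silence at 16 kHz, in the (array, sample_rate) order used above
audio_data = (np.zeros(16000, dtype=np.float32), 16000)

src = audio_to_base64_url(audio_data)
if src is not None:
    html = f'<audio controls><source src="{src}" type="audio/wav"></audio>'
    print(src[:30])  # data:audio/wav;base64,...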
frontend/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/frontend/__pycache__/__init__.cpython-311.pyc and b/frontend/__pycache__/__init__.cpython-311.pyc differ
 
frontend/__pycache__/css.cpython-311.pyc CHANGED
Binary files a/frontend/__pycache__/css.cpython-311.pyc and b/frontend/__pycache__/css.cpython-311.pyc differ
 
frontend/app.py CHANGED
@@ -133,16 +133,13 @@ def create_app(data_manager, session_manager):
                 """
             )
 
-            audio_src = audio_to_base64_url(clip.audio_url) or ""
-            gr.HTML(
-                f"""
-                <div style="background: #1f2937; padding: 15px; border-radius: 8px; margin-bottom: 15px;">
-                    <audio controls style="width: 100%; height: 54px;">
-                        <source src="{audio_src}" type="audio/wav">
-                        Audio not available
-                    </audio>
-                </div>
-                """
+            # Use Gradio's native Audio component for better performance
+            gr.Audio(
+                value=clip.audio_url,
+                label=f"Sample {i} Audio",
+                interactive=False,
+                show_label=False,
+                container=False
             )
 
             with gr.Group(elem_classes=["transcript-box"]):
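One detail worth flagging: Gradio's Audio component documents tuple values as (sample_rate, numpy array), with the sample rate first, while _get_audio_data above returns (array, sample_rate). A minimal standalone sketch in the documented order follows; the Blocks wrapper and the silent clip are assumptions, not code from this repo.

import gradio as gr
import numpy as np

sr = 16000
samples = np.zeros(sr, dtype=np.float32)  # one second of silence (illustrative)

with gr.Blocks() as demo:
    # Gradio's documented tuple format is (sample_rate, data), sample rate first
    gr.Audio(value=(sr, samples), interactive=False, show_label=False, container=False)

if __name__ == "__main__":
    demo.launch()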
frontend/pages/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/__init__.cpython-311.pyc and b/frontend/pages/__pycache__/__init__.cpython-311.pyc differ
 
frontend/pages/__pycache__/ab_gender.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/ab_gender.cpython-311.pyc and b/frontend/pages/__pycache__/ab_gender.cpython-311.pyc differ
 
frontend/pages/__pycache__/ab_model.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/ab_model.cpython-311.pyc and b/frontend/pages/__pycache__/ab_model.cpython-311.pyc differ
 
frontend/pages/__pycache__/conclusion.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/conclusion.cpython-311.pyc and b/frontend/pages/__pycache__/conclusion.cpython-311.pyc differ
 
frontend/pages/__pycache__/intro.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/intro.cpython-311.pyc and b/frontend/pages/__pycache__/intro.cpython-311.pyc differ
 
frontend/pages/__pycache__/mos.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/mos.cpython-311.pyc and b/frontend/pages/__pycache__/mos.cpython-311.pyc differ
 
frontend/pages/__pycache__/samples.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/samples.cpython-311.pyc and b/frontend/pages/__pycache__/samples.cpython-311.pyc differ
 
frontend/pages/__pycache__/thank_you.cpython-311.pyc CHANGED
Binary files a/frontend/pages/__pycache__/thank_you.cpython-311.pyc and b/frontend/pages/__pycache__/thank_you.cpython-311.pyc differ