# Ringg-TTS-v1.0 / app.py
# utkarshshukla2912's picture
# base space
# 63d250f
# raw
# history blame
# 10.8 kB
import gradio as gr
import requests
import json
import os
# API base URL, read from the BASE_URL environment variable (empty string when
# unset). Trailing slashes are stripped because every request in this file
# builds its URL as f"{BASE_URL}/<endpoint>", which would otherwise contain a
# double slash.
BASE_URL = os.environ.get("BASE_URL", "").rstrip("/")
# Custom CSS for aesthetic design.
# This stylesheet string is handed to gr.Blocks(css=custom_css) when the
# interface is built. Of the classes below, the markup generated in this file
# uses .status-dot / .status-dot-green / .status-dot-red (health indicator,
# animated by the `pulse` keyframes) and .title-bar / .main-title (page header).
# NOTE(review): .health-status, .metric-card, .metric-label, .metric-value,
# .voice-card, .subtitle and .logo are not referenced by any HTML visible in
# this file — confirm whether they are still needed.
custom_css = """
.health-status {
display: flex;
align-items: center;
gap: 10px;
padding: 15px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 12px;
margin-bottom: 20px;
}
.status-dot {
width: 20px;
height: 20px;
border-radius: 50%;
animation: pulse 2s infinite;
margin-top: 25px;
display: inline-block;
}
.status-dot-green {
background-color: #10b981;
box-shadow: 0 0 20px rgba(16, 185, 129, 0.6);
}
.status-dot-red {
background-color: #ef4444;
box-shadow: 0 0 20px rgba(239, 68, 68, 0.6);
}
@keyframes pulse {
0%, 100% {
opacity: 1;
transform: scale(1);
}
50% {
opacity: 0.6;
transform: scale(1.1);
}
}
.metric-card {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
padding: 15px;
border-radius: 10px;
margin: 5px;
color: white;
text-align: center;
}
.metric-label {
font-size: 12px;
opacity: 0.9;
margin-bottom: 5px;
}
.metric-value {
font-size: 24px;
font-weight: bold;
}
.voice-card {
background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
padding: 10px;
border-radius: 8px;
margin: 5px 0;
}
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
.main-title {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
font-size: 48px;
font-weight: bold;
margin-bottom: 20px;
margin-top: 0;
}
.subtitle {
text-align: center;
color: #666;
font-size: 18px;
margin-bottom: 30px;
}
/* Title bar behind the heading text */
.title-bar {
display: flex;
align-items: center;
justify-content: center;
gap: 15px;
margin-bottom: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 12px;
padding: 8px 16px;
height: 60px; /* compact header height */
min-height: 60px;
overflow: hidden;
}
/* Make title text solid and place gradient behind it */
.title-bar .main-title {
background: none !important;
-webkit-text-fill-color: #ffffff !important;
color: #ffffff !important;
margin: 0;
font-size: 22px; /* smaller title for compact header */
line-height: 1.2;
}
.logo {
height: 18px !important; /* ensure the image itself is small */
width: auto !important;
display: inline-block;
margin-right: 8px;
object-fit: contain;
flex-shrink: 0;
}
"""
def check_health():
    """Probe the API's ``/health`` endpoint.

    Returns:
        A ``(is_healthy, message)`` tuple:
        ``(True, "")`` when the endpoint answers HTTP 200 with a JSON body
        whose ``status`` field is ``"healthy"``;
        ``(False, "❌ Service unhealthy")`` for any other response;
        ``(False, "❌ Connection failed: ...")`` when the request or the JSON
        decoding raises.
    """
    try:
        reply = requests.get(f"{BASE_URL}/health", timeout=5)
        # The body is only parsed for a 200 reply, thanks to short-circuiting.
        healthy = reply.status_code == 200 and reply.json().get("status") == "healthy"
    except Exception as e:
        return False, f"❌ Connection failed: {str(e)}"
    if healthy:
        return True, ""
    return False, "❌ Service unhealthy"
def get_voices():
    """Fetch the voice catalogue from the API's ``/voices`` endpoint.

    Returns:
        A list of ``(display_name, voice_id)`` tuples sorted by display name,
        where the display name is ``"<name> (<gender>)"`` (falling back to
        ``"Unknown"`` / ``"N/A"`` for missing fields). An empty list is
        returned for a non-200 reply or when anything raises; in the latter
        case the error is printed.
    """
    try:
        reply = requests.get(f"{BASE_URL}/voices", timeout=10)
        if reply.status_code != 200:
            return []
        catalog = reply.json().get("voices", {})
        # Pair each human-readable label with the id the API expects.
        labelled = [
            (f"{info.get('name', 'Unknown')} ({info.get('gender', 'N/A')})", vid)
            for vid, info in catalog.items()
        ]
        labelled.sort(key=lambda pair: pair[0])
        return labelled
    except Exception as e:
        print(f"Error fetching voices: {e}")
        return []
def synthesize_speech(text, voice_id):
    """Ask the API's ``/synthesize`` endpoint to render ``text`` with ``voice_id``.

    Returns:
        An 8-tuple ``(audio_url, status_message, total_time, rtf,
        wav_duration, vocoder_time, no_vocoder_time, rtf_no_vocoder)``.
        On success the six metric fields are pre-formatted strings built from
        the response's ``metrics`` object. On any failure (blank text, no
        voice, non-200 reply, ``success`` falsy, or an exception)
        ``audio_url`` is ``None`` and the six metric fields are empty strings.
    """

    def failure(message):
        # Every non-success outcome has the same shape: no audio, a status
        # message, and six empty metric slots.
        return (None, message) + ("",) * 6

    if not (text and text.strip()):
        return failure("⚠️ Please enter some text")
    if not voice_id:
        return failure("⚠️ Please select a voice")

    try:
        response = requests.post(
            f"{BASE_URL}/synthesize",
            headers={"Content-Type": "application/json"},
            json={"text": text, "voice_id": voice_id},
            timeout=30,
        )
        if response.status_code != 200:
            return failure(f"❌ API returned status code: {response.status_code}")

        result = response.json()
        if not result.get("success"):
            error_msg = result.get("message", "Unknown error")
            return failure(f"❌ Synthesis failed: {error_msg}")

        metrics = result.get("metrics", {})
        return (
            result.get("audio_url", ""),
            "✅ Audio generated successfully!",
            f"{metrics.get('t', 0):.3f}s",
            f"{metrics.get('rtf', 0):.4f}",
            f"{metrics.get('wav_seconds', 0):.2f}s",
            f"{metrics.get('t_vocoder', 0):.3f}s",
            f"{metrics.get('t_no_vocoder', 0):.3f}s",
            f"{metrics.get('rtf_no_vocoder', 0):.4f}",
        )
    except Exception as e:
        return failure(f"❌ Error: {str(e)}")
def get_health_indicator():
    """Build the HTML snippet for the standalone health-status dot.

    Calls ``check_health()`` and returns a ``<div>`` carrying the
    ``status-dot`` class plus ``status-dot-green`` when the API is healthy or
    ``status-dot-red`` otherwise. The status message is not used.
    """
    healthy, _ = check_health()
    color_class = "status-dot-red"
    if healthy:
        color_class = "status-dot-green"
    return f'\n<div class="status-dot {color_class}"></div>\n'
# Create Gradio interface
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. Confirm that the metrics widgets and the
# generate button belong at the top level of the Blocks (rather than inside
# the second column) before merging.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:

    # Title with Health Status
    def get_title_with_status():
        """Return the title-bar HTML: logo, heading and a health dot.

        The dot is green when ``check_health()`` reports a healthy API and
        red otherwise.
        """
        is_healthy, _ = check_health()
        dot_class = "status-dot-green" if is_healthy else "status-dot-red"
        return (
            "\n"
            '<div class="title-bar">\n'
            '<img src="https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png" width="50" height="50">\n'
            "<h1 class='main-title'>RinggAI - Text-to-Speech</h1>\n"
            f'<div class="status-dot {dot_class}" style="margin-top: 0;"></div>\n'
            "</div>\n"
        )

    health_status = gr.HTML(value=get_title_with_status())

    # Text Input
    text_input = gr.Textbox(
        label="Your text",
        placeholder="Type or paste your text here...",
        lines=6,
        max_lines=10,
    )

    with gr.Row():
        with gr.Column(scale=1):
            # Voice Selection
            voices = get_voices()
            # Maps the label shown in the dropdown to the voice id the API expects.
            voice_choices = dict(voices)
            voice_dropdown = gr.Dropdown(
                choices=list(voice_choices),
                label="Choose a voice style",
                info=f"{len(voices)} voices available",
                value=list(voice_choices)[0] if voices else None,
            )
        with gr.Column(scale=1):
            # Status Message
            # status_output = gr.Markdown("ℹ️ Ready to generate speech")
            # Audio Output
            audio_output = gr.Audio(label="Listen to your audio", type="filepath")

    # Metrics Display (hidden until available)
    metrics_header = gr.Markdown("### 📊 Generation Metrics", visible=False)
    metrics_output = gr.Code(
        label="Metrics", language="json", interactive=False, visible=False
    )
    generate_btn = gr.Button("🎬 Generate Speech", variant="primary", size="lg")

    # Footer
    gr.Markdown("---")
    gr.Markdown("### 🙏 Acknowledgements")
    gr.Markdown("- Based on [ZipVoice](https://github.com/k2-fsa/ZipVoice)")
    gr.Markdown(
        "- Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions"
    )

    # Event Handlers
    def on_generate(text, voice_display):
        """Synthesize ``text`` with the selected voice and prepare the outputs.

        Args:
            text: Text entered by the user.
            voice_display: Dropdown label; translated to a voice id through
                ``voice_choices``.

        Returns:
            A 3-tuple matching the click handler's outputs: the path of the
            downloaded audio file (or ``None`` when no audio is available),
            an update toggling the metrics header, and an update carrying the
            metrics JSON and its visibility.
        """
        import tempfile  # local import: only this handler needs it

        voice_id = voice_choices.get(voice_display)
        audio_url, status, t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc = (
            synthesize_speech(text, voice_id)
        )

        # Download audio if URL is available
        audio_file = None
        if audio_url:
            try:
                audio_response = requests.get(audio_url, timeout=30)
                if audio_response.status_code == 200:
                    # Write to a unique temporary file per request: a fixed
                    # path would let concurrent requests overwrite each
                    # other's audio. The file is kept (delete=False) so it
                    # still exists after this handler returns its path.
                    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                        f.write(audio_response.content)
                    # Only publish the path once the write has succeeded.
                    audio_file = f.name
                else:
                    status = (
                        "⚠️ Audio generated but download failed: "
                        f"HTTP {audio_response.status_code}"
                    )
            except Exception as e:
                status = f"⚠️ Audio generated but download failed: {str(e)}"

        if audio_file is None:
            # No status widget is wired to this handler, so log the reason
            # instead of dropping it silently.
            print(f"Generation finished without playable audio: {status}")

        # Format metrics as JSON string (only if available)
        has_metrics = any([t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc])
        metrics_json = ""
        if has_metrics:
            metrics_json = json.dumps(
                {
                    "total_time": t_time,
                    "rtf": rtf,
                    "audio_duration": wav_dur,
                    "vocoder_time": voc_time,
                    "no_vocoder_time": no_voc_time,
                    "rtf_no_vocoder": rtf_no_voc,
                },
                indent=2,
            )
        return (
            audio_file,
            gr.update(visible=has_metrics),
            gr.update(value=metrics_json, visible=has_metrics),
        )

    generate_btn.click(
        fn=on_generate,
        inputs=[text_input, voice_dropdown],
        outputs=[
            audio_output,
            # status_output,
            metrics_header,
            metrics_output,
        ],
    )

    # Refresh health status on load
    demo.load(fn=get_title_with_status, outputs=[health_status])

    # Set up periodic refresh if Timer is available
    try:
        health_timer = gr.Timer(value=120)
        health_timer.tick(fn=get_title_with_status, outputs=[health_status])
    except Exception as e:
        # Best effort: Timer is not available in every Gradio version. Keep
        # the app running, but leave a trace of why the refresh is disabled.
        print(f"Periodic health refresh disabled: {e}")
# Script entry point: enable request queuing, then serve the app on all
# network interfaces at port 7860 without creating a public share link.
if __name__ == "__main__":
    demo.queue().launch(share=False, server_name="0.0.0.0", server_port=7860)