Spaces:

RinggAI
/

Ringg-TTS-v1.0

Running

App Files Files Community

Ringg-TTS-v1.0 / app.py

utkarshshukla2912

base space

63d250f about 2 months ago

raw

history blame

10.8 kB

	import gradio as gr
	import requests
	import json
	import os

	# Root URL of the TTS backend, read from the BASE_URL environment variable.
	# Falls back to an empty string when the variable is not set.
	BASE_URL = os.getenv("BASE_URL", "")


	# Stylesheet handed to gr.Blocks(css=custom_css) when the UI is built. It defines:
	#   - .health-status, .status-dot, .status-dot-green/-red and the `pulse`
	#     keyframes used by the health indicator;
	#   - .metric-card, .metric-label, .metric-value and .voice-card card styles;
	#   - .gradio-container sizing (max-width 1200px, centered);
	#   - .main-title, .subtitle, .title-bar and .logo for the page header.
	# The whole literal is runtime data: any edit here changes the rendered page.
	custom_css = """
	.health-status {
	display: flex;
	align-items: center;
	gap: 10px;
	padding: 15px;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	border-radius: 12px;
	margin-bottom: 20px;
	}

	.status-dot {
	width: 20px;
	height: 20px;
	border-radius: 50%;
	animation: pulse 2s infinite;
	margin-top: 25px;
	display: inline-block;
	}

	.status-dot-green {
	background-color: #10b981;
	box-shadow: 0 0 20px rgba(16, 185, 129, 0.6);
	}

	.status-dot-red {
	background-color: #ef4444;
	box-shadow: 0 0 20px rgba(239, 68, 68, 0.6);
	}

	@keyframes pulse {
	0%, 100% {
	opacity: 1;
	transform: scale(1);
	}
	50% {
	opacity: 0.6;
	transform: scale(1.1);
	}
	}

	.metric-card {
	background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
	padding: 15px;
	border-radius: 10px;
	margin: 5px;
	color: white;
	text-align: center;
	}

	.metric-label {
	font-size: 12px;
	opacity: 0.9;
	margin-bottom: 5px;
	}

	.metric-value {
	font-size: 24px;
	font-weight: bold;
	}

	.voice-card {
	background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
	padding: 10px;
	border-radius: 8px;
	margin: 5px 0;
	}

	.gradio-container {
	max-width: 1200px !important;
	margin: auto !important;
	}

	.main-title {
	text-align: center;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	font-size: 48px;
	font-weight: bold;
	margin-bottom: 20px;
	margin-top: 0;
	}

	.subtitle {
	text-align: center;
	color: #666;
	font-size: 18px;
	margin-bottom: 30px;
	}

	/* Title bar behind the heading text */
	.title-bar {
	display: flex;
	align-items: center;
	justify-content: center;
	gap: 15px;
	margin-bottom: 20px;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	border-radius: 12px;
	padding: 8px 16px;
	height: 60px; /* compact header height */
	min-height: 60px;
	overflow: hidden;
	}

	/* Make title text solid and place gradient behind it */
	.title-bar .main-title {
	background: none !important;
	-webkit-text-fill-color: #ffffff !important;
	color: #ffffff !important;
	margin: 0;
	font-size: 22px; /* smaller title for compact header */
	line-height: 1.2;
	}

	.logo {
	height: 18px !important; /* ensure the image itself is small */
	width: auto !important;
	display: inline-block;
	margin-right: 8px;
	object-fit: contain;
	flex-shrink: 0;
	}
	"""


	def check_health():
	    """Probe the API's ``/health`` endpoint.

	    Returns:
	        A ``(is_healthy, message)`` tuple. ``(True, "")`` only when the
	        endpoint answers HTTP 200 with a JSON body whose ``status`` field is
	        ``"healthy"``; otherwise ``False`` together with a user-facing
	        message. Any exception raised while probing is reported as a
	        connection failure instead of propagating.
	    """
	    try:
	        reply = requests.get(f"{BASE_URL}/health", timeout=5)
	        # The JSON body is only parsed when the status code is 200.
	        healthy = (
	            reply.status_code == 200 and reply.json().get("status") == "healthy"
	        )
	    except Exception as exc:
	        return False, f"❌ Connection failed: {exc}"
	    return (True, "") if healthy else (False, "❌ Service unhealthy")


	def get_voices():
	    """Fetch the voice catalogue from the API's ``/voices`` endpoint.

	    Returns:
	        A list of ``(display_name, voice_id)`` tuples sorted by display name,
	        where the display name is ``"<name> (<gender>)"`` (``"Unknown"`` and
	        ``"N/A"`` stand in for missing fields). An empty list is returned on
	        a non-200 response or on any error; errors are printed.
	    """
	    try:
	        reply = requests.get(f"{BASE_URL}/voices", timeout=10)
	        if reply.status_code != 200:
	            return []
	        catalogue = reply.json().get("voices", {})
	        choices = [
	            (f"{info.get('name', 'Unknown')} ({info.get('gender', 'N/A')})", vid)
	            for vid, info in catalogue.items()
	        ]
	        choices.sort(key=lambda pair: pair[0])
	        return choices
	    except Exception as exc:
	        print(f"Error fetching voices: {exc}")
	        return []


	# (response key, format spec, unit suffix) for each metric, in return order.
	_METRIC_FORMATS = (
	    ("t", ".3f", "s"),
	    ("rtf", ".4f", ""),
	    ("wav_seconds", ".2f", "s"),
	    ("t_vocoder", ".3f", "s"),
	    ("t_no_vocoder", ".3f", "s"),
	    ("rtf_no_vocoder", ".4f", ""),
	)


	def _synthesis_failure(message):
	    """Return the 8-tuple used whenever no audio was produced."""
	    return (None, message, "", "", "", "", "", "")


	def _format_metrics(metrics):
	    """Format the six metrics as display strings; missing or null values count as 0."""
	    return tuple(
	        format(metrics.get(key) or 0, spec) + suffix
	        for key, spec, suffix in _METRIC_FORMATS
	    )


	def synthesize_speech(text, voice_id):
	    """Ask the API's ``/synthesize`` endpoint to turn ``text`` into speech.

	    Args:
	        text: Text to synthesize; empty or whitespace-only text is rejected
	            without contacting the API.
	        voice_id: Identifier of the voice to use; a falsy value is rejected
	            without contacting the API.

	    Returns:
	        An 8-tuple ``(audio_url, status_message, total_time, rtf,
	        wav_duration, vocoder_time, no_vocoder_time, rtf_no_vocoder)``.
	        On success ``audio_url`` is the URL reported by the API and the six
	        metrics are formatted strings. On any failure ``audio_url`` is
	        ``None``, the status message describes the problem and the six
	        metric slots are empty strings. Exceptions are never propagated.
	    """
	    if not text or not text.strip():
	        return _synthesis_failure("⚠️ Please enter some text")
	    if not voice_id:
	        return _synthesis_failure("⚠️ Please select a voice")

	    try:
	        response = requests.post(
	            f"{BASE_URL}/synthesize",
	            headers={"Content-Type": "application/json"},
	            json={"text": text, "voice_id": voice_id},
	            timeout=30,
	        )
	        if response.status_code != 200:
	            return _synthesis_failure(
	                f"❌ API returned status code: {response.status_code}"
	            )

	        result = response.json()
	        if not result.get("success"):
	            error_msg = result.get("message", "Unknown error")
	            return _synthesis_failure(f"❌ Synthesis failed: {error_msg}")

	        # `or {}`: a null "metrics" field must not turn a successful
	        # synthesis into an error and lose the generated audio.
	        metrics = result.get("metrics") or {}
	        return (
	            result.get("audio_url", ""),
	            "✅ Audio generated successfully!",
	            *_format_metrics(metrics),
	        )
	    except Exception as e:
	        return _synthesis_failure(f"❌ Error: {e}")


	def get_health_indicator():
	    """Return an HTML snippet with the health status dot.

	    The dot uses the ``status-dot-green`` class when ``check_health()``
	    reports the service as healthy and ``status-dot-red`` otherwise; the
	    accompanying status message is not used.
	    """
	    healthy, _ = check_health()
	    if healthy:
	        dot_class = "status-dot-green"
	    else:
	        dot_class = "status-dot-red"

	    return f"""
	<div class="status-dot {dot_class}"></div>
	"""


	# Create Gradio interface
	# NOTE(review): this listing has lost its indentation, so which of the
	# components below sit inside the Row / Column containers cannot be
	# determined from here — confirm against the original file before re-indenting.
	with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
	# Title with Health Status
	# Returns the title-bar HTML: logo image, heading and a status dot that is
	# green when check_health() reports healthy and red otherwise. The inline
	# "margin-top: 0" overrides the 25px top margin set by the .status-dot rule.
	def get_title_with_status():
	is_healthy, _ = check_health()
	dot_class = "status-dot-green" if is_healthy else "status-dot-red"
	return f"""
	<div class="title-bar">
	<img src="https://storage.googleapis.com/desivocal-prod/desi-vocal/logo.png" width="50" height="50">
	<h1 class='main-title'>RinggAI - Text-to-Speech</h1>
	<div class="status-dot {dot_class}" style="margin-top: 0;"></div>
	</div>
	"""

	# Rendered once here (one health check while the UI is being built); the
	# same function is registered again later for demo.load and the timer tick.
	health_status = gr.HTML(value=get_title_with_status())

	# Text Input
	text_input = gr.Textbox(
	label="Your text",
	placeholder="Type or paste your text here...",
	lines=6,
	max_lines=10,
	)

	with gr.Row():
	with gr.Column(scale=1):
	# Voice Selection
	# Voices are fetched once, while the UI is being built. voice_choices maps
	# the display label to the voice id so on_generate can translate the
	# dropdown selection back; identical labels overwrite each other here.
	voices = get_voices()
	voice_choices = {display: vid for display, vid in voices}

	# The first label is preselected when at least one voice was returned.
	voice_dropdown = gr.Dropdown(
	choices=list(voice_choices.keys()),
	label="Choose a voice style",
	info=f"{len(voices)} voices available",
	value=list(voice_choices.keys())[0] if voices else None,
	)

	with gr.Column(scale=1):
	# Status Message
	# status_output = gr.Markdown("ℹ️ Ready to generate speech")

	# Audio Output
	# type="filepath": on_generate returns the path of the downloaded audio file.

	audio_output = gr.Audio(label="Listen to your audio", type="filepath")

	# Metrics Display (hidden until available)
	# Both widgets start invisible; on_generate returns gr.update(visible=...)
	# for them so they are shown only when metrics are present.
	metrics_header = gr.Markdown("### 📊 Generation Metrics", visible=False)

	metrics_output = gr.Code(
	label="Metrics", language="json", interactive=False, visible=False
	)

	generate_btn = gr.Button("🎬 Generate Speech", variant="primary", size="lg")
	# Footer
	gr.Markdown("---")
	gr.Markdown("### 🙏 Acknowledgements")
	gr.Markdown("- Based on [ZipVoice](https://github.com/k2-fsa/ZipVoice)")
	gr.Markdown(
	"- Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions"
	)

	# Event Handlers
	def on_generate(text, voice_display):
	    """Synthesize ``text`` with the selected voice and fetch the audio.

	    Args:
	        text: Text typed into the input box.
	        voice_display: Label selected in the voice dropdown; translated to
	            a voice id through ``voice_choices``.

	    Returns:
	        A 3-tuple feeding ``audio_output``, ``metrics_header`` and
	        ``metrics_output``: the path of the downloaded audio file (``None``
	        when nothing could be downloaded) and two ``gr.update`` objects that
	        make the metrics widgets visible only when metrics are available.
	    """
	    import tempfile  # local import: only this handler needs it

	    voice_id = voice_choices.get(voice_display)
	    audio_url, _status, t_time, rtf, wav_dur, voc_time, no_voc_time, rtf_no_voc = (
	        synthesize_speech(text, voice_id)
	    )

	    # Download audio if URL is available
	    audio_file = None
	    if audio_url:
	        try:
	            audio_response = requests.get(audio_url, timeout=30)
	            if audio_response.status_code == 200:
	                # One uniquely named file per request: a fixed shared path
	                # lets concurrent requests overwrite each other's audio.
	                # delete=False keeps the file on disk after the handle is
	                # closed so the returned path stays readable.
	                with tempfile.NamedTemporaryFile(
	                    suffix=".wav", delete=False
	                ) as f:
	                    f.write(audio_response.content)
	                # Only publish the path once the write has succeeded.
	                audio_file = f.name
	        except Exception as e:
	            # No status widget is wired to the click outputs, so make the
	            # failure visible in the server log instead of dropping it.
	            _status = f"⚠️ Audio generated but download failed: {e}"
	            print(_status)

	    # Format metrics as JSON string (only if available)
	    metrics = {
	        "total_time": t_time,
	        "rtf": rtf,
	        "audio_duration": wav_dur,
	        "vocoder_time": voc_time,
	        "no_vocoder_time": no_voc_time,
	        "rtf_no_vocoder": rtf_no_voc,
	    }
	    # synthesize_speech returns empty strings for every metric on failure.
	    has_metrics = any(metrics.values())
	    metrics_json = json.dumps(metrics, indent=2) if has_metrics else ""

	    return (
	        audio_file,
	        gr.update(visible=has_metrics),
	        gr.update(value=metrics_json, visible=has_metrics),
	    )

	# Clicking the button runs on_generate and routes its three return values
	# to the audio player and the two metrics widgets (status_output is disabled).
	generate_btn.click(
	    fn=on_generate,
	    inputs=[text_input, voice_dropdown],
	    outputs=[
	        audio_output,
	        # status_output,
	        metrics_header,
	        metrics_output,
	    ],
	)

	# Refresh health status on load
	demo.load(fn=get_title_with_status, outputs=[health_status])

	# Set up periodic refresh (every 120 seconds) if Timer is available.
	# This stays best-effort so a Gradio version without Timer still starts,
	# but the reason is printed rather than silently discarded.
	try:
	    health_timer = gr.Timer(value=120)
	    health_timer.tick(fn=get_title_with_status, outputs=[health_status])
	except Exception as exc:
	    print(f"Periodic health refresh disabled: {exc}")

	if __name__ == "__main__":
	    # Enable the request queue, then serve on all interfaces at port 7860
	    # without creating a public share link.
	    demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)