"""
Pip's Voice - Text-to-speech with emotional tone matching.
Uses ElevenLabs for high-quality, expressive speech.
"""

import asyncio
from typing import Optional, AsyncGenerator
from dataclasses import dataclass

from services.elevenlabs_client import ElevenLabsClient


@dataclass
class VoiceResponse:
    """
    Audio response from Pip.

    Result container produced by ``PipVoice.speak``. On success it carries the
    synthesized audio; on failure ``audio_bytes`` is empty and ``error`` holds
    a description of what went wrong.
    """
    # Synthesized audio payload; b"" when synthesis failed.
    audio_bytes: bytes
    # Name of the tone that was requested for this utterance (e.g. "warm").
    tone_used: str
    # "flash" or "expressive" on success; "none" when no audio was produced.
    model_used: str
    # Failure description; None when synthesis succeeded.
    error: Optional[str] = None


class PipVoice:
    """
    Pip's voice synthesis with emotional tone matching.

    Chooses a voice tone from the detected emotions and Pip's current action,
    then delegates the actual synthesis to ``ElevenLabsClient``.
    """

    # Tone used whenever an emotion or action has no entry in the maps below.
    _DEFAULT_TONE = "warm"

    # Intensity (1-10 scale) at or above which the action's tone overrides
    # the tone suggested by the primary emotion.
    _HIGH_INTENSITY_THRESHOLD = 7

    def __init__(self):
        self.client = ElevenLabsClient()

        # Emotion to tone mapping; unknown emotions fall back to _DEFAULT_TONE.
        self._emotion_tone_map = {
            # Positive emotions
            "happy": "warm",
            "joy": "excited",
            "excited": "excited",
            "proud": "warm",
            "grateful": "warm",
            "love": "warm",
            "hopeful": "warm",

            # Negative emotions
            "sad": "gentle",
            "melancholy": "gentle",
            "grief": "gentle",
            "lonely": "gentle",
            "disappointed": "gentle",

            # Anxious emotions
            "anxious": "calm",
            "worried": "calm",
            "nervous": "calm",
            "overwhelmed": "calm",
            "stressed": "calm",

            # Other emotions
            "angry": "calm",
            "frustrated": "calm",
            "confused": "warm",
            "curious": "mysterious",
            "peaceful": "calm",
            "tired": "calm",
            "neutral": "warm",
        }

        # Action to tone mapping; unknown actions fall back to _DEFAULT_TONE.
        self._action_tone_map = {
            "reflect": "warm",
            "celebrate": "excited",
            "comfort": "gentle",
            "calm": "calm",
            "energize": "warm",
            "curiosity": "mysterious",
            "intervene": "gentle",
        }

    @staticmethod
    def _normalize_key(value) -> str:
        """Return *value* stripped and lowercased, or "" if it is not a string."""
        if isinstance(value, str):
            return value.strip().lower()
        return ""

    @staticmethod
    def _is_blank(text) -> bool:
        """Return True when *text* is empty, None, or whitespace-only."""
        if not text:
            return True
        return isinstance(text, str) and not text.strip()

    def get_tone_for_context(
        self,
        emotions: list[str],
        action: str,
        intensity: int = 5
    ) -> str:
        """
        Determine the best voice tone based on emotional context.

        Args:
            emotions: Detected emotions; only the first (primary) one is used.
            action: Pip's current action (e.g. "reflect", "comfort").
            intensity: Emotional intensity (1-10).

        Returns:
            The action's tone when there are no emotions or the intensity is
            high; otherwise the tone mapped to the primary emotion. Unknown
            or non-string keys resolve to the default tone.
        """
        # Lookups are case- and whitespace-insensitive for both maps.
        action_tone = self._action_tone_map.get(
            self._normalize_key(action), self._DEFAULT_TONE
        )

        if not emotions:
            return action_tone

        # For high intensity, lean towards action tone
        if intensity >= self._HIGH_INTENSITY_THRESHOLD:
            return action_tone

        # For low intensity, lean towards emotion tone
        return self._emotion_tone_map.get(
            self._normalize_key(emotions[0]), self._DEFAULT_TONE
        )

    async def speak(
        self,
        text: str,
        emotions: Optional[list[str]] = None,
        action: str = "reflect",
        intensity: int = 5,
        use_fast_model: bool = True
    ) -> VoiceResponse:
        """
        Generate speech for text with appropriate emotional tone.

        Args:
            text: What Pip should say
            emotions: Detected emotions for tone matching
            action: Pip's current action
            intensity: Emotional intensity (1-10)
            use_fast_model: Use Flash model for speed

        Returns:
            VoiceResponse with audio bytes. When the text is blank or the
            client returns no audio, ``audio_bytes`` is empty, ``model_used``
            is "none" and ``error`` describes the failure.
        """
        tone = self.get_tone_for_context(emotions or [], action, intensity)

        # Nothing to say: report it without making a synthesis request.
        if self._is_blank(text):
            return VoiceResponse(
                audio_bytes=b"",
                tone_used=tone,
                model_used="none",
                error="No text provided"
            )

        audio_bytes = await self.client.speak(
            text=text,
            tone=tone,
            use_fast_model=use_fast_model
        )

        # A falsy result from the client is treated as a failed synthesis.
        if not audio_bytes:
            return VoiceResponse(
                audio_bytes=b"",
                tone_used=tone,
                model_used="none",
                error="Failed to generate speech"
            )

        return VoiceResponse(
            audio_bytes=audio_bytes,
            tone_used=tone,
            model_used="flash" if use_fast_model else "expressive"
        )

    async def speak_stream(
        self,
        text: str,
        emotions: Optional[list[str]] = None,
        action: str = "reflect",
        intensity: int = 5
    ) -> AsyncGenerator[bytes, None]:
        """
        Stream speech generation for lower latency.

        Args:
            text: What Pip should say
            emotions: Detected emotions for tone matching
            action: Pip's current action
            intensity: Emotional intensity (1-10)

        Yields:
            Audio chunks as produced by the client's ``speak_stream``.
            Blank text yields nothing.
        """
        # Nothing to say: end the stream without contacting the client.
        if self._is_blank(text):
            return

        tone = self.get_tone_for_context(emotions or [], action, intensity)

        async for chunk in self.client.speak_stream(text, tone):
            yield chunk

    async def speak_acknowledgment(self, ack_text: str) -> VoiceResponse:
        """
        Quick speech for acknowledgments (uses fastest model + tone).

        Calls ``speak`` with the "reflect" action and the fast model.
        """
        return await self.speak(
            text=ack_text,
            action="reflect",
            use_fast_model=True
        )

    async def speak_intervention(self, text: str) -> VoiceResponse:
        """
        Speech for intervention scenarios - gentle and calming.

        Calls ``speak`` with the "intervene" action, which maps to the
        "gentle" tone.
        """
        return await self.speak(
            text=text,
            action="intervene",
            use_fast_model=False  # Use expressive model for nuance
        )

    async def get_voices(self) -> list[dict]:
        """Get available voices for potential customization."""
        return await self.client.get_available_voices()


class PipEars:
    """
    Pip's hearing - speech-to-text for voice input.

    Thin wrapper that forwards transcription requests to ``OpenAIClient``
    (OpenAI Whisper).
    """

    def __init__(self):
        # Imported at construction time rather than at module import time.
        from services.openai_client import OpenAIClient

        self.client = OpenAIClient()

    async def listen(self, audio_file_path: str) -> str:
        """
        Transcribe an audio file on disk.

        Args:
            audio_file_path: Location of the audio file to transcribe

        Returns:
            Whatever the client's ``transcribe_audio`` produces for that path
        """
        transcript = await self.client.transcribe_audio(audio_file_path)
        return transcript

    async def listen_bytes(self, audio_bytes: bytes, filename: str = "audio.wav") -> str:
        """
        Transcribe in-memory audio.

        Args:
            audio_bytes: Raw audio bytes
            filename: Filename hint for format detection

        Returns:
            Whatever the client's ``transcribe_audio_bytes`` produces
        """
        transcript = await self.client.transcribe_audio_bytes(
            audio_bytes,
            filename,
        )
        return transcript