import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces


# Selectable models: display name shown in the UI -> Hugging Face Hub repo id.
# Insertion order is preserved and is the order seen by anything that iterates
# this mapping.
MODEL_IDS = {
    "Kallamni 4B": "yasserrmd/kallamni-4b-v1",
    "Kallamni 2.6B": "yasserrmd/kallamni-2.6b-v1",
    "Kallamni 1.2B": "yasserrmd/kallamni-1.2b-v1",
}

# Preload models at startup
# Every entry of MODEL_IDS is loaded eagerly when this module is imported and
# stored in `loaded_models` as display name -> (model, tokenizer).
loaded_models = {}
for name, model_id in MODEL_IDS.items():
    print(f"🔄 Loading {name} ...")
    # Weights are requested in bfloat16; device placement is delegated to the
    # library through device_map="auto".
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    loaded_models[name] = (model, tokenizer)
# NOTE: `name`, `model_id`, `model` and `tokenizer` remain bound at module
# scope after the loop and hold the values for the LAST entry of MODEL_IDS.
# Check later module-level code before renaming or removing them.
print("✅ All models loaded successfully!")

# System message (chat-template "system" role) used by chat_fn for every model
# except "Kallamni 4B". The content is one string assembled from adjacent
# literals; the text is sent to the model verbatim, so edit it with care.
# NOTE(review): the wording talks about "generating Q&A samples" and asks for
# "structured Q&A pairs" although it is used for live chat — confirm this is
# intentional.
base_system_prompt = {
    "role": "system",
    "content": (
        'You are "بو سيف", a friendly Emirati assistant who always speaks in authentic Emirati dialect. \n'
        "Follow these rules when generating Q&A samples:\n\n"
        "1. Use **daily spoken Emirati Arabic only** — no Modern Standard Arabic (MSA) or other dialects.\n"
        "   - Examples: \"سير\"، \"وايد\"، \"هالسنة\"، \"الربع\".\n"
        "   - Avoid: \"ذلك\"، \"لقد\"، \"إنه\".\n\n"
        "2. **Tone & Style**:\n"
        "   - Keep it casual, warm, and respectful (like natural family or friend conversations).\n"
        "   - Slightly lengthen answers: 2–4 sentences is ideal.\n"
        "   - Answers should feel like real replies, not textbook lines.\n\n"
        "3. **Content Guidelines**:\n"
        "   - Focus on **daily life topics** (family, food, outings, weather, shopping, work, Ramadan, gatherings, sports, travel, etc.).\n"
        "   - Mix short and medium-length answers, but avoid one-word replies.\n"
        "   - Avoid repetition — don’t overuse words like \"وايد زين\" or \"حلو\". Use synonyms: \"مرتب\"، \"ممتاز\"، \"يهبل\".\n"
        "   - Keep context logical: if the question is about الشارقة, don’t answer about العين.\n\n"
        "4. **Format**:\n"
        "   - Provide as structured Q&A pairs.\n"
        "   - Example:\n"
        "     Q: وين تحبون تسهرون ليلة الخميس؟\n"
        "     A: عادة نسير صوب الكورنيش، نقعد ونسولف لين وقت متأخر. أحيانًا نكملها في الكافيه إذا الجو حلو.\n\n"
        "5. **Respect & Safety**:\n"
        "   - Stay polite.\n"
        "   - No sensitive, political, or inappropriate topics.\n"
        "   - Keep it light and family-friendly."
    )
}


# Specialized system prompt for Kallamni 4B model
# chat_fn selects this message when the chosen model name contains
# "Kallamni 4B". It asks for longer, conversational replies (3–5 sentences).
# Because of the triple-quoted literal, the content starts and ends with a
# newline; the text is sent to the model verbatim.
kallamni_4b_system_prompt =   {
    "role": "system",
    "content": """
You are "بو سيف", a friendly Emirati assistant who speaks in authentic Emirati dialect.

Stay consistent for the entire conversation — never switch tone or shorten replies.

**Core Style:**
- Speak naturally like an Emirati friend chatting in a relaxed setting.
- Always write 3–5 sentences that flow like spoken conversation.
- Keep a warm, expressive, slightly storytelling tone — not robotic or academic.
- Use Emirati words like: وايد، هيه، سرت، عقب، الربع، القعدة، نغير جو، الحمد لله.
- Avoid Modern Standard Arabic and formal words (ذلك، إنه، لقد).

**Expansion Guidance:**
- Add small emotions, examples, or local color (القهوة، البحر، العائلة، البر، الربع).
- If the user’s question is short, expand your answer naturally.
- Imagine you’re replying by voice — relaxed, friendly, and expressive.
- Keep a human rhythm — pauses, reflections, feelings.

**Memory Reminder:**
Before every new answer, silently remind yourself:
"بو سيف يتكلم بروح إماراتية، دايمًا يجاوب بجمل وافية ودافية، فيها إحساس وقصة بسيطة."

Your goal: make every reply feel like a true Emirati conversation.
"""
}


# few_shot = [
#     {"role": "user", "content": "شحالَك اليوم؟"},
#     {"role": "assistant", "content": "الحمدلله زين، وانت كيفك؟"},
#     {"role": "user", "content": "وين ناوي تسير عقب الدوام؟"},
#     {"role": "assistant", "content": "يمكن أمر على المول وأتعشى ويا الربع."},
# ]


# Sampling settings, unpacked as keyword arguments into `model.generate(...)`.
#   "others"      -> used for every model except Kallamni 4B
#                    (low temperature, min-p filtering, at most 60 new tokens).
#   "kallamni-4b" -> used for Kallamni 4B
#                    (temperature 0.7 with top-p / top-k, up to 1024 new tokens).
generation_configs = {
    "others": {
        "do_sample": True,
        "temperature": 0.1,
        "min_p": 0.15,
        "repetition_penalty": 1.05,
        "max_new_tokens": 60,
    },
    "kallamni-4b": {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
        "max_new_tokens": 1024,
        # Use the EOS id of the 4B model's OWN tokenizer as the padding id.
        # A bare `tokenizer` at this point is the variable left over from the
        # preload loop, i.e. the tokenizer of the last model in MODEL_IDS,
        # whose vocabulary need not match the 4B model's.
        "pad_token_id": loaded_models["Kallamni 4B"][1].eos_token_id,
    },
}

# Chat function
@spaces.GPU
def chat_fn(message, history, model_choice):
    """Generate one assistant reply with the selected model.

    Args:
        message: Text of the user's newest message.
        history: Earlier turns as a list of dicts carrying ``"role"`` and
            ``"content"`` (Gradio "messages" format). May be empty or None.
        model_choice: Display name of the model; must be a key of
            ``loaded_models``.

    Returns:
        The reply text with surrounding whitespace removed. If anything
        fails, a string of the form ``"[خطأ داخلي]: <error>"`` is returned
        instead of raising, so the chat UI always gets a message to show.
    """
    try:
        model, tokenizer = loaded_models[model_choice]

        # Prompt and sampling settings are chosen together, exactly once.
        if "Kallamni 4B" in model_choice:
            system_prompt = kallamni_4b_system_prompt
            gen_kwargs = generation_configs["kallamni-4b"]
        else:
            system_prompt = base_system_prompt
            gen_kwargs = generation_configs["others"]

        # Forward only the fields a chat template consumes; the UI layer may
        # attach extra keys (e.g. metadata) to each turn.
        past_turns = [
            {"role": turn["role"], "content": turn["content"]}
            for turn in (history or [])
        ]
        messages = [
            system_prompt,
            *past_turns,
            {"role": "user", "content": message},
        ]

        # return_dict=True yields the attention mask next to the input ids so
        # generate() does not have to infer it.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            tokenize=True,
            return_dict=True,
        ).to(model.device)
        input_ids = inputs["input_ids"]

        print(model_choice)

        output = model.generate(
            input_ids=input_ids,
            attention_mask=inputs.get("attention_mask"),
            **gen_kwargs
        )

        # Decode only the newly generated tokens. Searching the full decoded
        # transcript for chat markers falls back to returning the whole prompt
        # (system message included) whenever generation stops at
        # max_new_tokens before the end-of-turn marker is produced.
        new_tokens = output[0][input_ids.shape[-1]:]
        answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return answer.strip()

    except Exception as e:
        # Top-level boundary for the UI callback: keep the user-facing error
        # message, but also leave a full traceback in the server log.
        import traceback

        traceback.print_exc()
        return f"[خطأ داخلي]: {str(e)}"

# CSS
# Custom stylesheet handed to gr.Blocks(css=...): right-to-left direction and
# right-aligned text for the element whose id is "chat-container".
# NOTE(review): no component in the visible code sets elem_id="chat-container",
# so this rule may currently match nothing — confirm.
css = """
#chat-container { direction: rtl; text-align: right; }
"""

# Gradio Interface
# Layout: centered logo, a model selector, then the chat panel. The dropdown
# is registered as an additional input of the ChatInterface, so the value the
# user currently has selected is passed to chat_fn as `model_choice` on every
# call. (Reading `model_choice.value` inside a lambda only ever yields the
# value the component was constructed with.)
with gr.Blocks(css=css, fill_height=True) as demo:
    gr.HTML(
        """<div style="text-align: center;">
             <img src="https://huggingface.co/spaces/yasserrmd/Kallamni-chat/resolve/main/assets/logo.png" 
                  alt="Logo" width="120">
           </div>"""
    )

    # Model preselected in the dropdown and used for the example prompts.
    default_model = "Kallamni 4B"

    with gr.Row():
        model_choice = gr.Dropdown(
            choices=list(MODEL_IDS.keys()),
            value=default_model,
            label="Model",
        )

    example_prompts = [
        "وين ناوي تسير عقب ما تخلص الدوام اليوم؟",
        "شرايك في الجو هالأيام، والله تحسه حر وايد؟",
        "كيف تقضي الويكند عادةً ويا العيال والربع؟",
        "شو أحلى أكلة دوم تحبها من طبخ الوالدة؟",
        "وين أحلى مكان دوم تاخذ منه قهوة الصبح؟",
    ]

    gr.ChatInterface(
        fn=chat_fn,
        type="messages",
        additional_inputs=[model_choice],
        # With additional inputs, each example is a row:
        # [message, value for each additional input].
        examples=[[prompt, default_model] for prompt in example_prompts],
        title="💬 شات باللهجة الإماراتية",
        cache_examples=True,
        theme="soft",
        fill_height=True,
    )

# Start the Gradio app only when this file is executed as a script (not when
# it is imported); debug=True is passed through to launch().
if __name__ == "__main__":
    demo.launch(debug=True)