"""Gradio chat app for the Kallamni Emirati-dialect models.

Preloads all model checkpoints at startup, then serves a ChatInterface in
which the user picks a model from a dropdown; the selection is forwarded to
the generation function via ``additional_inputs`` (the previous lambda read
``model_choice.value`` once at build time, so switching models had no effect).
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces

# Available models: display name -> HF Hub repo id.
MODEL_IDS = {
    "Kallamni 4B": "yasserrmd/kallamni-4b-v1",
    "Kallamni 2.6B": "yasserrmd/kallamni-2.6b-v1",
    "Kallamni 1.2B": "yasserrmd/kallamni-1.2b-v1",
}

# Preload every model/tokenizer pair at startup so the first chat turn is fast.
loaded_models = {}
for name, model_id in MODEL_IDS.items():
    print(f"๐Ÿ”„ Loading {name} ...")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    loaded_models[name] = (model, tokenizer)

print("โœ… All models loaded successfully!")

# System prompt used by the smaller models (short, Q&A-style replies).
base_system_prompt = {
    "role": "system",
    "content": (
        'You are "ุจูˆ ุณูŠู", a friendly Emirati assistant who always speaks in authentic Emirati dialect. \n'
        "Follow these rules when generating Q&A samples:\n\n"
        "1. Use **daily spoken Emirati Arabic only** โ€” no Modern Standard Arabic (MSA) or other dialects.\n"
        " - Examples: \"ุณูŠุฑ\"ุŒ \"ูˆุงูŠุฏ\"ุŒ \"ู‡ุงู„ุณู†ุฉ\"ุŒ \"ุงู„ุฑุจุน\".\n"
        " - Avoid: \"ุฐู„ูƒ\"ุŒ \"ู„ู‚ุฏ\"ุŒ \"ุฅู†ู‡\".\n\n"
        "2. **Tone & Style**:\n"
        " - Keep it casual, warm, and respectful (like natural family or friend conversations).\n"
        " - Slightly lengthen answers: 2โ€“4 sentences is ideal.\n"
        " - Answers should feel like real replies, not textbook lines.\n\n"
        "3. **Content Guidelines**:\n"
        " - Focus on **daily life topics** (family, food, outings, weather, shopping, work, Ramadan, gatherings, sports, travel, etc.).\n"
        " - Mix short and medium-length answers, but avoid one-word replies.\n"
        " - Avoid repetition โ€” donโ€™t overuse words like \"ูˆุงูŠุฏ ุฒูŠู†\" or \"ุญู„ูˆ\". Use synonyms: \"ู…ุฑุชุจ\"ุŒ \"ู…ู…ุชุงุฒ\"ุŒ \"ูŠู‡ุจู„\".\n"
        " - Keep context logical: if the question is about ุงู„ุดุงุฑู‚ุฉ, donโ€™t answer about ุงู„ุนูŠู†.\n\n"
        "4. **Format**:\n"
        " - Provide as structured Q&A pairs.\n"
        " - Example:\n"
        " Q: ูˆูŠู† ุชุญุจูˆู† ุชุณู‡ุฑูˆู† ู„ูŠู„ุฉ ุงู„ุฎู…ูŠุณุŸ\n"
        " A: ุนุงุฏุฉ ู†ุณูŠุฑ ุตูˆุจ ุงู„ูƒูˆุฑู†ูŠุดุŒ ู†ู‚ุนุฏ ูˆู†ุณูˆู„ู ู„ูŠู† ูˆู‚ุช ู…ุชุฃุฎุฑ. ุฃุญูŠุงู†ู‹ุง ู†ูƒู…ู„ู‡ุง ููŠ ุงู„ูƒุงููŠู‡ ุฅุฐุง ุงู„ุฌูˆ ุญู„ูˆ.\n\n"
        "5. **Respect & Safety**:\n"
        " - Stay polite.\n"
        " - No sensitive, political, or inappropriate topics.\n"
        " - Keep it light and family-friendly."
    ),
}

# Specialized system prompt for the Kallamni 4B model (longer, expressive replies).
kallamni_4b_system_prompt = {
    "role": "system",
    "content": """
You are "ุจูˆ ุณูŠู", a friendly Emirati assistant who speaks in authentic Emirati dialect.
Stay consistent for the entire conversation โ€” never switch tone or shorten replies.

**Core Style:**
- Speak naturally like an Emirati friend chatting in a relaxed setting.
- Always write 3โ€“5 sentences that flow like spoken conversation.
- Keep a warm, expressive, slightly storytelling tone โ€” not robotic or academic.
- Use Emirati words like: ูˆุงูŠุฏุŒ ู‡ูŠู‡ุŒ ุณุฑุชุŒ ุนู‚ุจุŒ ุงู„ุฑุจุนุŒ ุงู„ู‚ุนุฏุฉุŒ ู†ุบูŠุฑ ุฌูˆุŒ ุงู„ุญู…ุฏ ู„ู„ู‡.
- Avoid Modern Standard Arabic and formal words (ุฐู„ูƒุŒ ุฅู†ู‡ุŒ ู„ู‚ุฏ).

**Expansion Guidance:**
- Add small emotions, examples, or local color (ุงู„ู‚ู‡ูˆุฉุŒ ุงู„ุจุญุฑุŒ ุงู„ุนุงุฆู„ุฉุŒ ุงู„ุจุฑุŒ ุงู„ุฑุจุน).
- If the userโ€™s question is short, expand your answer naturally.
- Imagine youโ€™re replying by voice โ€” relaxed, friendly, and expressive.
- Keep a human rhythm โ€” pauses, reflections, feelings.

**Memory Reminder:**
Before every new answer, silently remind yourself:
"ุจูˆ ุณูŠู ูŠุชูƒู„ู… ุจุฑูˆุญ ุฅู…ุงุฑุงุชูŠุฉุŒ ุฏุงูŠู…ู‹ุง ูŠุฌุงูˆุจ ุจุฌู…ู„ ูˆุงููŠุฉ ูˆุฏุงููŠุฉุŒ ููŠู‡ุง ุฅุญุณุงุณ ูˆู‚ุตุฉ ุจุณูŠุทุฉ."

Your goal: make every reply feel like a true Emirati conversation.
""",
}

# Per-family decoding settings.
# NOTE: pad_token_id is intentionally NOT stored here; the previous version
# captured `tokenizer.eos_token_id` from the module-level loop variable, i.e.
# the eos id of the *last* model loaded, which is wrong for the 4B model.
# It is now resolved per request from the selected model's own tokenizer.
generation_configs = {
    "others": {
        "do_sample": True,
        "temperature": 0.1,
        "min_p": 0.15,
        "repetition_penalty": 1.05,
        "max_new_tokens": 60,
    },
    "kallamni-4b": {
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
        "max_new_tokens": 1024,
    },
}


@spaces.GPU
def chat_fn(message, history, model_choice):
    """Generate one assistant reply in Emirati dialect.

    Parameters
    ----------
    message : str
        The user's new message.
    history : list[dict]
        Prior turns in OpenAI-style ``{"role", "content"}`` format
        (ChatInterface ``type="messages"``).
    model_choice : str
        A key of ``MODEL_IDS`` selecting which preloaded model to use.

    Returns
    -------
    str
        The assistant's reply, or an Arabic error string on failure.
    """
    try:
        model, tokenizer = loaded_models[model_choice]

        # Pick the prompt + decoding profile once (the old code selected
        # gen_kwargs twice, with the second assignment overriding the first).
        if "Kallamni 4B" in model_choice:
            system_prompt = kallamni_4b_system_prompt
            gen_kwargs = dict(generation_configs["kallamni-4b"])
        else:
            system_prompt = base_system_prompt
            gen_kwargs = dict(generation_configs["others"])
        # Use the selected model's own eos token as padding.
        gen_kwargs.setdefault("pad_token_id", tokenizer.eos_token_id)

        messages = [system_prompt] + history + [{"role": "user", "content": message}]
        input_ids = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            tokenize=True,
        ).to(model.device)

        output = model.generate(input_ids, **gen_kwargs)
        decoded = tokenizer.decode(output[0], skip_special_tokens=False)

        # Extract the last assistant turn from the ChatML-style transcript;
        # fall back to the raw text if the markers are absent.
        try:
            a_start = decoded.rindex("<|im_start|>assistant") + len("<|im_start|>assistant")
            a_end = decoded.index("<|im_end|>", a_start)
            answer = decoded[a_start:a_end].strip()
        except ValueError:
            answer = decoded.strip()

        return answer
    except Exception as e:
        return f"[ุฎุทุฃ ุฏุงุฎู„ูŠ]: {str(e)}"


# Right-to-left layout for Arabic chat.
css = """
#chat-container { direction: rtl; text-align: right; }
"""

# Gradio Interface
with gr.Blocks(css=css, fill_height=True) as demo:
    # NOTE(review): the original HTML markup appears truncated in this copy
    # (only the text "Logo" survives) — restore the full logo markup if available.
    gr.HTML(
        """
        Logo
        """
    )

    with gr.Row():
        model_choice = gr.Dropdown(
            choices=list(MODEL_IDS.keys()),
            value="Kallamni 4B",
            label="Model",  # was mislabeled with the value "Kallamni 4B"
        )

    # The dropdown is wired directly into the chat function; the previous
    # lambda read `model_choice.value` (the build-time initial value), so the
    # user's selection never reached chat_fn, and the gr.State/update_model
    # pair it maintained was never consumed.
    gr.ChatInterface(
        fn=chat_fn,
        additional_inputs=[model_choice],
        type="messages",
        # With additional_inputs, each example supplies the extra input too.
        examples=[
            ["ูˆูŠู† ู†ุงูˆูŠ ุชุณูŠุฑ ุนู‚ุจ ู…ุง ุชุฎู„ุต ุงู„ุฏูˆุงู… ุงู„ูŠูˆู…ุŸ", "Kallamni 4B"],
            ["ุดุฑุงูŠูƒ ููŠ ุงู„ุฌูˆ ู‡ุงู„ุฃูŠุงู…ุŒ ูˆุงู„ู„ู‡ ุชุญุณู‡ ุญุฑ ูˆุงูŠุฏุŸ", "Kallamni 4B"],
            ["ูƒูŠู ุชู‚ุถูŠ ุงู„ูˆูŠูƒู†ุฏ ุนุงุฏุฉู‹ ูˆูŠุง ุงู„ุนูŠุงู„ ูˆุงู„ุฑุจุนุŸ", "Kallamni 4B"],
            ["ุดูˆ ุฃุญู„ู‰ ุฃูƒู„ุฉ ุฏูˆู… ุชุญุจู‡ุง ู…ู† ุทุจุฎ ุงู„ูˆุงู„ุฏุฉุŸ", "Kallamni 4B"],
            ["ูˆูŠู† ุฃุญู„ู‰ ู…ูƒุงู† ุฏูˆู… ุชุงุฎุฐ ู…ู†ู‡ ู‚ู‡ูˆุฉ ุงู„ุตุจุญุŸ", "Kallamni 4B"],
        ],
        title="๐Ÿ’ฌ ุดุงุช ุจุงู„ู„ู‡ุฌุฉ ุงู„ุฅู…ุงุฑุงุชูŠุฉ",
        # Caching would run GPU generation for every example at startup.
        cache_examples=False,
        theme="soft",
        fill_height=True,
    )

if __name__ == "__main__":
    demo.launch(debug=True)