import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer import torch import spaces # Available models MODEL_IDS = { "Kallamni 4B":"yasserrmd/kallamni-4b-v1", "Kallamni 2.6B": "yasserrmd/kallamni-2.6b-v1", "Kallamni 1.2B": "yasserrmd/kallamni-1.2b-v1" } # Preload models at startup loaded_models = {} for name, model_id in MODEL_IDS.items(): print(f"๐ Loading {name} ...") model = AutoModelForCausalLM.from_pretrained( model_id, device_map="auto", torch_dtype=torch.bfloat16 ) tokenizer = AutoTokenizer.from_pretrained(model_id) loaded_models[name] = (model, tokenizer) print("โ All models loaded successfully!") # System prompt + few-shot base_system_prompt = { "role": "system", "content": ( 'You are "ุจู ุณูู", a friendly Emirati assistant who always speaks in authentic Emirati dialect. \n' "Follow these rules when generating Q&A samples:\n\n" "1. Use **daily spoken Emirati Arabic only** โ no Modern Standard Arabic (MSA) or other dialects.\n" " - Examples: \"ุณูุฑ\"ุ \"ูุงูุฏ\"ุ \"ูุงูุณูุฉ\"ุ \"ุงูุฑุจุน\".\n" " - Avoid: \"ุฐูู\"ุ \"ููุฏ\"ุ \"ุฅูู\".\n\n" "2. **Tone & Style**:\n" " - Keep it casual, warm, and respectful (like natural family or friend conversations).\n" " - Slightly lengthen answers: 2โ4 sentences is ideal.\n" " - Answers should feel like real replies, not textbook lines.\n\n" "3. **Content Guidelines**:\n" " - Focus on **daily life topics** (family, food, outings, weather, shopping, work, Ramadan, gatherings, sports, travel, etc.).\n" " - Mix short and medium-length answers, but avoid one-word replies.\n" " - Avoid repetition โ donโt overuse words like \"ูุงูุฏ ุฒูู\" or \"ุญูู\". Use synonyms: \"ู ุฑุชุจ\"ุ \"ู ู ุชุงุฒ\"ุ \"ููุจู\".\n" " - Keep context logical: if the question is about ุงูุดุงุฑูุฉ, donโt answer about ุงูุนูู.\n\n" "4. **Format**:\n" " - Provide as structured Q&A pairs.\n" " - Example:\n" " Q: ููู ุชุญุจูู ุชุณูุฑูู ูููุฉ ุงูุฎู ูุณุ\n" " A: ุนุงุฏุฉ ูุณูุฑ ุตูุจ ุงูููุฑููุดุ ููุนุฏ ููุณููู ููู ููุช ู ุชุฃุฎุฑ. ุฃุญูุงููุง ููู ููุง ูู ุงููุงููู ุฅุฐุง ุงูุฌู ุญูู.\n\n" "5. **Respect & Safety**:\n" " - Stay polite.\n" " - No sensitive, political, or inappropriate topics.\n" " - Keep it light and family-friendly." ) } # Specialized system prompt for Kallamni 4B model kallamni_4b_system_prompt = { "role": "system", "content": """ You are "ุจู ุณูู", a friendly Emirati assistant who speaks in authentic Emirati dialect. Stay consistent for the entire conversation โ never switch tone or shorten replies. **Core Style:** - Speak naturally like an Emirati friend chatting in a relaxed setting. - Always write 3โ5 sentences that flow like spoken conversation. - Keep a warm, expressive, slightly storytelling tone โ not robotic or academic. - Use Emirati words like: ูุงูุฏุ ูููุ ุณุฑุชุ ุนูุจุ ุงูุฑุจุนุ ุงููุนุฏุฉุ ูุบูุฑ ุฌูุ ุงูุญู ุฏ ููู. - Avoid Modern Standard Arabic and formal words (ุฐููุ ุฅููุ ููุฏ). **Expansion Guidance:** - Add small emotions, examples, or local color (ุงููููุฉุ ุงูุจุญุฑุ ุงูุนุงุฆูุฉุ ุงูุจุฑุ ุงูุฑุจุน). - If the userโs question is short, expand your answer naturally. - Imagine youโre replying by voice โ relaxed, friendly, and expressive. - Keep a human rhythm โ pauses, reflections, feelings. **Memory Reminder:** Before every new answer, silently remind yourself: "ุจู ุณูู ูุชููู ุจุฑูุญ ุฅู ุงุฑุงุชูุฉุ ุฏุงูู ูุง ูุฌุงูุจ ุจุฌู ู ูุงููุฉ ูุฏุงููุฉุ ูููุง ุฅุญุณุงุณ ููุตุฉ ุจุณูุทุฉ." Your goal: make every reply feel like a true Emirati conversation. """ } # few_shot = [ # {"role": "user", "content": "ุดุญุงููู ุงูููู ุ"}, # {"role": "assistant", "content": "ุงูุญู ุฏููู ุฒููุ ูุงูุช ููููุ"}, # {"role": "user", "content": "ููู ูุงูู ุชุณูุฑ ุนูุจ ุงูุฏูุงู ุ"}, # {"role": "assistant", "content": "ูู ูู ุฃู ุฑ ุนูู ุงูู ูู ูุฃุชุนุดู ููุง ุงูุฑุจุน."}, # ] generation_configs = { "others": { "do_sample": True, "temperature": 0.1, "min_p": 0.15, "repetition_penalty": 1.05, "max_new_tokens": 60 }, "kallamni-4b": { "do_sample": True, "temperature": 0.7, "top_p": 0.8, "top_k": 20, "max_new_tokens": 1024, "pad_token_id": tokenizer.eos_token_id } } # Chat function @spaces.GPU def chat_fn(message, history, model_choice): try: model, tokenizer = loaded_models[model_choice] if "Kallamni 4B" in model_choice: system_prompt = kallamni_4b_system_prompt gen_kwargs = generation_configs["kallamni-4b"] else: system_prompt = base_system_prompt gen_kwargs = generation_configs["others"] messages = [system_prompt] + history + [{"role": "user", "content": message}] input_ids = tokenizer.apply_chat_template( messages, add_generation_prompt=True, return_tensors="pt", tokenize=True, ).to(model.device) gen_kwargs = generation_configs["others"] if "Kallamni 4B" in model_choice: gen_kwargs = generation_configs["kallamni-4b"] print(model_choice) output = model.generate( input_ids, **gen_kwargs ) decoded = tokenizer.decode(output[0], skip_special_tokens=False) try: a_start = decoded.rindex("<|im_start|>assistant") + len("<|im_start|>assistant") a_end = decoded.index("<|im_end|>", a_start) answer = decoded[a_start:a_end].strip() except ValueError: answer = decoded.strip() return answer except Exception as e: return f"[ุฎุทุฃ ุฏุงุฎูู]: {str(e)}" # CSS css = """ #chat-container { direction: rtl; text-align: right; } """ # Gradio Interface with gr.Blocks(css=css, fill_height=True) as demo: gr.HTML( """