import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
# Available models
MODEL_IDS = {
"Kallamni 4B":"yasserrmd/kallamni-4b-v1",
"Kallamni 2.6B": "yasserrmd/kallamni-2.6b-v1",
"Kallamni 1.2B": "yasserrmd/kallamni-1.2b-v1"
}
# Preload models at startup
loaded_models = {}
for name, model_id in MODEL_IDS.items():
print(f"๐Ÿ”„ Loading {name} ...")
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
loaded_models[name] = (model, tokenizer)
print("โœ… All models loaded successfully!")
# System prompt + few-shot
base_system_prompt = {
"role": "system",
"content": (
'You are "ุจูˆ ุณูŠู", a friendly Emirati assistant who always speaks in authentic Emirati dialect. \n'
"Follow these rules when generating Q&A samples:\n\n"
"1. Use **daily spoken Emirati Arabic only** โ€” no Modern Standard Arabic (MSA) or other dialects.\n"
" - Examples: \"ุณูŠุฑ\"ุŒ \"ูˆุงูŠุฏ\"ุŒ \"ู‡ุงู„ุณู†ุฉ\"ุŒ \"ุงู„ุฑุจุน\".\n"
" - Avoid: \"ุฐู„ูƒ\"ุŒ \"ู„ู‚ุฏ\"ุŒ \"ุฅู†ู‡\".\n\n"
"2. **Tone & Style**:\n"
" - Keep it casual, warm, and respectful (like natural family or friend conversations).\n"
" - Slightly lengthen answers: 2โ€“4 sentences is ideal.\n"
" - Answers should feel like real replies, not textbook lines.\n\n"
"3. **Content Guidelines**:\n"
" - Focus on **daily life topics** (family, food, outings, weather, shopping, work, Ramadan, gatherings, sports, travel, etc.).\n"
" - Mix short and medium-length answers, but avoid one-word replies.\n"
" - Avoid repetition โ€” donโ€™t overuse words like \"ูˆุงูŠุฏ ุฒูŠู†\" or \"ุญู„ูˆ\". Use synonyms: \"ู…ุฑุชุจ\"ุŒ \"ู…ู…ุชุงุฒ\"ุŒ \"ูŠู‡ุจู„\".\n"
" - Keep context logical: if the question is about ุงู„ุดุงุฑู‚ุฉ, donโ€™t answer about ุงู„ุนูŠู†.\n\n"
"4. **Format**:\n"
" - Provide as structured Q&A pairs.\n"
" - Example:\n"
" Q: ูˆูŠู† ุชุญุจูˆู† ุชุณู‡ุฑูˆู† ู„ูŠู„ุฉ ุงู„ุฎู…ูŠุณุŸ\n"
" A: ุนุงุฏุฉ ู†ุณูŠุฑ ุตูˆุจ ุงู„ูƒูˆุฑู†ูŠุดุŒ ู†ู‚ุนุฏ ูˆู†ุณูˆู„ู ู„ูŠู† ูˆู‚ุช ู…ุชุฃุฎุฑ. ุฃุญูŠุงู†ู‹ุง ู†ูƒู…ู„ู‡ุง ููŠ ุงู„ูƒุงููŠู‡ ุฅุฐุง ุงู„ุฌูˆ ุญู„ูˆ.\n\n"
"5. **Respect & Safety**:\n"
" - Stay polite.\n"
" - No sensitive, political, or inappropriate topics.\n"
" - Keep it light and family-friendly."
)
}
# Specialized system prompt for Kallamni 4B model
kallamni_4b_system_prompt = {
"role": "system",
"content": """
You are "ุจูˆ ุณูŠู", a friendly Emirati assistant who speaks in authentic Emirati dialect.
Stay consistent for the entire conversation โ€” never switch tone or shorten replies.
**Core Style:**
- Speak naturally like an Emirati friend chatting in a relaxed setting.
- Always write 3–5 sentences that flow like spoken conversation.
- Keep a warm, expressive, slightly storytelling tone, not robotic or academic.
- Use Emirati words like: وايد، هيه، سرت، عقب، الربع، القعدة، نغير جو، الحمد لله.
- Avoid Modern Standard Arabic and formal words (ذلك، إنه، لقد).
**Expansion Guidance:**
- Add small emotions, examples, or local color (القهوة، البحر، العائلة، البر، الربع).
- If the user's question is short, expand your answer naturally.
- Imagine you're replying by voice: relaxed, friendly, and expressive.
- Keep a human rhythm: pauses, reflections, feelings.
**Memory Reminder:**
Before every new answer, silently remind yourself:
"ุจูˆ ุณูŠู ูŠุชูƒู„ู… ุจุฑูˆุญ ุฅู…ุงุฑุงุชูŠุฉุŒ ุฏุงูŠู…ู‹ุง ูŠุฌุงูˆุจ ุจุฌู…ู„ ูˆุงููŠุฉ ูˆุฏุงููŠุฉุŒ ููŠู‡ุง ุฅุญุณุงุณ ูˆู‚ุตุฉ ุจุณูŠุทุฉ."
Your goal: make every reply feel like a true Emirati conversation.
"""
}
# few_shot = [
# {"role": "user", "content": "ุดุญุงู„ูŽูƒ ุงู„ูŠูˆู…ุŸ"},
# {"role": "assistant", "content": "ุงู„ุญู…ุฏู„ู„ู‡ ุฒูŠู†ุŒ ูˆุงู†ุช ูƒูŠููƒุŸ"},
# {"role": "user", "content": "ูˆูŠู† ู†ุงูˆูŠ ุชุณูŠุฑ ุนู‚ุจ ุงู„ุฏูˆุงู…ุŸ"},
# {"role": "assistant", "content": "ูŠู…ูƒู† ุฃู…ุฑ ุนู„ู‰ ุงู„ู…ูˆู„ ูˆุฃุชุนุดู‰ ูˆูŠุง ุงู„ุฑุจุน."},
# ]
generation_configs = {
"others": {
"do_sample": True,
"temperature": 0.1,
"min_p": 0.15,
"repetition_penalty": 1.05,
"max_new_tokens": 60
},
"kallamni-4b": {
"do_sample": True,
"temperature": 0.7,
"top_p": 0.8,
"top_k": 20,
"max_new_tokens": 1024,
"pad_token_id": tokenizer.eos_token_id
}
}
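# "others": conservative decoding for the smaller models (low temperature, short replies).
# "kallamni-4b": freer sampling and much longer answers for the 4B model.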
# Chat function
@spaces.GPU
def chat_fn(message, history, model_choice):
try:
model, tokenizer = loaded_models[model_choice]
if "Kallamni 4B" in model_choice:
system_prompt = kallamni_4b_system_prompt
gen_kwargs = generation_configs["kallamni-4b"]
else:
system_prompt = base_system_prompt
gen_kwargs = generation_configs["others"]
messages = [system_prompt] + history + [{"role": "user", "content": message}]
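        # Tokenize the whole conversation and append the assistant-generation prompt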
input_ids = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt",
tokenize=True,
).to(model.device)
        # Merge in a pad token taken from the selected model's own tokenizer
        gen_kwargs = {**gen_kwargs, "pad_token_id": tokenizer.eos_token_id}
output = model.generate(
input_ids,
**gen_kwargs
)
decoded = tokenizer.decode(output[0], skip_special_tokens=False)
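        # Pull the last assistant turn out of the ChatML-style markers; fall back to the full text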
try:
a_start = decoded.rindex("<|im_start|>assistant") + len("<|im_start|>assistant")
a_end = decoded.index("<|im_end|>", a_start)
answer = decoded[a_start:a_end].strip()
except ValueError:
answer = decoded.strip()
return answer
except Exception as e:
return f"[ุฎุทุฃ ุฏุงุฎู„ูŠ]: {str(e)}"
# CSS
css = """
#chat-container { direction: rtl; text-align: right; }
"""
# Gradio Interface
with gr.Blocks(css=css, fill_height=True) as demo:
gr.HTML(
"""<div style="text-align: center;">
<img src="https://huggingface.co/spaces/yasserrmd/Kallamni-chat/resolve/main/assets/logo.png"
alt="Logo" width="120">
</div>"""
)
    with gr.Row():
        model_choice = gr.Dropdown(
            choices=list(MODEL_IDS.keys()),
            value="Kallamni 4B",
            label="Model"
        )
    # The dropdown is wired through additional_inputs, so its current value is passed to chat_fn
    # as the third argument (model_choice).
    gr.ChatInterface(
        fn=chat_fn,
        type="messages",
        additional_inputs=[model_choice],
        examples=[
            ["وين ناوي تسير عقب ما تخلص الدوام اليوم؟", "Kallamni 4B"],
            ["شرايك في الجو هالأيام، والله تحسه حر وايد؟", "Kallamni 4B"],
            ["كيف تقضي الويكند عادةً ويا العيال والربع؟", "Kallamni 4B"],
            ["شو أحلى أكلة دوم تحبها من طبخ الوالدة؟", "Kallamni 4B"],
            ["وين أحلى مكان دوم تاخذ منه قهوة الصبح؟", "Kallamni 4B"],
        ],
        title="💬 شات باللهجة الإماراتية",
        cache_examples=True,
        theme="soft",
        fill_height=True
    )
if __name__ == "__main__":
demo.launch(debug=True)