Spaces:

dzehuggingface
/

SmallChat-FxnCaller

Sleeping

App Files Community

DylanZimmer committed on Aug 14

Commit

4424d43

1 Parent(s): 4ecd9e6

From ProCreations/smollm3

Browse files

Files changed (2) hide show

app.py +45 -34
requirements.txt +4 -4

app.py CHANGED Viewed

@@ -1,63 +1,74 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 # ------------------------
-# Model Setup
 # ------------------------
 model_name = "HuggingFaceTB/SmolLM3-3B"
-device = "cuda" if torch.cuda.is_available() else "cpu"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
 # ------------------------
-# Chat Function
 # ------------------------
-# `history` is a list of dicts: {"role": "user"/"assistant", "content": str}
-def respond(message, history):
     # Append current user message
-    history = history + [{"role": "user", "content": message}]
-    # Build input with tokenizer’s chat template
     text = tokenizer.apply_chat_template(
-        history,
         tokenize=False,
         add_generation_prompt=True
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     # Generate response
-    generated_ids = model.generate(
-        **model_inputs,
-        max_new_tokens=512,
-        temperature=0.7,
-        top_p=0.95,
-        do_sample=True
-    )
-    # Extract only the model's reply (exclude input tokens)
     output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
     reply = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
-    # Append assistant reply to history
-    history.append({"role": "assistant", "content": reply})
-    return reply, history
 # ------------------------
-# Gradio Interface
 # ------------------------
-demo = gr.ChatInterface(
-    fn=respond,
-    chatbot=gr.Chatbot(type="messages", height=400),
-    textbox=gr.Textbox(placeholder="Type a message..."),
-    title="SmallChat with History",
-    description="Persistent chat history using OpenAI-style messages"
-)
-# ------------------------
-# Launch
-# ------------------------
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 # ------------------------
+# Load model and tokenizer
 # ------------------------
 model_name = "HuggingFaceTB/SmolLM3-3B"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
 # ------------------------
+# Chat function
 # ------------------------
+def chat(message, history):
+    # Convert Gradio history to OpenAI-style messages
+    messages = []
+    for human_msg, bot_msg in history:
+        messages.append({"role": "user", "content": human_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
     # Append current user message
+    messages.append({"role": "user", "content": message})
+    # Apply the model's chat template
     text = tokenizer.apply_chat_template(
+        messages,
         tokenize=False,
         add_generation_prompt=True
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     # Generate response
+    with torch.no_grad():
+        generated_ids = model.generate(
+            **model_inputs,
+            max_new_tokens=512,
+            temperature=0.6,
+            top_p=0.95,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+    # Extract assistant reply
     output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
     reply = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+    # Update history
+    history.append((message, reply))
+    return "", history
 # ------------------------
+# Gradio interface
 # ------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 SmolLM3-3B Chat")
+    gr.Markdown("Simple chat interface with persistent history.")
+    chatbot = gr.Chatbot(height=500)
+    msg = gr.Textbox(placeholder="Type your message here...")
+    submit = gr.Button("Send")
+    clear = gr.Button("Clear")
+    submit.click(chat, [msg, chatbot], [msg, chatbot])
+    msg.submit(chat, [msg, chatbot], [msg, chatbot])
+    clear.click(lambda: ([], ""), outputs=[chatbot, msg])
 if __name__ == "__main__":
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
-huggingface_hub>=0.34.0
-git+https://github.com/huggingface/transformers.git
-torch
-gradio
 accelerate

+gradio>=4.0.0
+transformers>=4.53.0
+torch>=2.0.0
+spaces>=0.1.0
 accelerate