DylanZimmer committed on
Commit
4424d43
·
1 Parent(s): 4ecd9e6

From ProCreations/smollm3

Browse files
Files changed (2) hide show
  1. app.py +45 -34
  2. requirements.txt +4 -4
app.py CHANGED
@@ -1,63 +1,74 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
 
4
 
5
  # ------------------------
6
- # Model Setup
7
  # ------------------------
8
  model_name = "HuggingFaceTB/SmolLM3-3B"
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
-
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
 
 
 
 
13
 
14
  # ------------------------
15
- # Chat Function
16
  # ------------------------
17
- # `history` is a list of dicts: {"role": "user"/"assistant", "content": str}
18
- def respond(message, history):
 
 
 
 
 
 
19
  # Append current user message
20
- history = history + [{"role": "user", "content": message}]
21
 
22
- # Build input with tokenizer’s chat template
23
  text = tokenizer.apply_chat_template(
24
- history,
25
  tokenize=False,
26
  add_generation_prompt=True
27
  )
28
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
29
 
30
  # Generate response
31
- generated_ids = model.generate(
32
- **model_inputs,
33
- max_new_tokens=512,
34
- temperature=0.7,
35
- top_p=0.95,
36
- do_sample=True
37
- )
 
 
38
 
39
- # Extract only the model's reply (exclude input tokens)
40
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
41
  reply = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
42
 
43
- # Append assistant reply to history
44
- history.append({"role": "assistant", "content": reply})
45
-
46
- return reply, history
47
 
48
  # ------------------------
49
- # Gradio Interface
50
  # ------------------------
51
- demo = gr.ChatInterface(
52
- fn=respond,
53
- chatbot=gr.Chatbot(type="messages", height=400),
54
- textbox=gr.Textbox(placeholder="Type a message..."),
55
- title="SmallChat with History",
56
- description="Persistent chat history using OpenAI-style messages"
57
- )
 
 
 
 
 
58
 
59
- # ------------------------
60
- # Launch
61
- # ------------------------
62
  if __name__ == "__main__":
63
  demo.launch()
 
1
  import gradio as gr
 
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  # ------------------------
6
+ # Load model and tokenizer
7
  # ------------------------
8
  model_name = "HuggingFaceTB/SmolLM3-3B"
 
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
+ model = AutoModelForCausalLM.from_pretrained(
11
+ model_name,
12
+ torch_dtype=torch.bfloat16,
13
+ device_map="auto"
14
+ )
15
 
16
  # ------------------------
17
+ # Chat function
18
  # ------------------------
19
+ def chat(message, history):
20
+ # Convert Gradio history to OpenAI-style messages
21
+ messages = []
22
+ for human_msg, bot_msg in history:
23
+ messages.append({"role": "user", "content": human_msg})
24
+ if bot_msg:
25
+ messages.append({"role": "assistant", "content": bot_msg})
26
+
27
  # Append current user message
28
+ messages.append({"role": "user", "content": message})
29
 
30
+ # Apply the model's chat template
31
  text = tokenizer.apply_chat_template(
32
+ messages,
33
  tokenize=False,
34
  add_generation_prompt=True
35
  )
36
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
37
 
38
  # Generate response
39
+ with torch.no_grad():
40
+ generated_ids = model.generate(
41
+ **model_inputs,
42
+ max_new_tokens=512,
43
+ temperature=0.6,
44
+ top_p=0.95,
45
+ do_sample=True,
46
+ pad_token_id=tokenizer.eos_token_id
47
+ )
48
 
49
+ # Extract assistant reply
50
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
51
  reply = tokenizer.decode(output_ids, skip_special_tokens=True).strip()
52
 
53
+ # Update history
54
+ history.append((message, reply))
55
+ return "", history
 
56
 
57
  # ------------------------
58
+ # Gradio interface
59
  # ------------------------
60
+ with gr.Blocks() as demo:
61
+ gr.Markdown("# 🤖 SmolLM3-3B Chat")
62
+ gr.Markdown("Simple chat interface with persistent history.")
63
+
64
+ chatbot = gr.Chatbot(height=500)
65
+ msg = gr.Textbox(placeholder="Type your message here...")
66
+ submit = gr.Button("Send")
67
+ clear = gr.Button("Clear")
68
+
69
+ submit.click(chat, [msg, chatbot], [msg, chatbot])
70
+ msg.submit(chat, [msg, chatbot], [msg, chatbot])
71
+ clear.click(lambda: ([], ""), outputs=[chatbot, msg])
72
 
 
 
 
73
  if __name__ == "__main__":
74
  demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
- huggingface_hub>=0.34.0
2
- git+https://github.com/huggingface/transformers.git
3
- torch
4
- gradio
5
  accelerate
 
1
+ gradio>=4.0.0
2
+ transformers>=4.53.0
3
+ torch>=2.0.0
4
+ spaces>=0.1.0
5
  accelerate