---
license: apache-2.0
---

# mistralai/Ministral-3-14B-Instruct-2512

For now, you can only run this model via vLLM or the private Transformers branch:
- [vLLM](#vllm)
- [Transformers](#transformers) branch: https://github.com/mistralai/Transformers-private/pull/1/

The architecture change compared with Mistral-Small-3.2 is the use of YaRN with Llama 4 scaling.

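As a quick sanity check, the scaling parameters should be visible in the model config. A minimal sketch, assuming the Transformers branch above is installed and that the scaling lives under a `rope_scaling` entry (the exact location and key names may differ):

```python
from transformers import AutoConfig

# Illustration only: print whatever RoPE scaling section the config carries.
config = AutoConfig.from_pretrained("mistralai/Ministral-3-14B-Instruct-2512")
text_config = getattr(config, "text_config", config)  # multimodal configs nest a text config
print(getattr(text_config, "rope_scaling", None))
```
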
Please note that the 3B model also has tied embeddings (no separate output layer) to reduce the number of weights. This is not the case for the 8B and 14B models.

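If you want to verify the tying yourself, a minimal sketch (the 3B repo id below is an assumption based on this model's naming scheme):

```python
import torch
from transformers import Mistral3ForConditionalGeneration

# Hypothetical 3B checkpoint name, for illustration only.
model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-3B-Instruct-2512", torch_dtype=torch.bfloat16
)
# With tied embeddings, the input embedding and the LM head share the same storage.
tied = (
    model.get_input_embeddings().weight.data_ptr()
    == model.get_output_embeddings().weight.data_ptr()
)
print(tied)  # expected: True for 3B, False for 8B and 14B
```
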
## vLLM

1. Install vLLM

```sh
VLLM_USE_PRECOMPILED=1 uv pip install git+https://github.com/vllm-project/vllm.git
```

2. Launch the server

```sh
vllm serve mistralai/Ministral-3-14B-Instruct-2512 --tool-call-parser mistral \
  --enable-auto-tool-choice --tensor-parallel-size 1
```

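Once the server is up, you can sanity-check it with a plain HTTP request against the standard OpenAI-compatible endpoint; it should list the served model id:

```sh
curl http://localhost:8000/v1/models
```
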
3. Test it

```python
from datetime import datetime, timedelta

from openai import OpenAI
from huggingface_hub import hf_hub_download

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

TEMP = 0.15
MAX_TOK = 262144

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

models = client.models.list()
model = models.data[0].id


def load_system_prompt() -> str:
    # Fetch the system prompt shipped with the model and fill in the date fields.
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-14B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()
image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]


response = client.chat.completions.create(
    model=model,
    messages=messages,
    temperature=TEMP,
    max_tokens=MAX_TOK,
)

print(response.choices[0].message.content)
```

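Since the server was launched with `--tool-call-parser mistral --enable-auto-tool-choice`, you can also exercise tool calling. A minimal sketch reusing the `client`, `model`, and `TEMP` defined above; the `get_weather` tool and its schema are made up for illustration:

```python
# Hypothetical tool definition, for illustration only.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "What is the weather in Paris right now?"}],
    tools=tools,
    temperature=TEMP,
)

# With auto tool choice enabled, the parsed call (if any) shows up here.
print(response.choices[0].message.tool_calls)
```
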
## Transformers

1. Install Transformers

```sh
pip install git+https://github.com/mistralai/Transformers-private@add_ministral3
```

or clone the branch:

```sh
git clone git@github.com:mistralai/Transformers-private.git
cd Transformers-private
git checkout add_ministral3
```

2. Test it (with mistral-common)

```sh
pip install "mistral-common[image]"
```

```python
from datetime import datetime, timedelta
import torch

from huggingface_hub import hf_hub_download
from transformers import Mistral3ForConditionalGeneration, AutoTokenizer


def load_system_prompt() -> str:
    # Fetch the system prompt shipped with the model and fill in the date fields.
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-14B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

tokenizer = AutoTokenizer.from_pretrained("mistralai/Ministral-3-14B-Instruct-2512", tokenizer_type="mistral")

model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-14B-Instruct-2512", torch_dtype=torch.bfloat16, device_map="auto"
).eval()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]

# The tokenizer returns plain Python lists here, so build the batched tensors by hand.
tokenized = tokenizer.apply_chat_template(messages, return_dict=True)

input_ids = torch.tensor(tokenized.input_ids, device="cuda").unsqueeze(0)
attention_mask = torch.tensor(tokenized.attention_mask, device="cuda").unsqueeze(0)
pixel_values = torch.tensor(
    tokenized.pixel_values[0], dtype=torch.bfloat16, device="cuda"
).unsqueeze(0)
image_sizes = torch.tensor(pixel_values.shape[-2:], device="cuda").unsqueeze(0)

with torch.inference_mode():
    output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        pixel_values=pixel_values,
        image_sizes=image_sizes,
        max_new_tokens=1000,
    )[0]

decoded_output = tokenizer.decode(output, skip_special_tokens=True)
print(decoded_output)
```

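Note that `output` also contains the prompt tokens. If you only want the generated answer, you can slice them off before decoding:

```python
# Optional: decode only the newly generated tokens.
new_tokens = output[input_ids.shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```
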
3. Test it (without mistral-common)

```python
from datetime import datetime, timedelta
import torch

from huggingface_hub import hf_hub_download
from transformers import Mistral3ForConditionalGeneration, AutoProcessor


def load_system_prompt() -> str:
    # Fetch the system prompt shipped with the model and fill in its template fields.
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-14B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(name="mistralai/Ministral-3-14B-Instruct-2512".split("/")[-1], today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

processor = AutoProcessor.from_pretrained("mistralai/Ministral-3-14B-Instruct-2512")

model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-14B-Instruct-2512", torch_dtype=torch.bfloat16, device_map="auto"
).eval()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": [
        {"type": "text", "text": SYSTEM_PROMPT}
    ]},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image", "url": image_url},
        ],
    },
]

# The processor handles tokenization and image preprocessing in one call;
# .to(...) moves the batch to the model device and casts the image tensors.
inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt"
).to(device=model.device, dtype=torch.bfloat16)

with torch.inference_mode():
    output = model.generate(
        **inputs,
        max_new_tokens=1000,
    )

decoded_output = processor.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(decoded_output)
```