---
license: apache-2.0
---

# mistralai/Ministral-3-14B-Instruct-2512

For now, you can only run this model via vLLM or the private Transformers branch:
- [vLLM](#vllm)
- [Transformers](#transformers) branch: https://github.com/mistralai/Transformers-private/pull/1/

The architecture change compared with Mistral-Small-3.2 is the use of YaRN with Llama 4 scaling.

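As a quick sanity check, the scaling parameters should be visible in the model config. A minimal sketch, assuming the Transformers branch above is installed and that the scaling lives under a `rope_scaling` entry (the exact location and key names may differ):

```python
from transformers import AutoConfig

# Illustration only: print whatever RoPE scaling section the config carries.
config = AutoConfig.from_pretrained("mistralai/Ministral-3-14B-Instruct-2512")
text_config = getattr(config, "text_config", config)  # multimodal configs nest a text config
print(getattr(text_config, "rope_scaling", None))
```
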
Please note that the 3B model also has tied embeddings (no separate output layer) to reduce the number of weights. This is not the case for the 8B and 14B models.

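If you want to verify the tying yourself, a minimal sketch (the 3B repo id below is an assumption based on this model's naming scheme):

```python
import torch
from transformers import Mistral3ForConditionalGeneration

# Hypothetical 3B checkpoint name, for illustration only.
model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-3B-Instruct-2512", torch_dtype=torch.bfloat16
)
# With tied embeddings, the input embedding and the LM head share the same storage.
tied = (
    model.get_input_embeddings().weight.data_ptr()
    == model.get_output_embeddings().weight.data_ptr()
)
print(tied)  # expected: True for 3B, False for 8B and 14B
```
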
## vLLM

1. Install vLLM

```sh
VLLM_USE_PRECOMPILED=1 uv pip install git+https://github.com/vllm-project/vllm.git
```

2. Launch the server

```sh
vllm serve mistralai/Ministral-3-14B-Instruct-2512 --tool-call-parser mistral \
  --enable-auto-tool-choice --tensor-parallel-size 1
```

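Once the server is up, you can sanity-check it with a plain HTTP request against the standard OpenAI-compatible endpoint; it should list the served model id:

```sh
curl http://localhost:8000/v1/models
```
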
3. Test it

```python
from datetime import datetime, timedelta

from openai import OpenAI
from huggingface_hub import hf_hub_download

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

TEMP = 0.15
MAX_TOK = 262144

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

models = client.models.list()
model = models.data[0].id


def load_system_prompt() -> str:
    # Fetch the system prompt shipped with the model and fill in the date fields.
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-14B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()
image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]


response = client.chat.completions.create(
    model=model,
    messages=messages,
    temperature=TEMP,
    max_tokens=MAX_TOK,
)

print(response.choices[0].message.content)
```

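Since the server was launched with `--tool-call-parser mistral --enable-auto-tool-choice`, you can also exercise tool calling. A minimal sketch reusing the `client`, `model`, and `TEMP` defined above; the `get_weather` tool and its schema are made up for illustration:

```python
# Hypothetical tool definition, for illustration only.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "What is the weather in Paris right now?"}],
    tools=tools,
    temperature=TEMP,
)

# With auto tool choice enabled, the parsed call (if any) shows up here.
print(response.choices[0].message.tool_calls)
```
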
## Transformers

1. Install Transformers

```sh
pip install git+https://github.com/mistralai/Transformers-private@add_ministral3
```

or clone the branch:

```sh
git clone git@github.com:mistralai/Transformers-private.git
cd Transformers-private
git checkout add_ministral3
```

2. Test it (with mistral-common)

```sh
pip install "mistral-common[image]"
```

```python
from datetime import datetime, timedelta
import torch

from huggingface_hub import hf_hub_download
from transformers import Mistral3ForConditionalGeneration, AutoTokenizer


def load_system_prompt() -> str:
    # Fetch the system prompt shipped with the model and fill in the date fields.
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-14B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

tokenizer = AutoTokenizer.from_pretrained("mistralai/Ministral-3-14B-Instruct-2512", tokenizer_type="mistral")

model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-14B-Instruct-2512", torch_dtype=torch.bfloat16, device_map="auto"
).eval()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    },
]

# The tokenizer returns plain Python lists here, so build the batched tensors by hand.
tokenized = tokenizer.apply_chat_template(messages, return_dict=True)

input_ids = torch.tensor(tokenized.input_ids, device="cuda").unsqueeze(0)
attention_mask = torch.tensor(tokenized.attention_mask, device="cuda").unsqueeze(0)
pixel_values = torch.tensor(
    tokenized.pixel_values[0], dtype=torch.bfloat16, device="cuda"
).unsqueeze(0)
image_sizes = torch.tensor(pixel_values.shape[-2:], device="cuda").unsqueeze(0)

with torch.inference_mode():
    output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        pixel_values=pixel_values,
        image_sizes=image_sizes,
        max_new_tokens=1000,
    )[0]

decoded_output = tokenizer.decode(output, skip_special_tokens=True)
print(decoded_output)
```

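Note that `output` also contains the prompt tokens. If you only want the generated answer, you can slice them off before decoding:

```python
# Optional: decode only the newly generated tokens.
new_tokens = output[input_ids.shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```
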
3. Test it (without mistral-common)

```python
from datetime import datetime, timedelta
import torch

from huggingface_hub import hf_hub_download
from transformers import Mistral3ForConditionalGeneration, AutoProcessor


def load_system_prompt() -> str:
    # Fetch the system prompt shipped with the model and fill in its template fields.
    file_path = hf_hub_download(repo_id="mistralai/Ministral-3-14B-Instruct-2512", filename="SYSTEM_PROMPT.txt")
    with open(file_path, "r") as file:
        system_prompt = file.read()
    today = datetime.today().strftime("%Y-%m-%d")
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    return system_prompt.format(name="mistralai/Ministral-3-14B-Instruct-2512".split("/")[-1], today=today, yesterday=yesterday)


SYSTEM_PROMPT = load_system_prompt()

processor = AutoProcessor.from_pretrained("mistralai/Ministral-3-14B-Instruct-2512")

model = Mistral3ForConditionalGeneration.from_pretrained(
    "mistralai/Ministral-3-14B-Instruct-2512", torch_dtype=torch.bfloat16, device_map="auto"
).eval()

image_url = "https://static.wikia.nocookie.net/essentialsdocs/images/7/70/Battle.png/revision/latest?cb=20220523172438"

messages = [
    {"role": "system", "content": [
        {"type": "text", "text": SYSTEM_PROMPT}
    ]},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What action do you think I should take in this situation? List all the possible actions and explain why you think they are good or bad.",
            },
            {"type": "image", "url": image_url},
        ],
    },
]

# The processor handles tokenization and image preprocessing in one call;
# .to(...) moves the batch to the model device and casts the image tensors.
inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt"
).to(device=model.device, dtype=torch.bfloat16)

with torch.inference_mode():
    output = model.generate(
        **inputs,
        max_new_tokens=1000,
    )

decoded_output = processor.batch_decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
print(decoded_output)
```