Add/update the quantized ONNX model files and README.md for Transformers.js v3
Browse files
## Applied Quantizations
### ✅ Based on `model.onnx` *with* slimming
↳ ✅ `int8`: `model_int8.onnx` (added)
↳ ✅ `uint8`: `model_uint8.onnx` (added)
↳ ✅ `q4`: `model_q4.onnx` (added)
↳ ✅ `q4f16`: `model_q4f16.onnx` (added)
↳ ✅ `bnb4`: `model_bnb4.onnx` (added)
- README.md +4 -4
- onnx/model_bnb4.onnx +3 -0
- onnx/model_int8.onnx +3 -0
- onnx/model_q4.onnx +3 -0
- onnx/model_q4f16.onnx +3 -0
- onnx/model_uint8.onnx +3 -0
README.md
CHANGED
|
@@ -7,14 +7,14 @@ https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-12-v2 with ONNX weights t
|
|
| 7 |
|
| 8 |
## Usage (Transformers.js)
|
| 9 |
|
| 10 |
-
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
|
| 11 |
```bash
|
| 12 |
-
npm i @xenova/transformers
|
| 13 |
```
|
| 14 |
|
| 15 |
**Example:** Information Retrieval w/ `Xenova/ms-marco-MiniLM-L-12-v2`.
|
| 16 |
```js
|
| 17 |
-
import { AutoTokenizer, AutoModelForSequenceClassification } from '@xenova/transformers';
|
| 18 |
|
| 19 |
const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
|
| 20 |
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
|
|
@@ -31,7 +31,7 @@ const features = tokenizer(
|
|
| 31 |
}
|
| 32 |
)
|
| 33 |
|
| 34 |
-
const scores = await model(features)
|
| 35 |
console.log(scores);
|
| 36 |
// quantized: [ 9.597102165222168, -11.141762733459473 ]
|
| 37 |
// unquantized: [ 9.450557708740234, -11.160483360290527 ]
|
|
|
|
| 7 |
|
| 8 |
## Usage (Transformers.js)
|
| 9 |
|
| 10 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
|
| 11 |
```bash
|
| 12 |
+
npm i @huggingface/transformers
|
| 13 |
```
|
| 14 |
|
| 15 |
**Example:** Information Retrieval w/ `Xenova/ms-marco-MiniLM-L-12-v2`.
|
| 16 |
```js
|
| 17 |
+
import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';
|
| 18 |
|
| 19 |
const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
|
| 20 |
const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
|
|
|
|
| 31 |
}
|
| 32 |
)
|
| 33 |
|
| 34 |
+
const scores = await model(features);
|
| 35 |
console.log(scores);
|
| 36 |
// quantized: [ 9.597102165222168, -11.141762733459473 ]
|
| 37 |
// unquantized: [ 9.450557708740234, -11.160483360290527 ]
|
onnx/model_bnb4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5d42081dfecfabea73b606dd5d194569ae3b57e64c1b8ef522d9330a72eaf6b
|
| 3 |
+
size 60763518
|
onnx/model_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4509e8d29fce1db0ecdf9413c4269ccd7787fc925ecd2bdd3fd2fe99bc9b6b6f
|
| 3 |
+
size 33940752
|
onnx/model_q4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c355a1546ff8d2162f13041cd0c5544a3d99ff9ee2d7a71bb902ca9310857e69
|
| 3 |
+
size 62090166
|
onnx/model_q4f16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6934cd7f95924d4f2476203e6eda5ef0c597f49f822d6719d8688bbc2d85eda8
|
| 3 |
+
size 36509715
|
onnx/model_uint8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc5ed81a6ea6e595bb85e43f5a5b70f70692deacc287b19918f51b019dd62732
|
| 3 |
+
size 33940786
|