whitphx HF Staff committed on
Commit
3da497b
·
verified ·
1 Parent(s): 285669f

Add/update the quantized ONNX model files and README.md for Transformers.js v3

Browse files

## Applied Quantizations

### ✅ Based on `model.onnx` *with* slimming

↳ ✅ `int8`: `model_int8.onnx` (added)
↳ ✅ `uint8`: `model_uint8.onnx` (added)
↳ ✅ `q4`: `model_q4.onnx` (added)
↳ ✅ `q4f16`: `model_q4f16.onnx` (added)
↳ ✅ `bnb4`: `model_bnb4.onnx` (added)

README.md CHANGED
@@ -7,14 +7,14 @@ https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-12-v2 with ONNX weights t
7
 
8
  ## Usage (Transformers.js)
9
 
10
- If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
11
  ```bash
12
- npm i @xenova/transformers
13
  ```
14
 
15
  **Example:** Information Retrieval w/ `Xenova/ms-marco-MiniLM-L-12-v2`.
16
  ```js
17
- import { AutoTokenizer, AutoModelForSequenceClassification } from '@xenova/transformers';
18
 
19
  const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
20
  const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
@@ -31,7 +31,7 @@ const features = tokenizer(
31
  }
32
  )
33
 
34
- const scores = await model(features)
35
  console.log(scores);
36
  // quantized: [ 9.597102165222168, -11.141762733459473 ]
37
  // unquantized: [ 9.450557708740234, -11.160483360290527 ]
 
7
 
8
  ## Usage (Transformers.js)
9
 
10
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
11
  ```bash
12
+ npm i @huggingface/transformers
13
  ```
14
 
15
  **Example:** Information Retrieval w/ `Xenova/ms-marco-MiniLM-L-12-v2`.
16
  ```js
17
+ import { AutoTokenizer, AutoModelForSequenceClassification } from '@huggingface/transformers';
18
 
19
  const model = await AutoModelForSequenceClassification.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
20
  const tokenizer = await AutoTokenizer.from_pretrained('Xenova/ms-marco-MiniLM-L-12-v2');
 
31
  }
32
  )
33
 
34
+ const scores = await model(features);
35
  console.log(scores);
36
  // quantized: [ 9.597102165222168, -11.141762733459473 ]
37
  // unquantized: [ 9.450557708740234, -11.160483360290527 ]
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5d42081dfecfabea73b606dd5d194569ae3b57e64c1b8ef522d9330a72eaf6b
3
+ size 60763518
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4509e8d29fce1db0ecdf9413c4269ccd7787fc925ecd2bdd3fd2fe99bc9b6b6f
3
+ size 33940752
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c355a1546ff8d2162f13041cd0c5544a3d99ff9ee2d7a71bb902ca9310857e69
3
+ size 62090166
onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6934cd7f95924d4f2476203e6eda5ef0c597f49f822d6719d8688bbc2d85eda8
3
+ size 36509715
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5ed81a6ea6e595bb85e43f5a5b70f70692deacc287b19918f51b019dd62732
3
+ size 33940786