Update README.md
Browse files
README.md
CHANGED
|
@@ -38,10 +38,18 @@ prepare_for_inference(model, backend=backend, verbose=True)
|
|
| 38 |
```
|
| 39 |
|
| 40 |
Use in <a href="https://github.com/vllm-project/vllm/">vllm</a>:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
```python
|
| 42 |
from vllm import LLM
|
| 43 |
from vllm.sampling_params import SamplingParams
import torch
|
| 44 |
|
|
|
|
|
|
|
|
|
|
| 45 |
model_id = "mobiuslabsgmbh/Qwen2.5-VL-3B-Instruct_4bitgs64_hqq_hf"
|
| 46 |
|
| 47 |
llm = LLM(model=model_id, max_model_len=4096, max_num_seqs=2, limit_mm_per_prompt={"image": 1}, dtype=torch.float16)
|
|
|
|
| 38 |
```
|
| 39 |
|
| 40 |
Use in <a href="https://github.com/vllm-project/vllm/">vllm</a>:
|
| 41 |
+
```
|
| 42 |
+
pip install git+https://github.com/mobiusml/hqq/;
|
| 43 |
+
pip install git+https://github.com/mobiusml/gemlite/;
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
```python
|
| 47 |
from vllm import LLM
|
| 48 |
from vllm.sampling_params import SamplingParams
import torch
|
| 49 |
|
| 50 |
+
from hqq.utils.vllm import set_vllm_hqq_backend, VLLM_HQQ_BACKEND
|
| 51 |
+
set_vllm_hqq_backend(backend=VLLM_HQQ_BACKEND.GEMLITE)
|
| 52 |
+
|
| 53 |
model_id = "mobiuslabsgmbh/Qwen2.5-VL-3B-Instruct_4bitgs64_hqq_hf"
|
| 54 |
|
| 55 |
llm = LLM(model=model_id, max_model_len=4096, max_num_seqs=2, limit_mm_per_prompt={"image": 1}, dtype=torch.float16)
|