mobicham committed
Commit e6f4532 · verified · 1 Parent(s): 0573dab

Update README.md

Files changed (1)
  1. README.md +8 -0
README.md CHANGED
@@ -38,10 +38,18 @@ prepare_for_inference(model, backend=backend, verbose=True)
  ```
  
  Use in <a href="https://github.com/vllm-project/vllm/">vllm</a>:
+ ```
+ pip install git+https://github.com/mobiusml/hqq/;
+ pip install git+https://github.com/mobiusml/gemlite/;
+ ```
+ 
  ```Python
  from vllm import LLM
  from vllm.sampling_params import SamplingParams
  
+ from hqq.utils.vllm import set_vllm_hqq_backend, VLLM_HQQ_BACKEND
+ set_vllm_hqq_backend(backend=VLLM_HQQ_BACKEND.GEMLITE)
+ 
  model_id = "mobiuslabsgmbh/Qwen2.5-VL-3B-Instruct_4bitgs64_hqq_hf"
  
  llm = LLM(model=model_id, max_model_len=4096, max_num_seqs=2, limit_mm_per_prompt={"image": 1}, dtype=torch.float16)
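
For reference, a minimal end-to-end sketch of the updated instructions. The README snippet passes `dtype=torch.float16` without importing `torch`, so the import is added here; the image file, the Qwen2.5-VL prompt placeholder string, and the sampling settings are illustrative assumptions, not part of the commit.

```Python
# Sketch of the updated README instructions; assumes hqq and gemlite are
# installed via the pip commands added in this commit.
import torch  # needed for dtype=torch.float16; the README snippet omits this import
from PIL import Image

from vllm import LLM
from vllm.sampling_params import SamplingParams

# Route HQQ-quantized layers through the GemLite backend (from the commit).
from hqq.utils.vllm import set_vllm_hqq_backend, VLLM_HQQ_BACKEND
set_vllm_hqq_backend(backend=VLLM_HQQ_BACKEND.GEMLITE)

model_id = "mobiuslabsgmbh/Qwen2.5-VL-3B-Instruct_4bitgs64_hqq_hf"

llm = LLM(model=model_id, max_model_len=4096, max_num_seqs=2,
          limit_mm_per_prompt={"image": 1}, dtype=torch.float16)

# Assumed usage: one image per prompt, matching limit_mm_per_prompt above.
# The chat-template string is an assumption based on Qwen2.5-VL's
# <|vision_start|><|image_pad|><|vision_end|> image placeholders.
prompt = ("<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
          "Describe this image.<|im_end|>\n<|im_start|>assistant\n")
image = Image.open("example.jpg")  # hypothetical local sample image

outputs = llm.generate(
    {"prompt": prompt, "multi_modal_data": {"image": image}},
    SamplingParams(temperature=0.0, max_tokens=128),
)
print(outputs[0].outputs[0].text)
```

Note that `limit_mm_per_prompt={"image": 1}` caps each request at a single image, which the sketch respects.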