JunHowie committed on
Commit
591a8e2
·
verified ·
1 Parent(s): bf14887

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -60,7 +60,7 @@ see [Official vLLM Deepseek-V3.2 Guide](https://docs.vllm.ai/projects/recipes/en
60
  export VLLM_USE_DEEP_GEMM=0 # ATM, this line is a "must" for Hopper devices
61
  CONTEXT_LENGTH=32768
62
  vllm serve \
63
- __YOUR_PATH__/tclf90/DeepSeek-V3.2-AWQ \
64
  --served-model-name MY_MODEL_NAME \
65
  --enable-auto-tool-choice \
66
  --tool-call-parser deepseek_v31 \
@@ -71,7 +71,7 @@ vllm serve \
71
  --gpu-memory-utilization 0.9 \
72
  --tensor-parallel-size 8 \
73
  --enable-expert-parallel \ # optional
74
- --speculative-config '{"model": "__YOUR_PATH__/tclf90/DeepSeek-V3.2-AWQ", "num_speculative_tokens": 1}' \ # optional, 50%+- throughput increase is observed
75
  --trust-remote-code \
76
  --host 0.0.0.0 \
77
  --port 8000
@@ -90,8 +90,8 @@ vllm serve \
90
 
91
  ### 【Model Download】
92
  ```python
93
- from modelscope import snapshot_download
94
- snapshot_download('tclf90/DeepSeek-V3.2-AWQ', cache_dir="your_local_path")
95
  ```
96
 
97
  ### 【Overview】
 
60
  export VLLM_USE_DEEP_GEMM=0 # ATM, this line is a "must" for Hopper devices
61
  CONTEXT_LENGTH=32768
62
  vllm serve \
63
+ __YOUR_PATH__/QuantTrio/DeepSeek-V3.2-AWQ \
64
  --served-model-name MY_MODEL_NAME \
65
  --enable-auto-tool-choice \
66
  --tool-call-parser deepseek_v31 \
 
71
  --gpu-memory-utilization 0.9 \
72
  --tensor-parallel-size 8 \
73
  --enable-expert-parallel \ # optional
74
+ --speculative-config '{"model": "__YOUR_PATH__/QuantTrio/DeepSeek-V3.2-AWQ", "num_speculative_tokens": 1}' \ # optional, 50%+- throughput increase is observed
75
  --trust-remote-code \
76
  --host 0.0.0.0 \
77
  --port 8000
 
90
 
91
  ### 【Model Download】
92
  ```python
93
+ from huggingface_hub import snapshot_download
94
+ snapshot_download('QuantTrio/DeepSeek-V3.2-AWQ', cache_dir="your_local_path")
95
  ```
96
 
97
  ### 【Overview】