We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ad2e8b9, commit df11d7a (Copy full SHA for df11d7a)
evaluation/deepseek_fp4/launch_deepseekr1_fp4_TP.sh
@@ -30,16 +30,16 @@ echo "running $model_path"
30
31
vllm serve $model_path \
32
--host localhost \
33
- --port 6789 \
+ --port 9000 \
34
--tensor-parallel-size 8 \
35
--max-num-batched-tokens 32768 \
36
--trust-remote-code \
37
--no-enable-prefix-caching \
38
--disable-log-requests \
39
- --compilation-config '{"cudagraph_mode": "FULL_AND_PIECEWISE"}' \
+ --enforce-eager \
40
--gpu_memory_utilization 0.7 \
41
- --block-size 1 \
42
- --seed 123 2>&1 | tee log.server.log
+ --block-size 16 \
+ --seed 123 2>&1 | tee log.server.log &
43
44
- # --enforce-eager \
+# --compilation-config '{"cudagraph_mode": "FULL_AND_PIECEWISE"}' \
45
# --enable-expert-parallel \
0 commit comments