diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index c1412b8f4..9677054b2 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -10293,3 +10293,216 @@ minimaxm2.5-fp4-gb200-dynamo-vllm: tp: 4 ep: 4 dp-attn: true + +minimaxm2.5-fp8-gb300-dynamo-vllm: + image: vllm/vllm-openai:v0.20.1 + model: MiniMaxAI/MiniMax-M2.5 + model-prefix: minimaxm2.5 + runner: gb300-nv + precision: fp8 + framework: dynamo-vllm + multinode: true + disagg: true + scenarios: + fixed-seq-len: + - isl: 1024 + osl: 1024 + search-space: + - conc-list: [8, 16, 32, 64, 128] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + - conc-list: [32, 64, 128, 256, 512] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 1 + dp-attn: false + - conc-list: [256, 512, 1024] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [256, 512, 1024] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [512, 1024, 2048] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [4096, 8192] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - 
"CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [1024] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml" + decode: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + - conc-list: [16, 64, 128] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [256, 512] + prefill: + num-worker: 1 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [32] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml" + decode: + num-worker: 1 + tp: 2 + ep: 1 + dp-attn: false + - conc-list: [64, 128, 256, 512] + prefill: + num-worker: 2 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [64] + prefill: + num-worker: 3 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 1 + dp-attn: false + - conc-list: [256, 512] + prefill: + num-worker: 3 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [1024, 2048] + prefill: + num-worker: 3 + tp: 1 + ep: 
1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [512, 1024, 2048] + prefill: + num-worker: 5 + tp: 1 + ep: 1 + dp-attn: false + additional-settings: + - "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml" + decode: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml new file mode 100644 index 000000000..4475c4548 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml @@ -0,0 +1,64 @@ +name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 1 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + 
tensor-parallel-size: 4 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "8x16x32x64x128" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml new file mode 100644 index 000000000..005d3ab45 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml @@ -0,0 +1,69 @@ +name: "minimax-m2.5-vllm-disagg-gb300-1p2d-tp4" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 1 + decode_workers: 2 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: 
"32x64x128x256x512" + # warmup_prompts: 1 + # use_chat_template: false + # req_rate: "inf" + # random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml new file mode 100644 index 000000000..42e2bbff7 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml @@ -0,0 +1,66 @@ +name: "minimax-m2.5-vllm-disagg-gb300-1p2d-tp4ep" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 1 + decode_workers: 2 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + pipeline-parallel-size: 1 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "256x512x1024" diff --git 
a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml new file mode 100644 index 000000000..dadaea41c --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml @@ -0,0 +1,83 @@ +name: "minimax-m2.5-vllm-disagg-gb300-2p1d-dep8" + +# model: +# path: "minimax-m2.5-fp8" +# container: "v0.18.1" +# precision: "fp8" + +# dynamo: +# version: 1.0.1 +# install: true + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 2 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 8 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 8 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: 
"256x512x1024" + # warmup_prompts: 1 + # use_chat_template: false + # req_rate: "inf" + # random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml new file mode 100644 index 000000000..95a6f4032 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml @@ -0,0 +1,82 @@ +name: "minimax-m2.5-vllm-disagg-gb300-2p2d-dep4-hi-conc" + +# model: +# path: "minimax-m2.5-fp8" +# container: "v0.18.1" +# precision: "fp8" + +# dynamo: +# version: 1.0.1 +# install: true + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 2 + decode_workers: 2 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 128 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + safetensors-load-strategy: 
"prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 128 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "4096x8192" + # use_chat_template: false + # req_rate: "inf" + # random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml new file mode 100644 index 000000000..90d14b5b0 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml @@ -0,0 +1,82 @@ +name: "minimax-m2.5-vllm-disagg-gb300-2p2d-dep4" + +# model: +# path: "minimax-m2.5-fp8" +# container: "v0.18.1" +# precision: "fp8" + +# dynamo: +# version: 1.0.1 +# install: true + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 2 + decode_workers: 2 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + 
data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "512x1024x2048" + # use_chat_template: false + # req_rate: "inf" + # random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml new file mode 100644 index 000000000..ef4bfc846 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml @@ -0,0 +1,68 @@ +name: "minimax-m2.5-vllm-disagg-gb300-2p3d-dep2" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 2 + prefill_workers: 2 + decode_workers: 3 + gpus_per_prefill: 1 + gpus_per_decode: 2 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 2 + 
data-parallel-rpc-port: 13345 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 1024 + osl: 1024 + concurrencies: "1024" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml new file mode 100644 index 000000000..f9e9ccf79 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml @@ -0,0 +1,66 @@ +name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4ep-hi-conc" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 1 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + pipeline-parallel-size: 1 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + 
no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "256x512" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml new file mode 100644 index 000000000..76e72c229 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml @@ -0,0 +1,66 @@ +name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4ep" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 1 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + pipeline-parallel-size: 1 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "16x64x128" diff --git 
a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml new file mode 100644 index 000000000..f71458a70 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml @@ -0,0 +1,68 @@ +name: "minimax-m2.5-vllm-disagg-gb300-2p1d-tp2" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 2 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 2 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 2 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "32" + use_chat_template: false + req_rate: "inf" + random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml 
b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml new file mode 100644 index 000000000..668cf185b --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml @@ -0,0 +1,66 @@ +name: "minimax-m2.5-vllm-disagg-gb300-2p1d-tp4ep" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 2 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + pipeline-parallel-size: 1 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "64x128x256x512" diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml new file mode 100644 index 000000000..94b866d95 
--- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml @@ -0,0 +1,82 @@ +name: "minimax-m2.5-vllm-disagg-gb300-3p1d-dep4-hi-conc" + +# model: +# path: "minimax-m2.5-fp8" +# container: "v0.18.1" +# precision: "fp8" + +# dynamo: +# version: 1.0.1 +# install: true + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 3 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "1024x2048" + use_chat_template: false + req_rate: "inf" + random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml 
b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml new file mode 100644 index 000000000..9bb6081db --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml @@ -0,0 +1,82 @@ +name: "minimax-m2.5-vllm-disagg-gb300-3p1d-dep4" + +# model: +# path: "minimax-m2.5-fp8" +# container: "v0.18.1" +# precision: "fp8" + +# dynamo: +# version: 1.0.1 +# install: true + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 3 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + data-parallel-size: 4 + data-parallel-rpc-port: 13345 + enable-expert-parallel: true + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "256x512" + use_chat_template: false + req_rate: "inf" + random_range_ratio: 1.0 diff --git 
a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml new file mode 100644 index 000000000..b638c0351 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml @@ -0,0 +1,68 @@ +name: "minimax-m2.5-vllm-disagg-gb300-3p1d-tp4" + +model: + path: "minimax-m2.5-fp8" + container: "vllm/vllm-openai:v0.20.1" + precision: "fp8" + +dynamo: + install: true + wheel: "1.2.0.dev20260526" + + +setup_script: install-deps.sh + +resources: + gpu_type: "gb300" + gpus_per_node: 4 + prefill_nodes: 1 + decode_nodes: 1 + prefill_workers: 3 + decode_workers: 1 + gpus_per_prefill: 1 + gpus_per_decode: 4 + +frontend: + type: dynamo + enable_multiple_frontends: false + +backend: + type: vllm + connector: null + + prefill_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + decode_environment: + VLLM_ENGINE_READY_TIMEOUT_S: "3600" + VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl" + + vllm_config: + prefill: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 1 + pipeline-parallel-size: 1 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + + decode: + kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}' + kv-cache-dtype: "fp8" + tensor-parallel-size: 4 + safetensors-load-strategy: "prefetch" + trust-remote-code: true + no-enable-prefix-caching: true + stream-interval: 32 + +benchmark: + type: "sa-bench" + isl: 8192 + osl: 1024 + concurrencies: "64" + use_chat_template: false + req_rate: "inf" + random_range_ratio: 1.0 diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml 
b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml
new file mode 100644
index 000000000..ed2a9cdd4
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml
@@ -0,0 +1,68 @@
+name: "minimax-m2.5-vllm-disagg-gb300-5p2d-dep4"
+
+model:
+  path: "minimax-m2.5-fp8"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp8"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 2
+  decode_nodes: 2
+  prefill_workers: 5
+  decode_workers: 2
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 1
+      pipeline-parallel-size: 1
+      safetensors-load-strategy: "prefetch"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 1
+      pipeline-parallel-size: 1
+      data-parallel-size: 4
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      safetensors-load-strategy: "prefetch"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "512x1024x2048"
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 1ce1698b8..1374a6492 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3416,3 +3416,10 @@
     - "Add MiniMax-M2.5 FP8 B200 disaggregated multinode vLLM benchmarks via Dynamo"
     - "Add 1k1k/8k1k FP8 recipe set under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b200-fp8/"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1649
+
+- config-keys:
+    - minimaxm2.5-fp8-gb300-dynamo-vllm
+  description:
+    - "Add MiniMax-M2.5 FP8 GB300 disaggregated multinode vLLM benchmarks via Dynamo"
+    - "Add 1k1k/8k1k FP8 recipe set under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8/"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1647
diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh
index b47e103fd..4bffdb0b5 100644
--- a/runners/launch_gb300-nv.sh
+++ b/runners/launch_gb300-nv.sh
@@ -45,8 +45,11 @@ elif [[ $MODEL_PREFIX == "glm5" && $PRECISION == "fp4" ]]; then
 elif [[ $MODEL_PREFIX == "glm5" && $PRECISION == "fp8" ]]; then
     export MODEL_PATH=/scratch/models/GLM-5-FP8
     export SRT_SLURM_MODEL_PREFIX="glm-5-fp8"
+elif [[ $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp8" ]]; then
+    export MODEL_PATH=/data/models/MiniMax-M2.5
+    export SRT_SLURM_MODEL_PREFIX="minimax-m2.5-fp8"
 else
-    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4, glm5-fp4, glm5-fp8"
+    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4, glm5-fp4, glm5-fp8, minimaxm2.5-fp8"
     exit 1
 fi
 
@@ -137,6 +140,12 @@ elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "glm5" ]]; then
     git checkout sa-submission-q2-2026
     mkdir -p recipes/sglang/glm5
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5" recipes/sglang/glm5
+elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp8" ]]; then
+    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
+    cd "$SRT_REPO_DIR"
+    git checkout main
+    mkdir -p recipes/vllm/minimax-m2.5-fp8
+    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-fp8" recipes/vllm/minimax-m2.5-fp8
 else
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR"
@@ -370,8 +379,12 @@ if [[ "${RUN_EVAL:-false}" == "true" || "${EVAL_ONLY:-false}" == "true" ]]; then
         shopt -s nullglob
         for eval_file in "$EVAL_DIR"/*; do
             [ -f "$eval_file" ] || continue
-            cp "$eval_file" "$GITHUB_WORKSPACE/"
-            echo "Copied eval artifact: $(basename "$eval_file")"
+            eval_dest="$GITHUB_WORKSPACE/$(basename "$eval_file")"
+            if cp "$eval_file" "$eval_dest"; then
+                echo "Copied eval artifact: $(basename "$eval_file")"
+            else
+                echo "WARNING: Failed to copy eval artifact, continuing: $(basename "$eval_file")"
+            fi
         done
         shopt -u nullglob
     else