From 041f84c071d1505b21c2b5879203feebc62f96b3 Mon Sep 17 00:00:00 2001
From: Ankur-singh <ankusingh@nvidia.com>
Date: Tue, 2 Jun 2026 17:22:30 -0700
Subject: [PATCH 1/3] [B300][vLLM] Add MiniMax-M2.5 FP4 disagg Dynamo configs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split of SemiAnalysisAI/InferenceX#1560 — B300 half.

- Add minimaxm2.5-fp4-b300-dynamo-vllm to nvidia-master.yaml (1k1k + 8k1k
  search spaces; image vllm/vllm-openai:v0.20.1, model nvidia/MiniMax-M2.5-NVFP4).
- Add srt-slurm recipes under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/.
- Wire minimax + dynamo-vllm routing into runners/launch_b300-nv.sh.
- Append perf-changelog entry.
---
 .github/configs/nvidia-master.yaml            | 213 ++++++++++++++++++
 .../minimax-m2.5-b300/1k1k/dep2-1p2d.yaml     |  72 ++++++
 .../1k1k/dep2-2p3d-c6144.yaml                 |  72 ++++++
 .../minimax-m2.5-b300/1k1k/dep2-2p3d.yaml     |  72 ++++++
 .../minimax-m2.5-b300/1k1k/dep8-2p1d.yaml     |  71 ++++++
 .../vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml |  73 ++++++
 .../vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml |  69 ++++++
 .../minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml    |  71 ++++++
 .../minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml    |  69 ++++++
 .../vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml |  78 +++++++
 .../minimax-m2.5-b300/8k1k/dep4-4p1d.yaml     |  71 ++++++
 .../minimax-m2.5-b300/8k1k/dep8-4p1d.yaml     |  71 ++++++
 .../vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml |  69 ++++++
 .../minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml    |  69 ++++++
 .../minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml    |  69 ++++++
 .../vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml |  73 ++++++
 perf-changelog.yaml                           |   8 +
 runners/launch_b300-nv.sh                     |  11 +-
 18 files changed, 1300 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index b8db07e0d..e73a7a654 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -10144,3 +10144,216 @@ minimaxm2.5-fp4-gb200-dynamo-vllm:
           tp: 4
           ep: 4
           dp-attn: true
+
+minimaxm2.5-fp4-b300-dynamo-vllm:
+  image: vllm/vllm-openai:v0.20.1
+  model: nvidia/MiniMax-M2.5-NVFP4
+  model-prefix: minimaxm2.5
+  runner: b300
+  precision: fp4
+  framework: dynamo-vllm
+  multinode: true
+  disagg: true
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - conc-list: [4, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [4]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp8-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+      - conc-list: [8, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml"
+        decode:
+          num-worker: 2
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [32, 64, 128]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [64, 128, 256, 1024]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml"
+        decode:
+          num-worker: 3
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [4096]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-1p2d.yaml"
+        decode:
+          num-worker: 2
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [2048, 4096]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml"
+        decode:
+          num-worker: 3
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [6144, 8192]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml"
+        decode:
+          num-worker: 3
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [1024, 1536, 2048, 4096]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - conc-list: [2, 4, 8, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [4]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp8-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+      - conc-list: [32, 128]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [64, 128, 256, 512]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [384, 512]
+        prefill:
+          num-worker: 4
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: true
+      - conc-list: [384]
+        prefill:
+          num-worker: 4
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
new file mode 100644
index 000000000..d6e6dc53c
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
@@ -0,0 +1,72 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2xdep2"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 2
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 128
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+      stream-interval: 128
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
new file mode 100644
index 000000000..3fd24aa25
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
@@ -0,0 +1,72 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2p3xdep2-c6144"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 128
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+      stream-interval: 128
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "6144x8192"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
new file mode 100644
index 000000000..bc68f6d59
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
@@ -0,0 +1,72 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2p3xdep2"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 128
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+      stream-interval: 128
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "2048x4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
new file mode 100644
index 000000000..516e51f11
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2p1xdep8"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 128
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 8
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+      stream-interval: 128
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "1024x1536x2048x4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
new file mode 100644
index 000000000..726b5a63b
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
@@ -0,0 +1,73 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4-1p1d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
new file mode 100644
index 000000000..77329ffcc
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4-1p2d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 2
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "8x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
new file mode 100644
index 000000000..4f25aee38
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4ep-1p1d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "32x64x128"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
new file mode 100644
index 000000000..8da4cb7ca
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4ep-1p3d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 2
+  prefill_workers: 1
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "64x128x256x1024"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
new file mode 100644
index 000000000..757eeed97
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
@@ -0,0 +1,78 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp8-1p1d"
+
+# B300-only: full-node TP=8 decode (the 8 GPUs of a single B300 node).
+# Cousin of tp4-1p1d.yaml but exercises the wider TP that B300's per-node
+# GPU count makes available. Only a single low concurrency (4) is swept —
+# this topology is decode-latency focused, not throughput.
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 8
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
new file mode 100644
index 000000000..258e9ba4f
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-4p1xdep4"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 4
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 4
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "384x512"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
new file mode 100644
index 000000000..1f41e52e2
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-4p1xdep8"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 4
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 8
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "384"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
new file mode 100644
index 000000000..91761b75f
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp4"
+
+model:
+  path: "minimax-m2.5-nvfp4"  # same string the B300 launcher exports as SRT_SLURM_MODEL_PREFIX
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1  # the "1p" in the recipe name
+  decode_workers: 1
+  gpus_per_prefill: 1  # prefill below sets no tensor-parallel-size
+  gpus_per_decode: 4  # equals decode tensor-parallel-size below
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true  # NOTE(review): set for prefill only; decode omits it - confirm intended
+      no-enable-prefix-caching: true
+      max-model-len: 9280  # isl + osl = 8192 + 1024 = 9216, so this leaves 64 tokens of headroom
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384  # 8x the decode value (2048)
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false  # plain "tp4" variant; the tp4ep recipes set this to true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # 8k input / 1k output, matching the "8k1k" directory and recipe name
+  osl: 1024
+  concurrencies: "2x4x8x16"  # presumably an "x"-separated sweep list - confirm against the sa-bench runner
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
new file mode 100644
index 000000000..76b000e8c
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp4ep"
+
+model:
+  path: "minimax-m2.5-nvfp4"  # same string the B300 launcher exports as SRT_SLURM_MODEL_PREFIX
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1  # the "1p" in the recipe name
+  decode_workers: 1
+  gpus_per_prefill: 1  # prefill below sets no tensor-parallel-size
+  gpus_per_decode: 4  # equals decode tensor-parallel-size below
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true  # NOTE(review): set for prefill only; decode omits it - confirm intended
+      no-enable-prefix-caching: true
+      max-model-len: 9280  # isl + osl = 8192 + 1024 = 9216, so this leaves 64 tokens of headroom
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384  # 8x the decode value (2048)
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true  # the "ep" in "tp4ep"; the plain tp4 recipe sets this to false
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # 8k input / 1k output, matching the "8k1k" directory and recipe name
+  osl: 1024
+  concurrencies: "32x128"  # presumably an "x"-separated sweep list - confirm against the sa-bench runner
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
new file mode 100644
index 000000000..b34025ee2
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-2p1xtp4ep"
+
+model:
+  path: "minimax-m2.5-nvfp4"  # same string the B300 launcher exports as SRT_SLURM_MODEL_PREFIX
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2  # the "2p" in the recipe name
+  decode_workers: 1
+  gpus_per_prefill: 1  # prefill below sets no tensor-parallel-size
+  gpus_per_decode: 4  # equals decode tensor-parallel-size below
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true  # NOTE(review): set for prefill only; decode omits it - confirm intended
+      no-enable-prefix-caching: true
+      max-model-len: 9280  # isl + osl = 8192 + 1024 = 9216, so this leaves 64 tokens of headroom
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384  # 8x the decode value (2048)
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true  # the "ep" in "tp4ep"; the plain tp4 recipe sets this to false
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # 8k input / 1k output, matching the "8k1k" directory and recipe name
+  osl: 1024
+  concurrencies: "64x128x256x512"  # presumably an "x"-separated sweep list - confirm against the sa-bench runner
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
new file mode 100644
index 000000000..ea276c25a
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
@@ -0,0 +1,73 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp8"
+
+# B300-only: full-node TP=8 decode at 8k input. Cousin of tp4-1p1d.yaml
+# but exercises the wider TP that B300's per-node GPU count makes
+# available. Runs a single low concurrency (4); see benchmark.concurrencies.
+
+model:
+  path: "minimax-m2.5-nvfp4"  # same string the B300 launcher exports as SRT_SLURM_MODEL_PREFIX
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260526"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1  # the "1p" in the recipe name
+  decode_workers: 1
+  gpus_per_prefill: 1  # prefill below sets no tensor-parallel-size
+  gpus_per_decode: 8  # equals decode tensor-parallel-size below and gpus_per_node above
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true  # NOTE(review): set for prefill only; decode omits it - confirm intended
+      no-enable-prefix-caching: true
+      max-model-len: 9280  # isl + osl = 8192 + 1024 = 9216, so this leaves 64 tokens of headroom
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384  # 8x the decode value (2048)
+      stream-interval: 32
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 8
+      enable-expert-parallel: false  # plain TP variant, like tp4-1p1d.yaml
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+      stream-interval: 32
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192  # 8k input / 1k output, matching the "8k1k" directory and recipe name
+  osl: 1024
+  concurrencies: "4"  # single concurrency point
+  random_range_ratio: 0.8
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 28523da86..7fba477b0 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3409,3 +3409,11 @@
   description:
     - "Add MiniMax-M2.5 NVFP4 GB200 disaggregated multinode vLLM benchmarks via Dynamo"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1642
+
+- config-keys:
+    - minimaxm2.5-fp4-b300-dynamo-vllm
+  description:
+    - "Add MiniMax-M2.5 NVFP4 B300 disaggregated multinode vLLM benchmarks via Dynamo"
+    - "Image: vllm/vllm-openai:v0.20.1"
+    - "Same 1k/1k and 8k/1k search space as gb300, plus a new tp8-1p1d at low concurrencies for both ISLs"
+  pr-link: PLACEHOLDER_PR_LINK
diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh
index 67e8b48cc..e4a253ba3 100644
--- a/runners/launch_b300-nv.sh
+++ b/runners/launch_b300-nv.sh
@@ -39,8 +39,11 @@ elif [[ $MODEL_PREFIX == "dsv4" && $PRECISION == "fp4" && $FRAMEWORK == "dynamo-
     fi
     export MODEL_PATH="${SELECTED_MODEL_PATH:-/data/models/dsv4-pro}"
     export SRT_SLURM_MODEL_PREFIX="deepseek-v4-pro"
+elif [[ $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" && $FRAMEWORK == "dynamo-vllm" ]]; then
+    export MODEL_PATH="/data/models/MiniMax-M2.5-NVFP4"  # NOTE(review): fixed path; the dsv4 branch above honors SELECTED_MODEL_PATH - confirm no override is needed here
+    export SRT_SLURM_MODEL_PREFIX="minimax-m2.5-nvfp4"  # same string as model.path in the minimax-m2.5-b300 recipes
 else
-    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4 with dynamo-vllm"
+    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4 with dynamo-vllm, minimaxm2.5-fp4 with dynamo-vllm"
     exit 1
 fi
 
@@ -61,6 +64,12 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "dsv4" ]]; then
     git checkout aflowers/vllm-gb200-v0.20.0
     mkdir -p recipes/vllm/deepseek-v4
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4" recipes/vllm/deepseek-v4
+elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" ]]; then
+    git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
+    cd "$SRT_REPO_DIR" || exit 1
+    git checkout main  # NOTE(review): tracks a moving branch, so runs are not pinned to a fixed srt-slurm revision
+    mkdir -p recipes/vllm/minimax-m2.5
+    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300" recipes/vllm/minimax-m2.5  # in-repo B300 recipes land in the clone under the un-suffixed minimax-m2.5 directory
 else
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR" || exit 1

From 57fb086b7305ea99ccc1d3846df16023f21362a8 Mon Sep 17 00:00:00 2001
From: Ankur-singh <ankusingh@nvidia.com>
Date: Tue, 2 Jun 2026 17:23:00 -0700
Subject: [PATCH 2/3] perf-changelog: link minimaxm2.5-fp4-b300 entry to PR #83

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 7fba477b0..58cf58e6d 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3416,4 +3416,4 @@
     - "Add MiniMax-M2.5 NVFP4 B300 disaggregated multinode vLLM benchmarks via Dynamo"
     - "Image: vllm/vllm-openai:v0.20.1"
     - "Same 1k/1k and 8k/1k search space as gb300, plus a new tp8-1p1d at low concurrencies for both ISLs"
-  pr-link: PLACEHOLDER_PR_LINK
+  pr-link: https://github.com/NVIDIA/InferenceMAX/pull/83

From 6a5a06901b41cf772641619931ef8f2517470e03 Mon Sep 17 00:00:00 2001
From: Ankur-singh <ankusingh@nvidia.com>
Date: Tue, 2 Jun 2026 17:37:59 -0700
Subject: [PATCH 3/3] perf-changelog: link minimaxm2.5-fp4-b300 entry to PR
 #1652

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 58cf58e6d..83bdf91cd 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3416,4 +3416,4 @@
     - "Add MiniMax-M2.5 NVFP4 B300 disaggregated multinode vLLM benchmarks via Dynamo"
     - "Image: vllm/vllm-openai:v0.20.1"
     - "Same 1k/1k and 8k/1k search space as gb300, plus a new tp8-1p1d at low concurrencies for both ISLs"
-  pr-link: https://github.com/NVIDIA/InferenceMAX/pull/83
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1652