diff --git a/conf/experimental/ai_dynamo/test/sglang.toml b/conf/experimental/ai_dynamo/test/sglang.toml index d1429fe11..0ebc3bd60 100644 --- a/conf/experimental/ai_dynamo/test/sglang.toml +++ b/conf/experimental/ai_dynamo/test/sglang.toml @@ -21,7 +21,7 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.9.0" -workloads = "genai_perf.sh" +workloads = "aiperf.sh" [cmd_args.dynamo] backend = "sglang" @@ -93,6 +93,14 @@ workloads = "genai_perf.sh" warmup-request-count = 5 concurrency = 2 + [cmd_args.aiperf] + + [cmd_args.aiperf.args] + concurrency = 2 + request-count = 50 + synthetic-input-tokens-mean = 300 + output-tokens-mean = 500 + [extra_env_vars] UCX_LOG_LEVEL = "warn" HF_HUB_OFFLINE = "1" diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml index f1249564f..c88ff3e10 100644 --- a/conf/experimental/ai_dynamo/test/vllm.toml +++ b/conf/experimental/ai_dynamo/test/vllm.toml @@ -21,7 +21,7 @@ extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"] [cmd_args] docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.1" -workloads = "genai_perf.sh" +workloads = "aiperf.sh" [cmd_args.dynamo] backend = "vllm" @@ -85,6 +85,14 @@ workloads = "genai_perf.sh" warmup-request-count = 5 concurrency = 2 + [cmd_args.aiperf] + + [cmd_args.aiperf.args] + concurrency = 2 + request-count = 50 + synthetic-input-tokens-mean = 300 + output-tokens-mean = 500 + [extra_env_vars] UCX_LOG_LEVEL = "warn" HF_HUB_OFFLINE = "1" diff --git a/conf/experimental/ai_dynamo/test_scenario/sglang_slurm.toml b/conf/experimental/ai_dynamo/test_scenario/sglang_slurm.toml index d6b8eac1c..26ed91285 100644 --- a/conf/experimental/ai_dynamo/test_scenario/sglang_slurm.toml +++ b/conf/experimental/ai_dynamo/test_scenario/sglang_slurm.toml @@ -18,7 +18,7 @@ name = "dynamo_sglang" [[Tests]] id = "sglang-Qwen3-0.6B" -test_name = "sglang-Qwen3-0.6B" +test_name = "sglang" 
time_limit = "00:20:00" [Tests.cmd_args] diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml index c63c648fe..45031da3a 100644 --- a/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml +++ b/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml @@ -19,7 +19,7 @@ job_status_check = false [[Tests]] id = "test.disagg.single-node" -test_name = "vLLM-Qwen3-0.6B" +test_name = "vLLM" time_limit = "00:10:00" [Tests.cmd_args] @@ -38,7 +38,7 @@ time_limit = "00:10:00" [[Tests]] id = "test.disagg.multinode" -test_name = "vLLM-Qwen3-0.6B" +test_name = "vLLM" time_limit = "00:10:00" [Tests.cmd_args] diff --git a/doc/workloads/ai_dynamo.rst b/doc/workloads/ai_dynamo.rst index 24d1cd310..023d92bf2 100644 --- a/doc/workloads/ai_dynamo.rst +++ b/doc/workloads/ai_dynamo.rst @@ -47,7 +47,7 @@ Node Configuration for AI Dynamo AI Dynamo jobs use three distinct types of nodes: -- **Frontend node**: Hosts the coordination services (`etcd`, `nats`), the **frontend server**, the **request generator** (`genai-perf`), and the first decode worker +- **Frontend node**: Hosts the coordination services (`etcd`, `nats`), the **frontend server**, the **request generator** (`aiperf` in the bundled test configurations, selectable via ``workloads`` in the test TOML), and the first decode worker - **Prefill node(s)**: Handle the prefill stage of inference - **Decode node(s)**: Handle the decode stage of inference (optional, depending on model and setup) @@ -82,32 +82,71 @@ The job progress monitoring can be done using either of the following options: watch tail -n 4 ./results//*.txt -The frontend node will initially wait to allow weight loading on all nodes. Once ready, it will launch ``genai-perf``, which begins generating requests to the frontend server. All servers cooperate to complete inference, and the output will appear in ``stdout.txt``. +The frontend node will initially wait to allow weight loading on all nodes. 
Once ready, it will launch the configured benchmark tool (``aiperf`` in the bundled test configurations), which begins generating requests to the frontend server. All servers cooperate to complete inference, and the output will appear in ``stdout.txt``. -Review genai-perf Benchmark Results ------------------------------------ +Choosing a Benchmark Tool +~~~~~~~~~~~~~~~~~~~~~~~~~ -After job completion, CloudAI will place the output logs and result files in the designated results directory. To analyze performance metrics and validate inference outcomes: +The benchmark tool is controlled by the ``workloads`` field in the test TOML. The bundled test configurations set ``aiperf.sh``; when the field is omitted, ``genai_perf.sh`` is used: -- Navigate to the results directory (e.g., ``./results/...``) -- Most importantly, open the ``profile_genai_perf.csv`` file to examine the final benchmarking results +.. code-block:: toml -This CSV file includes detailed metrics collected by genai-perf, such as request latency, throughput, and system utilization statistics. Use this data to evaluate the model's performance and identify potential bottlenecks or optimization opportunities. + [cmd_args] + workloads = "aiperf.sh" # uses aiperf, writes aiperf_report.csv + +To use genai-perf instead, set: + +.. code-block:: toml + + [cmd_args] + workloads = "genai_perf.sh" # uses genai-perf, writes genai_perf_report.csv + + [cmd_args.genai_perf] + cmd = "genai-perf profile" + extra-args = "--streaming --verbose -- -v --async" + + [cmd_args.genai_perf.args] + endpoint-type = "chat" + output-tokens-mean = 500 + request-count = 50 + +Review Benchmark Results +------------------------ + +After job completion, CloudAI places output logs and result files in the designated results directory. The result file name depends on the configured ``workloads`` field: + +- ``aiperf.sh`` (bundled configurations) → ``aiperf_report.csv`` +- ``genai_perf.sh`` → ``genai_perf_report.csv`` + +Navigate to ``./results/<scenario>/<test>/0/`` and open the CSV to examine performance metrics. 
+ +Example ``aiperf_report.csv``: :: - Metric,avg,min,max,p99,p95,p90,p75,p50,p25 - Time To First Token (ms),"1,146.31",249.48,"3,485.23","3,457.97","3,349.56","3,215.06","1,330.93",640.07,286.52 - Time To Second Token (ms),26.05,0.00,133.51,96.12,36.56,34.88,34.35,33.55,1.78 - Request Latency (ms),"6,406.20","5,371.47","9,608.72","9,436.13","9,046.58","9,028.16","6,549.60","5,690.23","5,493.63" - Inter Token Latency (ms),30.35,27.59,35.60,35.23,33.88,32.53,31.05,30.13,29.04 - Output Sequence Length (tokens),174.45,164.00,187.00,186.22,183.10,180.10,177.00,174.00,171.75 - Input Sequence Length (tokens),"3,000.05","2,999.00","3,001.00","3,001.00","3,001.00","3,000.00","3,000.00","3,000.00","3,000.00" + Metric,avg,min,max,p25,p50,p75,p99,std + Inter Token Latency (ms),2.81,2.66,2.88,2.79,2.83,2.84,2.87,0.04 + Time to First Token (ms),49.87,17.15,99.91,49.35,49.87,50.52,92.31,9.20 + Time to Second Token (ms),0.50,0.03,4.05,0.03,0.04,0.04,3.47,1.08 + Request Latency (ms),1652.30,1203.61,6433.87,1453.19,1462.99,1466.72,6431.16,976.18 + Output Sequence Length (tokens),498.06,410.00,501.00,500.00,500.00,500.00,501.00,12.62 + Input Sequence Length (tokens),300.00,300.00,300.00,300.00,300.00,300.00,300.00,0.00 Metric,Value - Output Token Throughput (per sec),261.25 - Request Throughput (per sec),1.50 - Request Count (count),40.00 + Output Token Throughput (tokens/sec),598.78 + Total Token Throughput (tokens/sec),962.32 + Request Throughput (requests/sec),1.20 + Request Count,50.00 + +Supported Backends +------------------ + +The following backends are available via the ``conf/experimental/ai_dynamo/test/`` directory: + +- **vLLM** (``vllm.toml``) — use with ``test_scenario/vllm_slurm.toml`` +- **sglang** (``sglang.toml``) — use with ``test_scenario/sglang_slurm.toml`` + +Both bundled configurations select ``aiperf`` as the benchmark tool and support disaggregated prefill/decode. 
API Documentation diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py index b5f030eeb..1360ce10d 100644 --- a/src/cloudai/workloads/ai_dynamo/__init__.py +++ b/src/cloudai/workloads/ai_dynamo/__init__.py @@ -18,6 +18,7 @@ AIDynamoArgs, AIDynamoCmdArgs, AIDynamoTestDefinition, + AIPerf, GenAIPerf, LMCache, LMCacheArgs, @@ -35,6 +36,7 @@ "AIDynamoReportGenerationStrategy", "AIDynamoSlurmCommandGenStrategy", "AIDynamoTestDefinition", + "AIPerf", "GenAIPerf", "LMCache", "LMCacheArgs", diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py index 55dc1f1b3..01912f0c1 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py @@ -282,6 +282,25 @@ def installables(self) -> list[Installable]: return [self.script] +class AIPerf(Workload): + """Workload configuration for aiperf benchmarking.""" + + model_config = ConfigDict(extra="allow") + + name: str = "aiperf" + cmd: str = "aiperf profile" + script: File = File(Path(__file__).parent.parent / "ai_dynamo/aiperf.sh") + report_name: str = Field( + default="aiperf_report.csv", + serialization_alias="report-name", + validation_alias=AliasChoices("report-name", "report_name"), + ) + + @property + def installables(self) -> list[Installable]: + return [self.script] + + class Constraints(BaseModel): """Constraints for validation of AI Dynamo configurations when using DSE.""" @@ -301,12 +320,13 @@ class AIDynamoCmdArgs(CmdArgs): dynamo: AIDynamoArgs lmcache: LMCache = Field(default_factory=LMCache) genai_perf: GenAIPerf = Field(default_factory=GenAIPerf) + aiperf: AIPerf = Field(default_factory=AIPerf) workloads: str = "genai_perf.sh" @field_validator("workloads", mode="before") @classmethod def validate_workloads(cls, v: str) -> str: - allowed_workloads = ["genai_perf.sh"] + allowed_workloads = ["genai_perf.sh", "aiperf.sh"] values = [w.strip() for w in v.split(",")] for workload in 
values: if workload not in allowed_workloads: @@ -322,6 +342,7 @@ def installables(self) -> list[Installable]: return [ *self.lmcache.installables, *self.genai_perf.installables, + *self.aiperf.installables, ] @@ -356,6 +377,7 @@ def get_workload_map(self) -> dict[str, Workload]: """Get a map of workload scripts to workload objects.""" return { self.cmd_args.genai_perf.script.src.name: self.cmd_args.genai_perf, + self.cmd_args.aiperf.script.src.name: self.cmd_args.aiperf, } @property diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh index fd391fa5f..46f5daa42 100644 --- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh +++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh @@ -35,6 +35,8 @@ declare -A lmcache_args declare -A lmcache_config declare -A genai_perf_args declare -A genai_perf_config +declare -A aiperf_args +declare -A aiperf_config declare -A dynamo_args dynamo_args["backend"]="vllm" @@ -163,6 +165,10 @@ _parse_cli_pairs() { genai_perf_args["--${key#--genai_perf-args-}"]="$2" ;; --genai_perf-*) genai_perf_config["--${key#--genai_perf-}"]="$2" ;; + --aiperf-args-*) + aiperf_args["--${key#--aiperf-args-}"]="$2" ;; + --aiperf-*) + aiperf_config["--${key#--aiperf-}"]="$2" ;; --hf-home) HUGGINGFACE_HOME="$2" ;; --storage-cache-dir) @@ -353,6 +359,8 @@ _dump_args() { log "LMCache args:\n$(arg_array_to_string lmcache_args)" log "GenAI config params:\n$(arg_array_to_string genai_perf_config)" log "GenAI-Perf args:\n$(arg_array_to_string genai_perf_args)" + log "AIPerf config params:\n$(arg_array_to_string aiperf_config)" + log "AIPerf args:\n$(arg_array_to_string aiperf_args)" log "--------------------------------" } @@ -505,6 +513,10 @@ _is_genai_perf_workload() { [[ "${dynamo_args["workloads"]}" == *"genai_perf.sh"* ]] } +_is_aiperf_workload() { + [[ "${dynamo_args["workloads"]}" == *"aiperf.sh"* ]] +} + _init_runtime_env() { if _is_vllm || _is_sglang; then export HF_HOME="${HUGGINGFACE_HOME}" @@ -1026,6 
+1038,10 @@ function launch_workloads()
     launch_workload genai_perf_config genai_perf_args
   fi
 
+  if _is_aiperf_workload; then
+    launch_workload aiperf_config aiperf_args
+  fi
+
   mark_done
 }
 
diff --git a/src/cloudai/workloads/ai_dynamo/aiperf.sh b/src/cloudai/workloads/ai_dynamo/aiperf.sh
new file mode 100644
index 000000000..9f5a78b33
--- /dev/null
+++ b/src/cloudai/workloads/ai_dynamo/aiperf.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# aiperf.sh — aiperf profile wrapper for ai_dynamo workloads.
+#
+# Called from ai_dynamo.sh's launch_workload() with:
+#   bash aiperf.sh --result-dir <dir> --model <name> --url <url> --port <port>
+#       [--cmd <cmd>] [--report-name <name>] [--extra-args <str>]
+#       -- <aiperf-flag> [<value>] ...
+#
+# Context flags (before --) that are recognised and used:
+#   --result-dir <dir>    Directory where artifacts and the final report are written.
+#   --model <name>        HuggingFace model identifier (e.g. Qwen/Qwen3-0.6B).
+#   --url <url>           Base URL of the dynamo.frontend (e.g. http://node01).
+#   --port <port>         HTTP port the dynamo.frontend is listening on.
+#   --report-name <name>  Output CSV name (default: aiperf_report.csv).
+#   --cmd <cmd>           Full launch command including subcommand (default: "aiperf profile").
+#   --extra-args <str>    Raw string, split on whitespace and appended after all other flags.
+#
+# All unrecognised flags (--install-dir, --gpus-per-node, etc.) are silently
+# consumed so this script is forward-compatible with launch_workload additions.
+#
+# Flags after -- (each with its optional value) are forwarded to the aiperf profile invocation.
+
+set -Eeuo pipefail
+
+result_dir=""
+model=""
+url="http://localhost"
+port=8000
+report_name="aiperf_report.csv"
+cmd="aiperf profile"
+declare -a extra_args=()
+declare -a aiperf_profile_args=()
+
+log() {
+    echo "[$(date '+%F %T') $(hostname)]: $*"
+}
+
+# Collect the "--flag [value]" groups that follow "--" into aiperf_profile_args.
+_parse_aiperf_args() {
+    # Examine one token at a time so that a boolean flag followed by another
+    # flag (e.g. "--verbose --concurrency 2") does not swallow that flag as its
+    # value and then drop the real value ("2").
+    while [[ $# -gt 0 ]]; do
+        if [[ "$1" == --* ]]; then
+            aiperf_profile_args+=("$1")
+            # The next token is this flag's value unless it is itself a flag.
+            if [[ $# -ge 2 && "$2" != --* ]]; then
+                aiperf_profile_args+=("$2")
+                shift
+            fi
+        fi
+        # Tokens that are neither a flag nor a flag's value are skipped.
+        shift
+    done
+}
+
+# Parse the context flags that precede "--"; unknown flags are skipped.
+process_args() {
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --result-dir)  result_dir="$2"; shift 2 ;;
+            --model)       model="$2"; shift 2 ;;
+            --url)         url="$2"; shift 2 ;;
+            --port)        port="$2"; shift 2 ;;
+            --report-name) report_name="$2"; shift 2 ;;
+            --cmd)         cmd="$2"; shift 2 ;;
+            --extra-args)  read -ra extra_args <<< "$2"; shift 2 ;;
+            --)            shift; _parse_aiperf_args "$@"; break ;;
+            --*)           if [[ -n "${2:-}" && "${2}" != -* ]]; then shift 2; else shift 1; fi ;; # consume unknown flag; shift 2 only if next arg is a value
+            *)             shift ;;
+        esac
+    done
+
+    log "Parsed args:
+  result_dir: $result_dir
+  model: $model
+  url: $url
+  port: $port
+  report_name: $report_name
+  cmd: $cmd
+  extra_args: ${extra_args[*]:-}
+  profile_args: ${aiperf_profile_args[*]:-}"
+}
+
+# Copy the first CSV found under the artifact directory to $result_dir/$report_name.
+process_results() {
+    local artifact_dir="$result_dir/aiperf_artifacts"
+    local csv_path
+    csv_path=$(find "$artifact_dir" -name "*.csv" -print -quit 2>/dev/null || true)
+    if [[ -n "$csv_path" ]]; then
+        cp "$csv_path" "$result_dir/$report_name"
+        log "aiperf report saved to $result_dir/$report_name"
+    else
+        log "ERROR: no CSV found in $artifact_dir — aiperf may not have completed"
+        exit 1
+    fi
+}
+
+# Validate the required flags, run aiperf, then publish the CSV report.
+main() {
+    process_args "$@"
+
+    if [[ -z "$result_dir" ]]; then
+        log "ERROR: --result-dir is required"; exit 1
+    fi
+    if [[ -z "$model" ]]; then
+        log "ERROR: --model is required"; exit 1
+    fi
+
+    local full_url="${url}:${port}"
+    local artifact_dir="$result_dir/aiperf_artifacts"
+    rm -rf "$artifact_dir"
+
+    # Split cmd into an array (e.g. "aiperf profile" → ["aiperf", "profile"])
+    local -a run_cmd=()
+    read -ra run_cmd <<< "$cmd"
+
+    log "Launching aiperf: ${run_cmd[*]} --model $model --url $full_url"
+
+    # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array; a bare
+    # "${arr[@]}" aborts under `set -u` on bash older than 4.4.
+    "${run_cmd[@]}" \
+        --model "$model" \
+        --url "$full_url" \
+        --endpoint-type chat \
+        --streaming \
+        --artifact-dir "$artifact_dir" \
+        --no-server-metrics \
+        ${aiperf_profile_args[@]+"${aiperf_profile_args[@]}"} \
+        ${extra_args[@]+"${extra_args[@]}"}
+
+    log "aiperf run complete"
+    process_results
+}
+
+main "$@"
+exit 0
diff --git a/src/cloudai/workloads/ai_dynamo/report_generation_strategy.py b/src/cloudai/workloads/ai_dynamo/report_generation_strategy.py
index a2f243712..a8e4e91b8 100644
--- a/src/cloudai/workloads/ai_dynamo/report_generation_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/report_generation_strategy.py
@@ -44,7 +44,6 @@ def extract_metric_from_csv(self, csv_file: Path, metric_name: str, metric_type:
 
     def get_metric(self, metric: str) -> MetricValue:
         logging.info(f"Getting metric: {metric}")
-        benchmark_name = "genai_perf"
         metric_name = metric
         metric_type = "avg"
 
@@ -54,6 +53,10 @@ def get_metric(self, metric: str) -> MetricValue:
                 logging.warning(f"Invalid metric format: {metric}. Expected 'benchmark:metric_name:metric_type'")
                 return METRIC_ERROR
             benchmark_name, metric_name, metric_type = parts
+        else:
+            # Derive from the configured workload script (e.g. "aiperf.sh" → "aiperf").
+ workloads_list = getattr(getattr(self.test_run.test, "cmd_args", None), "workloads_list", None) + benchmark_name = Path(workloads_list[0]).stem if workloads_list else "aiperf" source_csv = self.test_run.output_path / f"{benchmark_name}_report.csv" logging.info(f"CSV file: {source_csv}") diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py index 4fbee5c7d..17079875c 100644 --- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py @@ -117,6 +117,7 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]: args.extend(self._get_nested_toml_args(td.cmd_args.lmcache, "--lmcache-")) args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-")) + args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-")) return args diff --git a/tests/ref_data/ai-dynamo.sbatch b/tests/ref_data/ai-dynamo.sbatch index f9b5150c5..492e3c427 100644 --- a/tests/ref_data/ai-dynamo.sbatch +++ b/tests/ref_data/ai-dynamo.sbatch @@ -85,4 +85,8 @@ srun \ --genai_perf-random-seed "42" \ --genai_perf-request-count "100" \ --genai_perf-synthetic-input-tokens-mean "550" \ - --genai_perf-warmup-request-count "10" \ No newline at end of file + --genai_perf-warmup-request-count "10" \ + --aiperf-name "aiperf" \ + --aiperf-cmd "aiperf profile" \ + --aiperf-script "/cloudai_install/aiperf.sh" \ + --aiperf-report-name "aiperf_report.csv" \ No newline at end of file diff --git a/tests/workloads/ai_dynamo/test_report_gen_strategy.py b/tests/workloads/ai_dynamo/test_report_gen_strategy.py index 2674f4a77..0e51c414f 100644 --- a/tests/workloads/ai_dynamo/test_report_gen_strategy.py +++ b/tests/workloads/ai_dynamo/test_report_gen_strategy.py @@ -25,6 +25,7 @@ AIDynamoArgs, AIDynamoCmdArgs, AIDynamoTestDefinition, + AIPerf, GenAIPerf, LMCache, LMCacheArgs, @@ -47,6 +48,20 @@ def get_csv_content() -> str: ) 
+def get_aiperf_csv_content() -> str: + return ( + "Metric,avg,min,max\n" + "Inter Token Latency (ms),2.83,2.78,2.91\n" + "Time to First Token (ms),49.87,17.15,99.91\n" + "Output Sequence Length (tokens),498.06,410.00,501.00\n" + "\n" + "Metric,Value\n" + "Output Token Throughput (tokens/sec),595.68\n" + "Total Token Throughput (tokens/sec),954.47\n" + "Request Count,50.00\n" + ) + + @pytest.fixture def ai_dynamo_tr(tmp_path: Path) -> TestRun: test = AIDynamoTestDefinition( @@ -70,6 +85,7 @@ def ai_dynamo_tr(tmp_path: Path) -> TestRun: csv_content = get_csv_content() (tr.output_path / "genai_perf_report.csv").write_text(csv_content) + (tr.output_path / "aiperf_report.csv").write_text(get_aiperf_csv_content()) (tr.output_path / "profile_genai_perf.csv").write_text(csv_content) (tr.output_path / "profile_genai_perf.json").write_text("mock json content") (tr.output_path / test.success_marker).touch() @@ -77,6 +93,32 @@ def ai_dynamo_tr(tmp_path: Path) -> TestRun: return tr +@pytest.fixture +def ai_dynamo_aiperf_tr(tmp_path: Path) -> TestRun: + test = AIDynamoTestDefinition( + name="ai_dynamo_aiperf", + description="desc", + test_template_name="t", + cmd_args=AIDynamoCmdArgs( + docker_image_url="http://url", + workloads="aiperf.sh", + dynamo=AIDynamoArgs( + prefill_worker=WorkerConfig( + cmd="python3 -m dynamo.vllm --is-prefill-worker", + worker_initialized_regex="VllmWorker.*has.been.initialized", + args=WorkerBaseArgs(), + ), + ), + aiperf=AIPerf(), + lmcache=LMCache(args=LMCacheArgs()), + ), + ) + tr = TestRun(name="ai_dynamo_aiperf", test=test, num_nodes=1, nodes=[], output_path=tmp_path) + (tr.output_path / "aiperf_report.csv").write_text(get_aiperf_csv_content()) + (tr.output_path / test.success_marker).touch() + return tr + + @pytest.fixture def csv_content() -> str: return get_csv_content() @@ -89,38 +131,43 @@ def test_ai_dynamo_can_handle_directory(slurm_system: SlurmSystem, ai_dynamo_tr: def test_ai_dynamo_generate_report(slurm_system: SlurmSystem, 
ai_dynamo_tr: TestRun, csv_content: str) -> None: strategy = AIDynamoReportGenerationStrategy(slurm_system, ai_dynamo_tr) - # The new implementation does not generate a report file strategy.generate_report() - # Just verify the method runs without error assert True -def test_ai_dynamo_get_metric_single_values(slurm_system: SlurmSystem, ai_dynamo_tr: TestRun) -> None: +def test_ai_dynamo_get_metric_genai_perf(slurm_system: SlurmSystem, ai_dynamo_tr: TestRun) -> None: strategy = AIDynamoReportGenerationStrategy(slurm_system, ai_dynamo_tr) - # Test that metrics from the first CSV section work + # Default fixture uses workloads="genai_perf.sh" — bare names resolve to genai_perf_report.csv. + assert strategy.get_metric("Inter Token Latency (ms)") == 12.34 assert strategy.get_metric("Output Sequence Length (tokens)") == 101.01 - assert strategy.get_metric("Input Sequence Length (tokens)") == 123.45 + # Explicit prefix also works. + assert strategy.get_metric("genai_perf:Time To First Token (ms):avg") == 111.12 + assert strategy.get_metric("genai_perf:Inter Token Latency (ms):p50") == 89.01 -def test_ai_dynamo_get_metric_statistical_values(slurm_system: SlurmSystem, ai_dynamo_tr: TestRun) -> None: - strategy = AIDynamoReportGenerationStrategy(slurm_system, ai_dynamo_tr) - # Use exact metric names from CSV (with avg column, which is default) - assert strategy.get_metric("Time To First Token (ms)") == 111.12 - assert strategy.get_metric("Time To Second Token (ms)") == 11.13 - assert strategy.get_metric("Request Latency (ms)") == 1111.14 - assert strategy.get_metric("Inter Token Latency (ms)") == 12.34 +def test_ai_dynamo_get_metric_aiperf(slurm_system: SlurmSystem, ai_dynamo_aiperf_tr: TestRun) -> None: + strategy = AIDynamoReportGenerationStrategy(slurm_system, ai_dynamo_aiperf_tr) + + # aiperf fixture uses workloads="aiperf.sh" — bare names resolve to aiperf_report.csv. 
+ assert strategy.get_metric("Inter Token Latency (ms)") == 2.83 + assert strategy.get_metric("Output Token Throughput (tokens/sec)") == 595.68 + + # Explicit prefix. + assert strategy.get_metric("aiperf:Inter Token Latency (ms):avg") == 2.83 + assert strategy.get_metric("aiperf:Time to First Token (ms):avg") == 49.87 + assert strategy.get_metric("aiperf:Output Token Throughput (tokens/sec):avg") == 595.68 + assert strategy.get_metric("aiperf:Total Token Throughput (tokens/sec):avg") == 954.47 def test_ai_dynamo_get_metric_invalid(slurm_system: SlurmSystem, ai_dynamo_tr: TestRun) -> None: strategy = AIDynamoReportGenerationStrategy(slurm_system, ai_dynamo_tr) - assert strategy.get_metric("invalid-metric") == METRIC_ERROR + assert strategy.get_metric("nonexistent-metric") == METRIC_ERROR - # Empty the CSV file to test error handling (ai_dynamo_tr.output_path / "genai_perf_report.csv").write_text("") - assert strategy.get_metric("invalid-metric") == METRIC_ERROR + assert strategy.get_metric("Inter Token Latency (ms)") == METRIC_ERROR def test_was_run_successful(ai_dynamo_tr: TestRun) -> None: