From ae8fa23b7f032b5051ef6f239aa0538b8923a736 Mon Sep 17 00:00:00 2001 From: Omobayode Fagbohungbe Date: Thu, 7 May 2026 12:25:41 -0500 Subject: [PATCH] fix: rename VLLM_SPYRE ENV to SENDNN_INFERENCE Signed-off-by: Omobayode Fagbohungbe --- .staging/pre-1.0/infer/vllm/driver | 6 +++--- .staging/spyre/infer/vllm/driver | 6 +++--- infer/vllm/README.md | 16 ++++++++-------- infer/vllm/driver | 6 +++--- infer/vllm/process | 8 ++++---- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.staging/pre-1.0/infer/vllm/driver b/.staging/pre-1.0/infer/vllm/driver index ac6711d..c167a22 100755 --- a/.staging/pre-1.0/infer/vllm/driver +++ b/.staging/pre-1.0/infer/vllm/driver @@ -23,9 +23,9 @@ def main(): t1 = fmwork.time_get() print(); print('FMWORK DATASET', '%.6f' % (fmwork.time_diff(t1, t0))) - os.environ["VLLM_SPYRE_WARMUP_PROMPT_LENS"] = par.input_size - os.environ["VLLM_SPYRE_WARMUP_NEW_TOKENS"] = par.output_size - os.environ['VLLM_SPYRE_WARMUP_BATCH_SIZES'] = par.batch_size + os.environ["SENDNN_INFERENCE_WARMUP_PROMPT_LENS"] = par.input_size + os.environ["SENDNN_INFERENCE_WARMUP_NEW_TOKENS"] = par.output_size + os.environ['SENDNN_INFERENCE_WARMUP_BATCH_SIZES'] = par.batch_size llm() runs() diff --git a/.staging/spyre/infer/vllm/driver b/.staging/spyre/infer/vllm/driver index 4a3a584..8655f58 100755 --- a/.staging/spyre/infer/vllm/driver +++ b/.staging/spyre/infer/vllm/driver @@ -16,9 +16,9 @@ def main(): params() # spyre environment variables - os.environ["VLLM_SPYRE_WARMUP_PROMPT_LENS"] = par.input_size - os.environ["VLLM_SPYRE_WARMUP_NEW_TOKENS"] = str(max(var.output_sizes)) - os.environ['VLLM_SPYRE_WARMUP_BATCH_SIZES'] = par.batch_size + os.environ["SENDNN_INFERENCE_WARMUP_PROMPT_LENS"] = par.input_size + os.environ["SENDNN_INFERENCE_WARMUP_NEW_TOKENS"] = str(max(var.output_sizes)) + os.environ['SENDNN_INFERENCE_WARMUP_BATCH_SIZES'] = par.batch_size if par.dataset_path: fmwork.banner('DATASET') diff --git a/infer/vllm/README.md b/infer/vllm/README.md index 
25cd27f..54418d0 100644 --- a/infer/vllm/README.md +++ b/infer/vllm/README.md @@ -71,9 +71,9 @@ client \ --env FLEX_RDMA_MODE_FULL=FALSE \ --env FLEX_HDMA_MODE_FULL=1 \ --env OMP_NUM_THREADS=32 \ - --env VLLM_SPYRE_WARMUP_PROMPT_LENS=1024 \ - --env VLLM_SPYRE_WARMUP_NEW_TOKENS=128 \ - --env VLLM_SPYRE_WARMUP_BATCH_SIZES=1 \ + --env SENDNN_INFERENCE_WARMUP_PROMPT_LENS=1024 \ + --env SENDNN_INFERENCE_WARMUP_NEW_TOKENS=128 \ + --env SENDNN_INFERENCE_WARMUP_BATCH_SIZES=1 \ -- \ driver \ --platform spyre \ @@ -108,7 +108,7 @@ driver \ --env FLEX_RDMA_MODE_FULL=FALSE \ --env FLEX_HDMA_MODE_FULL=1 \ --env OMP_NUM_THREADS=32 \ - --env VLLM_SPYRE_USE_CB=1 \ + --env SENDNN_INFERENCE_USE_CB=1 \ -- \ driver \ --platform spyre \ @@ -146,9 +146,9 @@ server \ --env FLEX_RDMA_MODE_FULL=FALSE \ --env FLEX_HDMA_MODE_FULL=1 \ --env OMP_NUM_THREADS=32 \ - --env VLLM_SPYRE_WARMUP_PROMPT_LENS=1024 \ - --env VLLM_SPYRE_WARMUP_NEW_TOKENS=128 \ - --env VLLM_SPYRE_WARMUP_BATCH_SIZES=1 \ + --env SENDNN_INFERENCE_WARMUP_PROMPT_LENS=1024 \ + --env SENDNN_INFERENCE_WARMUP_NEW_TOKENS=128 \ + --env SENDNN_INFERENCE_WARMUP_BATCH_SIZES=1 \ --no-enable-prefix-caching \ --max-model-len 2048 \ --max-num-seqs 1 \ @@ -182,7 +182,7 @@ server \ --env FLEX_RDMA_MODE_FULL=FALSE \ --env FLEX_HDMA_MODE_FULL=1 \ --env OMP_NUM_THREADS=32 \ - --env VLLM_SPYRE_USE_CB=1 \ + --env SENDNN_INFERENCE_USE_CB=1 \ --no-enable-prefix-caching \ --max-model-len 2048 \ --max-num-seqs 1 \ diff --git a/infer/vllm/driver b/infer/vllm/driver index c1e49c8..0da74dd 100755 --- a/infer/vllm/driver +++ b/infer/vllm/driver @@ -77,9 +77,9 @@ def setup_runtime_spyre(args): print('setup_runtime: spyre') - os.environ["VLLM_SPYRE_WARMUP_PROMPT_LENS"] = str(max(args.input_sizes)) - os.environ["VLLM_SPYRE_WARMUP_NEW_TOKENS"] = str(max(args.output_sizes)) - os.environ['VLLM_SPYRE_WARMUP_BATCH_SIZES'] = str(max(args.batch_sizes)) + os.environ["SENDNN_INFERENCE_WARMUP_PROMPT_LENS"] = str(max(args.input_sizes)) + 
os.environ["SENDNN_INFERENCE_WARMUP_NEW_TOKENS"] = str(max(args.output_sizes)) + os.environ['SENDNN_INFERENCE_WARMUP_BATCH_SIZES'] = str(max(args.batch_sizes)) # ------------ # setup engine diff --git a/infer/vllm/process b/infer/vllm/process index 331f492..422e1ce 100755 --- a/infer/vllm/process +++ b/infer/vllm/process @@ -246,7 +246,7 @@ def process_direct(args): split = line.strip().split(' ') opt = '--env ' + ' '.join(split[2:]) opts.append(opt) - if 'VLLM_SPYRE_USE_CB' in line: + if 'SENDNN_INFERENCE_USE_CB' in line: value = line.strip().split('=')[-1] if value == '1': batch_mode = 'continuous' @@ -460,7 +460,7 @@ def process_server(args): batch_mode = 'static' for line in open(log_server): if line.startswith('FMWORK EXP'): - if 'VLLM_SPYRE_USE_CB' in line: + if 'SENDNN_INFERENCE_USE_CB' in line: value = line.strip().split('=')[-1] if value == '1': batch_mode = 'continuous' @@ -586,8 +586,8 @@ def process_server(args): num_prompts = None # Extract warmup parameters from server.cmd - warmup_prompt_match = re.search(r'VLLM_SPYRE_WARMUP_PROMPT_LENS=(\d+)', server_cmd_content) - warmup_tokens_match = re.search(r'VLLM_SPYRE_WARMUP_NEW_TOKENS=(\d+)', server_cmd_content) + warmup_prompt_match = re.search(r'SENDNN_INFERENCE_WARMUP_PROMPT_LENS=(\d+)', server_cmd_content) + warmup_tokens_match = re.search(r'SENDNN_INFERENCE_WARMUP_NEW_TOKENS=(\d+)', server_cmd_content) warmup_input = int(warmup_prompt_match.group(1)) if warmup_prompt_match else None warmup_output = int(warmup_tokens_match.group(1)) if warmup_tokens_match else None