From e6ee2f1479fd5279afa5bc23a28416358280e39c Mon Sep 17 00:00:00 2001 From: blik616287 Date: Thu, 11 Jun 2026 23:18:44 +0000 Subject: [PATCH 1/3] Add nvidia-vss-vllm 1.0.0 (upstream VSS 2.4.1) Validated on NVIDIA GB10 / DGX Spark. Pack 1.0.0 / appVersion 2.4.1. Co-Authored-By: Claude Opus 4.8 --- packs/nvidia-vss-vllm-1.0.0/README.md | 21 +++ packs/nvidia-vss-vllm-1.0.0/logo.png | Bin 0 -> 7892 bytes .../manifests/llm-vllm.yaml | 129 ++++++++++++++++++ packs/nvidia-vss-vllm-1.0.0/pack.json | 31 +++++ packs/nvidia-vss-vllm-1.0.0/presets.yaml | 124 +++++++++++++++++ packs/nvidia-vss-vllm-1.0.0/schema.yaml | 16 +++ packs/nvidia-vss-vllm-1.0.0/values.yaml | 68 +++++++++ 7 files changed, 389 insertions(+) create mode 100644 packs/nvidia-vss-vllm-1.0.0/README.md create mode 100644 packs/nvidia-vss-vllm-1.0.0/logo.png create mode 100644 packs/nvidia-vss-vllm-1.0.0/manifests/llm-vllm.yaml create mode 100644 packs/nvidia-vss-vllm-1.0.0/pack.json create mode 100644 packs/nvidia-vss-vllm-1.0.0/presets.yaml create mode 100644 packs/nvidia-vss-vllm-1.0.0/schema.yaml create mode 100644 packs/nvidia-vss-vllm-1.0.0/values.yaml diff --git a/packs/nvidia-vss-vllm-1.0.0/README.md b/packs/nvidia-vss-vllm-1.0.0/README.md new file mode 100644 index 00000000..7235671b --- /dev/null +++ b/packs/nvidia-vss-vllm-1.0.0/README.md @@ -0,0 +1,21 @@ +# NVIDIA VSS LLM (vLLM) + +The bounded raw vLLM LLM backend for NVIDIA VSS on platforms where the TensorRT-LLM NIM is unsupported (DGX Spark / GB10 sm_121). Serves the OpenAI API as `llm-nim-svc:8000` for the CA-RAG pipeline. Part of the upstream **VSS 2.4.1** blueprint. + +## Prerequisites + +- An NGC API key (the vLLM image is `nvcr.io/nvidia/vllm`). +- A GPU node (validated on NVIDIA GB10 / DGX Spark). + +## Parameters + +| **Parameter** | **Description** | **Type** | **Default** | **Required** | +|---|---|---|---|---| +| `spectro.var.VSS_PLATFORM` | Hardware platform preset | String | `DGX-SPARK` | No | + +## Usage + +Add to vLLM-backed VSS profiles at install-priority 12 (after `nvidia-vss-core-nims`). Omit on trtllm profiles (H100/L40S) which use the NIM-LLM subchart instead. + +--- +**Upstream:** NVIDIA VSS Blueprint 2.4.1 (`vllm:25.12.post1-py3`). **Pack version:** 1.0.x. diff --git a/packs/nvidia-vss-vllm-1.0.0/logo.png b/packs/nvidia-vss-vllm-1.0.0/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..a98c78690812d65f46914a8f9319c7ed2a32fefe GIT binary patch literal 7892 zcmc(EWmFv97G*a90u2PW0HK2f_XKIQgS!Ws;O^2i(l`wPfR0951EvjB6$${<#NpkTVcj2tEflm=0RW$806}><;l3XZRNba&P~2 z^Duz_ML{@-Gw7>ofTdmFR$xJHUT$6n2@n_z7K2+_L$qY%{uzFMB+g)qK)6D9c)Yy4 zxV;3pUEnr6d?F$uJiPoo{QO+^3@&%1Gs4W9%h{dr-y;80N5;zC0&eGuuyb(+|J5}! zckw`oGcf!O^q=t@Hsh8 z=lC&o&M#sWVnI@pQqRvkYB6=!8d@m%)*k97ex9X?f`47mvF~HjZSI~ie%t$NL9vKl zszAH^qFRae)o&|y!n|N((~io9maC^WNB&gLryl@gu(1WQaz{I)PX@O}W2&za=XTF| zmEMwKVgjL3L`45zfyy59Ykt*ygwF4GE@QPvV|&Eqzkj@+&e0Oh{a(pU@O23U%BqkG ziy*@6qyTEh*Ekt8*RT^2VU=UZCh#@lM4+ObMtfs)gM(NV$XO$!Yh(mgq zS>9)?yvGvNp^ctND&_VL-~O7RMLDZ*JwWxAxFLuRj;D1{PWGulKwX68oWYL1H>o^i0m(_ZchN5wUlayW0~h;u$=3 z)lFk#ch9}k+JjPUQM?JbX#jm(FhY?GJlzyfo9kf&gGid7samc&)f&`>8JAJ#IuHDq zv|(jMD{4f+Z}o_#NTCi&Mjep1O2k~*Y9GZ53L0wjoE<6$KD?AYRR3)JPCuX?p4Lut zhko5KbT&3Vg}sT3(LIG($@1o=%a>iHGnwG-%%K6TTCR&e@9PyV_bc?9^Ist=>WZak zoYluxEf=TSjw5{i@o7%Q?Py>rw*|IRAbq>`wSMKES<_DbMmc?65~0O)p=+Sgv}VJo z`gmkUXS5JgU3Q;p1nN^BqE)PzEF}9&apN$xmx-9}EKRD(Lg$)OvbPy~XuMogL;}8~ z!KZ@@ny;;kqV87r$JWXlKLd>y-`o{>%RtpYNag15cdvd)wAFUO_;_EJNqg*EYBwww zkXa9lvqw;no6AjQK_aY^HRRWgns*_mDwSrJo-g9gi$>&O(2u7Q0l(kvk8iuHi@Rz) zVwY2{l)O{?6>yegz)?XPFOB?~_Zv&C$7-YlUwaS zal!rX{)Ov=wwz70DYWAKR0EqS7Fn{5s;oWRI?6`ydED!j)a`|?#R_rt@-JYCbV@xS zhDAQuuiNX2yul~$W83PIsmZQN(-`^WKq?E#(acoGOxvCRK^VF#u{7-Bag_wBsKA+` zu7|NdY$vzn_J{(W@sVdiv)*Rm26IsTqv31S)r3#xIHn(|oz92Pw|}Lle2)5Z;J)9k ztA>Wn$mBCYVu`C=y(v#m#PHJatMk$AUDg5;LCtt2Qv~Hv=H06e z6Y-b^&sirX?8WuE@8u4%_y@MrXZ9yj&dk*zK;s07yaq}h zjc2kt^g&Y|e$$;83>GraH$oj7rV`;89pfzWVK5eh4YhtQcn57?cQwUt6>j+v5H3yd zi$sS)*~2s)(J|y!xTh@`nPFkyerYOB1e~{g@qemh9x|FENCHC5g9&GIgz%+ zhdHEHZ1ygBwFz*)KRGk~?}e?tdp`4Vu8r^NL$I8 z)6$tR>#)YF(|?td8p^~FaQuBL;sQ1=|6rU{vB%VmIr!zE1)F@EKiD~m+|o8x|5oRb z&Fip1Rnx8|)-J!du@a&k7K~W<_H|8vwsObb(VMvOSBZxC2CuT|_wJdBG&`rT{6S!b zy@Ce#F!R5hg@8fd%MpTaF-R0SvJHJgcWH5 zOO|zsOyihIV^+f~1ja75vN(;i0#{niIXH~Eno7q5I(qYEIkKjE=TnOPLrHs_yQEDS z&6+p;R`ilq%h5S7V*ZZ$eadGM;3A8VC;)6e%7R%c|ewJl2xs z{WUCM9uQ4eKwB>Lib`Y-grboxU1(>=qq~#pr)Zw~Gq+ z=}CL=G;tE>Z5~*$twju`!nS$c6btjgwkeWBKVCWW;B65@-oFruqJ6UN{4`hcxMF^D zFma!1zl{MtkMAnsr|644DDpe+QPo*=l{2Z_otPl*dd*zl{N8AK=PiUE)ao>oqD3z< zMd(;J2Sdks?+Lo#QDar<$NYhwJaSw!zw+zrSRXpW|Lst2I$qGkb=e{BfAZw~8$Pue zRK%i7f*SPYrybj>s$X{F%SI& zHs%(W1lB0ex>zENj7eh`Q0-Bc8YM9pXZUd%63ZkYf%XjL+ml+nNPbbfvz|>h9D^}8 zbli-DOS$aqdUbV~1Fy!pV>Z@Cc1n$<@6cd6W@B6I4zBqOjdxyi2!wJzwuCRBr=vu1 zC7&EiV7uL~!;OhNl%Okto{oLLTo~3o_SsC7b@S#omNf~Y_hF+I4Ibz61$yo=i?zy{GhZf)a7S5;bQ zD}Ge=WOI22gUQ+&iFWWO6M60o5)Asho(?PZ9V=pI%lxkld)-6q4u7aA{`6 zvVz0Bu5>wz4f{5$6i836^Cn@1uPs?*-gGDieZhg*)M^H-d7{ZfRdvyH4j*#&P2ai} zkG4D>Px^F7mkset>GnW9D_wv< zBCW}Jws>TXtVNCW>DciU70V@4^~lU5b?RPPH!0iXK@(_*gxt^p4$gxu6bUXnJgE&)1ON73BOx;Z6egm%DE%l zE6_c~hC@>nkr>o+ptp#Wz|-C**^LyrUl_7RyaG+JK&NB7d^>7uH*ygX=%Sm|pIz>y z2eQgmfs2yN4thzf3Y4^M#HWd;5pgPlaPASsakt`8=X|FpLo&0oCb}!S8qdAHvwnjb^o$4=r*hqM=idCx$tzJJaK;$9C> z)+>7#Sw5^&BxO*VW?}Q*knd0ezP7sVP`7`-3FoN>3{Zrp!tD;0jt^H9WPXA=Ee8ZnkmwjXsl=~ zSIA_V=DY4f=mFWCKB9*z(xsw?%_q|x3X3O?#*qlrc}-}a&!AY5*un8*bCF{zj*Nph zr~>Nm0xxqavYJC*BB8qQzHVYHwvZVj^l}w}TH%pUhcAEcFpxviUJE(dxN8uk>Gb3hHp0MyuVXwzT?+CE} zAYmR-U%G!fVCJSyU{E5^!*8?tr=^(8DjwARekB=H+TTJ?nm-g;cpJp({By!=*CZ&{ zivLkp<&q3rMC*_2{sCriX{#yI`5~haWyzy4&V{Mju7pr*nKxvVgEGUJK_7_=N=L#+ z^+9wpNc(#7_zeuQU_1H9A758D<)#%VvC8x0&rWEIP)FfL?SCI22{%*_IR4H3J0t}^ z2@ehcs7bVijS9lLM?B#~ru(-{1$rV(&U@7Vk-!9%B9pla0-Hjma*AI6a8qp^-DkP5 zm2OK89J%;PxxHL8UpH3`2IJsh+^hbd!4EzRmeXPEyHB8EKeKYfL`5PhAZA(8O|Lfj z$piFm(1aAQdtIL!>9qD|`WcwfNkabFx1-V1kmr3OH|N^DJ$)8mN?GDd-DZLUf$f6? z&5)G|OsnLd*LYG^h6&e7ZN)0-(z)S5Ho@}_H;moJ_27>w+JZE*56RseOdH3qjbG{; zmn!VwAd!x1HpTPMmK&M*5!4fg%sft!c1{4f2NM2nEt#v-w&n9h*WR0Q*xdZHi0uA{ z%=6Ct49xw+6XP`ri%qEyq`WOQn+^x z)2gRGa)U&8x+G#dm!^rjUB;dwc-C5G_l});Ai9EaX$B`aov&k2$}o*YQblw3!+So0 z3gh-k!DH+3kjwR2l51kg0np2@6jDI(n$_;4Qm|v_=K<5^T8rB{=4>k`*hn~=B8MVx zo>GlbgnI@aadU1;knqH@y_|tbLHKF*$q>Bh8#7nBjFcqSi)hTNX#k9%U5s}Aah@0@ zns;}-NH2LVDDO0scfo|NomdF4*8Kd#(NU73jmY;#OGoe?pY4kf7AdiPCrL(sbRjO0 z{3Pc}#>B&ETABddvHRkN50m7j8Z0hpz}Y#T{zd6rlb5vA=K@FEsoklm8=i77$V|8m zX_9BPHEqoiwB(q`7NsIFIw%J09{+_J?svIji*gcmS$=x=*cR-pldA74WkZU#*qJ0e6&KS6X@l{9_Ae{W;r+}EAobcrDYP` zj1Q#Maqh4?*|$F6`-~0R+-jonzhQ9ImB^%~5qCvG=2jzf9dE278SOUhZ6YL06qwDQXD0FK2n2d>_ z9h>18q7btxWQ3BOC87kIsE({4AY6Bs#j?Q8b}bIFqi+Q(qBXE++g4^irdh zq+3JejPr5|VCN}Ea2U*N67f9KPC<)kq!BR0#{cRH`1 zYl*dtb81C#s$%zahNBY253&2*; zsevASc?b>k%LkZjsrJ@=(kHh%9t7a-K_Y9Kvb3{Y>PxC$XPY~m{9B+}!XO%O=4%C} z0p?a~UYR^IIsEdwmufAoStkrV$hwDp_WGFoF?&gC7))s)`V=PR7m9jBu^Py*yi3Ep zM*)i{L|dYuisBytM_Vn0KriE;M(UR~vm-f49LW}J%-<5r?&+vc=^1Ez? zN-$iK>uIdTj3PUZXc+yZtO?b#ai}b0ko)32+@E?}fL$IxMT&-ctGGZG3i;@Up2Mg# z&@lX1q&r>C*oHG%Po#fM$_=7;&7Z zs(}OdEC6?DB=4bhs`f1S7qGL0r1;*v95pjU7cvWZ=$F>M|K;A?xaWb=T;Qm<*J*Spp4Y41kMUaq9}c3`33fum(603y=W_mF;TQ`7I;ViV>7<4R4v>7 zb=li`QP%E3whx|HCtKs`>FF%Eus9;){+{>oJb*EMPk!kK z;7umW_>jrm!26|PobTWGX!3P16QwDIGEa$OI8n{*5Y?$`rkVAd6t@;F{pygdcWbF7 zqzo^_!(V@PG#9yQzxgR6*HfDAFd(b(?X;EX*^V-e08lcC0`(;14=@Swgh3*jk>(Q| z`-Jy_QOqjZ91J#j_54TKP@mdhY!lA?y!R#VL5fkS(XWslHu#cwED>uU%JV(oT4$Bg|QU_eexU(&JB@ie#b#bZ96WJUw) zWqa<2tLUC@5hxXnYl|}+*Dn$geeZePibs{Xu0o~;@=a*(Km1b7 zT1UZXf>B#7GWT?3<40@~?;G_3%wPnjBM23HjaeU}*2%0?^Tt>zy1&lnGTEl z#3{hjXQE6SRHRyf6c(MLO|}zQcL~-^feF?#Iq~9rly+}dM>uY@rl+E_pj24@*MGa= bWB#FUokt+KSayW}{;O4#Rg Date: Fri, 12 Jun 2026 00:52:34 +0000 Subject: [PATCH 2/3] nvidia-vss-vllm: bump vLLM 26.05.post1 + nginx 1.30.2-alpine (CVE), verified on GB10 --- packs/nvidia-vss-vllm-1.0.0/README.md | 2 ++ packs/nvidia-vss-vllm-1.0.0/presets.yaml | 6 +++--- packs/nvidia-vss-vllm-1.0.0/values.yaml | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/packs/nvidia-vss-vllm-1.0.0/README.md b/packs/nvidia-vss-vllm-1.0.0/README.md index 7235671b..a91fed2f 100644 --- a/packs/nvidia-vss-vllm-1.0.0/README.md +++ b/packs/nvidia-vss-vllm-1.0.0/README.md @@ -19,3 +19,5 @@ Add to vLLM-backed VSS profiles at install-priority 12 (after `nvidia-vss-core-n --- **Upstream:** NVIDIA VSS Blueprint 2.4.1 (`vllm:25.12.post1-py3`). **Pack version:** 1.0.x. + +> **CVE bumps (public images):** vLLM `nvcr.io/nvidia/vllm` `25.12.post1-py3` → `26.05.post1-py3` and the nginx health-proxy sidecar `1.27-alpine` → `1.30.2-alpine`. Both verified on the DGX Spark (GB10): vLLM loads `meta/llama-3.1-8b-instruct` and serves the OpenAI API, nginx `/v1/health/live` → 200, vss-engine ready. The Jetson-Thor `ghcr.io/nvidia-ai-iot/vllm` tag is left floating (Tegra-only, verify before use). diff --git a/packs/nvidia-vss-vllm-1.0.0/presets.yaml b/packs/nvidia-vss-vllm-1.0.0/presets.yaml index bdebe72e..3a58860c 100644 --- a/packs/nvidia-vss-vllm-1.0.0/presets.yaml +++ b/packs/nvidia-vss-vllm-1.0.0/presets.yaml @@ -23,7 +23,7 @@ presets: llm-vllm: replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:25.12.post1-py3" + image: "nvcr.io/nvidia/vllm:26.05.post1-py3" hfModel: "NousResearch/Meta-Llama-3.1-8B-Instruct" servedModelName: "meta/llama-3.1-8b-instruct" tensorParallel: "1" @@ -42,7 +42,7 @@ presets: llm-vllm: replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:25.12.post1-py3" + image: "nvcr.io/nvidia/vllm:26.05.post1-py3" hfModel: "NousResearch/Meta-Llama-3.1-8B-Instruct" servedModelName: "meta/llama-3.1-8b-instruct" tensorParallel: "1" @@ -62,7 +62,7 @@ presets: llm-vllm: replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:25.12.post1-py3" + image: "nvcr.io/nvidia/vllm:26.05.post1-py3" hfModel: "nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8" servedModelName: "nvidia/nemotron-nano-9b-v2-fp8" tensorParallel: "1" diff --git a/packs/nvidia-vss-vllm-1.0.0/values.yaml b/packs/nvidia-vss-vllm-1.0.0/values.yaml index 080382b5..63f8796b 100644 --- a/packs/nvidia-vss-vllm-1.0.0/values.yaml +++ b/packs/nvidia-vss-vllm-1.0.0/values.yaml @@ -39,8 +39,8 @@ pack: spectrocloud.com/install-priority: "12" content: images: - - image: nvcr.io/nvidia/vllm:25.12.post1-py3 - - image: nginx:1.27-alpine + - image: nvcr.io/nvidia/vllm:26.05.post1-py3 + - image: nginx:1.30.2-alpine # Jetson Thor (AGX-THOR/IGX-THOR presets) — floating tag, verify before use - image: ghcr.io/nvidia-ai-iot/vllm:latest-jetson-thor @@ -50,8 +50,8 @@ manifests: # DGX-SPARK (GB10) default — the empirically-validated config. replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:25.12.post1-py3" - nginxImage: "nginx:1.27-alpine" + image: "nvcr.io/nvidia/vllm:26.05.post1-py3" + nginxImage: "nginx:1.30.2-alpine" hfModel: "NousResearch/Meta-Llama-3.1-8B-Instruct" servedModelName: "meta/llama-3.1-8b-instruct" tensorParallel: "1" From 8eea83446f4f19650978470799f7a521a9fe8c21 Mon Sep 17 00:00:00 2001 From: blik616287 Date: Fri, 12 Jun 2026 01:46:57 +0000 Subject: [PATCH 3/3] nvidia-vss-vllm: keep nginx 1.30.2-alpine CVE bump; revert vLLM to clean 25.12.post1 (26.05 ships upstream JWT, no CVE gain) --- packs/nvidia-vss-vllm-1.0.0/README.md | 2 +- packs/nvidia-vss-vllm-1.0.0/presets.yaml | 6 +++--- packs/nvidia-vss-vllm-1.0.0/values.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packs/nvidia-vss-vllm-1.0.0/README.md b/packs/nvidia-vss-vllm-1.0.0/README.md index a91fed2f..0f1865ea 100644 --- a/packs/nvidia-vss-vllm-1.0.0/README.md +++ b/packs/nvidia-vss-vllm-1.0.0/README.md @@ -20,4 +20,4 @@ Add to vLLM-backed VSS profiles at install-priority 12 (after `nvidia-vss-core-n --- **Upstream:** NVIDIA VSS Blueprint 2.4.1 (`vllm:25.12.post1-py3`). **Pack version:** 1.0.x. -> **CVE bumps (public images):** vLLM `nvcr.io/nvidia/vllm` `25.12.post1-py3` → `26.05.post1-py3` and the nginx health-proxy sidecar `1.27-alpine` → `1.30.2-alpine`. Both verified on the DGX Spark (GB10): vLLM loads `meta/llama-3.1-8b-instruct` and serves the OpenAI API, nginx `/v1/health/live` → 200, vss-engine ready. The Jetson-Thor `ghcr.io/nvidia-ai-iot/vllm` tag is left floating (Tegra-only, verify before use). +> **CVE bump:** nginx health-proxy sidecar `1.27-alpine` → `1.30.2-alpine` (verified on GB10, `/v1/health/live` → 200; pack-central pax-cve confirms **0 Critical**, down from `1.27`'s 3 / 137 total). The vLLM image stays at `nvcr.io/nvidia/vllm:25.12.post1-py3` — a bump to `26.05.post1-py3` was tested on the GB10 (serves fine) but **reverted**: that image ships a JWT in its pip HTTP-cache (`/root/.cache/pip/...`, an upstream NVIDIA build-hygiene leak flagged by the secret scan) and gives **no CVE benefit** (the runtime image scans clean at both versions). The Jetson-Thor `ghcr.io/nvidia-ai-iot/vllm` tag is left floating (Tegra-only, verify before use). diff --git a/packs/nvidia-vss-vllm-1.0.0/presets.yaml b/packs/nvidia-vss-vllm-1.0.0/presets.yaml index 3a58860c..bdebe72e 100644 --- a/packs/nvidia-vss-vllm-1.0.0/presets.yaml +++ b/packs/nvidia-vss-vllm-1.0.0/presets.yaml @@ -23,7 +23,7 @@ presets: llm-vllm: replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:26.05.post1-py3" + image: "nvcr.io/nvidia/vllm:25.12.post1-py3" hfModel: "NousResearch/Meta-Llama-3.1-8B-Instruct" servedModelName: "meta/llama-3.1-8b-instruct" tensorParallel: "1" @@ -42,7 +42,7 @@ presets: llm-vllm: replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:26.05.post1-py3" + image: "nvcr.io/nvidia/vllm:25.12.post1-py3" hfModel: "NousResearch/Meta-Llama-3.1-8B-Instruct" servedModelName: "meta/llama-3.1-8b-instruct" tensorParallel: "1" @@ -62,7 +62,7 @@ presets: llm-vllm: replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:26.05.post1-py3" + image: "nvcr.io/nvidia/vllm:25.12.post1-py3" hfModel: "nvidia/NVIDIA-Nemotron-Nano-9B-v2-FP8" servedModelName: "nvidia/nemotron-nano-9b-v2-fp8" tensorParallel: "1" diff --git a/packs/nvidia-vss-vllm-1.0.0/values.yaml b/packs/nvidia-vss-vllm-1.0.0/values.yaml index 63f8796b..a762f704 100644 --- a/packs/nvidia-vss-vllm-1.0.0/values.yaml +++ b/packs/nvidia-vss-vllm-1.0.0/values.yaml @@ -39,7 +39,7 @@ pack: spectrocloud.com/install-priority: "12" content: images: - - image: nvcr.io/nvidia/vllm:26.05.post1-py3 + - image: nvcr.io/nvidia/vllm:25.12.post1-py3 - image: nginx:1.30.2-alpine # Jetson Thor (AGX-THOR/IGX-THOR presets) — floating tag, verify before use - image: ghcr.io/nvidia-ai-iot/vllm:latest-jetson-thor @@ -50,7 +50,7 @@ manifests: # DGX-SPARK (GB10) default — the empirically-validated config. replicas: 1 gpuCount: 1 - image: "nvcr.io/nvidia/vllm:26.05.post1-py3" + image: "nvcr.io/nvidia/vllm:25.12.post1-py3" nginxImage: "nginx:1.30.2-alpine" hfModel: "NousResearch/Meta-Llama-3.1-8B-Instruct" servedModelName: "meta/llama-3.1-8b-instruct"