diff --git a/packs/nvidia-vss-nims-3.1.15/README.md b/packs/nvidia-vss-nims-3.1.15/README.md new file mode 100644 index 00000000..2803e895 --- /dev/null +++ b/packs/nvidia-vss-nims-3.1.15/README.md @@ -0,0 +1,3 @@ +# NVIDIA VSS NIMs (cosmos VLM + embed) + +NVIDIA VSS 3.x NIMs — the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile. diff --git a/packs/nvidia-vss-nims-3.1.15/charts/nvidia-vss-nims-3.1.15.tgz b/packs/nvidia-vss-nims-3.1.15/charts/nvidia-vss-nims-3.1.15.tgz new file mode 100644 index 00000000..b8f707e0 Binary files /dev/null and b/packs/nvidia-vss-nims-3.1.15/charts/nvidia-vss-nims-3.1.15.tgz differ diff --git a/packs/nvidia-vss-nims-3.1.15/logo.png b/packs/nvidia-vss-nims-3.1.15/logo.png new file mode 100644 index 00000000..a98c7869 Binary files /dev/null and b/packs/nvidia-vss-nims-3.1.15/logo.png differ diff --git a/packs/nvidia-vss-nims-3.1.15/pack.json b/packs/nvidia-vss-nims-3.1.15/pack.json new file mode 100644 index 00000000..61fb941b --- /dev/null +++ b/packs/nvidia-vss-nims-3.1.15/pack.json @@ -0,0 +1,47 @@ +{ + "addonType": "system app", + "annotations": { + "source": "community", + "contributor": "spectrocloud", + "docsURL": "https://docs.nvidia.com/vss/latest/index.html", + "description": "NVIDIA VSS 3.x NIMs \u2014 the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. 
Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile. rtvi-embed (search profile) supports a self-contained dGPU NVDEC decode backend (PyNvVideoCodec via libnvcuvid, chroot build-on-deploy) for DGX-Spark dGPU-mode nodes where the stock DeepStream/pyds decoder cannot load; cosmos.enabled gates the VLM to free GPU for co-located search." + }, + "cloudTypes": [ + "all" + ], + "displayName": "NVIDIA VSS NIMs (cosmos VLM + embed)", + "layer": "addon", + "name": "nvidia-vss-nims", + "version": "3.1.15", + "constraints": { + "dependencies": [ + { + "packName": "nvidia-vss-data-infrastructure", + "layer": "addon", + "minVersion": "3.1.0", + "type": "required" + }, + { + "packName": "nvidia-vss-vllm", + "layer": "addon", + "minVersion": "3.1.0", + "type": "optional" + }, + { + "packName": "nvidia-vss-core", + "layer": "addon", + "minVersion": "3.1.0", + "type": "optional" + }, + { + "packName": "nvidia-vss-ingress", + "layer": "addon", + "minVersion": "3.1.0", + "type": "optional" + } + ] + }, + "charts": [ + "charts/nvidia-vss-nims-3.1.15.tgz" + ] +} \ No newline at end of file diff --git a/packs/nvidia-vss-nims-3.1.15/presets.yaml b/packs/nvidia-vss-nims-3.1.15/presets.yaml new file mode 100644 index 00000000..e9b2a4a1 --- /dev/null +++ b/packs/nvidia-vss-nims-3.1.15/presets.yaml @@ -0,0 +1,80 @@ +# VSS Platform presets for the VSS 3.x NIMs pack (cosmos VLM NIM + vss-rt-embed). +# +# ⚠️ OPERATOR CONTRACT: select the SAME "VSS Platform" preset here as in EVERY other +# VSS 3.x pack in the profile (data-infrastructure records the choice in the +# vss-platform ConfigMap; it has no presets and is platform-invariant). Palette does +# NOT coordinate presets across packs. See P1-P5-IMPLEMENTATION.md. +# +# This pack has NO per-platform image variance: the cosmos VLM is a /nim/ image and +# vss-rt-embed is a vss-core/* image, and per BASE-SPEC NEITHER has a -sbsa variant +# (only the VIOS services + rtvi-vlm do). 
So no preset flips an image tag. The only real +# per-platform difference for this pack is the cosmos NIM memory env — and only the +# DGX-SPARK (shared-GPU) values are grounded in the source (hw-DGX-SPARK-shared.env); +# they are the values.yaml defaults. Non-Spark cosmos env is NOT grounded → those presets +# leave the defaults and are marked (unvalidated). vss-rt-embed stays enabled:false +# (SEARCH-only) in every preset. +# +# DGX-SPARK is the default and the only migration target (UNVALIDATED — nothing here has +# been hardware-tested; verify NGC tags against an authenticated registry first). +presets: + - name: "DGX-SPARK" + displayName: "DGX Spark (GB10, arm64) — cosmos shared-GPU env (migration target)" + group: "VSS Platform" + remove: [] + add: | + # Grounded DGX-SPARK shared-GPU cosmos env (= values.yaml defaults, re-asserted). + cosmos: + nimKvcachePercent: "0.4" + nimMaxModelLen: "16384" + nimMaxNumSeqs: "4" + nimDisableCudaGraph: "1" + rtviEmbed: + enabled: false + + - name: "OTHER" + displayName: "Other / generic single GPU (unvalidated)" + group: "VSS Platform" + remove: [] + add: | + rtviEmbed: + enabled: false + + - name: "H100" + displayName: "H100 (x86 dGPU) (unvalidated — cosmos env not grounded)" + group: "VSS Platform" + remove: [] + add: | + rtviEmbed: + enabled: false + + - name: "L40S" + displayName: "L40S (x86 dGPU) (unvalidated — cosmos env not grounded)" + group: "VSS Platform" + remove: [] + add: | + rtviEmbed: + enabled: false + + - name: "RTXPRO6000BW" + displayName: "RTX PRO 6000 Blackwell (x86 dGPU) (unvalidated)" + group: "VSS Platform" + remove: [] + add: | + rtviEmbed: + enabled: false + + - name: "AGX-THOR" + displayName: "Jetson AGX Thor (arm64, iGPU) (unvalidated)" + group: "VSS Platform" + remove: [] + add: | + rtviEmbed: + enabled: false + + - name: "IGX-THOR" + displayName: "IGX + Thor iGPU (arm64) (unvalidated)" + group: "VSS Platform" + remove: [] + add: | + rtviEmbed: + enabled: false diff --git 
a/packs/nvidia-vss-nims-3.1.15/schema.yaml b/packs/nvidia-vss-nims-3.1.15/schema.yaml new file mode 100644 index 00000000..adff182e --- /dev/null +++ b/packs/nvidia-vss-nims-3.1.15/schema.yaml @@ -0,0 +1,24 @@ +# Value constraints for the VSS 3.x NIMs pack (full dotted paths into values.yaml). +# Image tags are identical on every platform (no VSS Platform preset changes them) and are not constrained here. +cosmos.imageTag: + schema: '{{ format "${string}" | hints "cosmos-reason2-8b NIM image tag (multiarch /nim/ image; no -sbsa variant)." }}' +cosmos.gpuCount: + schema: '{{ format "${number}" | hints "GPUs reserved for the cosmos VLM NIM (1 on a single-GPU node)." }}' +cosmos.servicePort: + schema: '{{ format "${number}" | hints "ClusterIP port the cosmos VLM NIM publishes (agent VLM_BASE_URL); compose default 30082." }}' +cosmos.nimKvcachePercent: + schema: '{{ format "${string}" | hints "NIM_KVCACHE_PERCENT for the cosmos NIM (hw-DGX-SPARK-shared.env default 0.4)." }}' +cosmos.nimMaxModelLen: + schema: '{{ format "${string}" | hints "NIM_MAX_MODEL_LEN for the cosmos NIM." }}' +cosmos.nimMaxNumSeqs: + schema: '{{ format "${string}" | hints "NIM_MAX_NUM_SEQS for the cosmos NIM." }}' +cosmos.cacheSize: + schema: '{{ format "${string}" | hints "PVC size for the cosmos NIM model cache (e.g. 80Gi)." }}' +rtviEmbed.enabled: + schema: '{{ format "${boolean}" | hints "Render the rtvi-embed embedding microservice. SEARCH profile only; false in BASE." }}' +rtviEmbed.imageTag: + schema: '{{ format "${string}" | hints "rtvi-embed image tag (vss-core/* image; multiarch 3.1.0 on every platform — no -sbsa variant, not changed by any preset)." }}' +rtviEmbed.gpuCount: + schema: '{{ format "${number}" | hints "GPUs reserved for rtvi-embed (1)." }}' +rtviEmbed.servicePort: + schema: '{{ format "${number}" | hints "ClusterIP port rtvi-embed publishes (container port 8000)." 
}}' diff --git a/packs/nvidia-vss-nims-3.1.15/values.yaml b/packs/nvidia-vss-nims-3.1.15/values.yaml new file mode 100644 index 00000000..8962ef98 --- /dev/null +++ b/packs/nvidia-vss-nims-3.1.15/values.yaml @@ -0,0 +1,104 @@ +# spectrocloud.com/enabled-presets: VSS Platform:DGX-SPARK +# +# ┌─ VSS 3.x PLATFORM MATRIX (canonical — keep identical across all VSS 3.x packs) ─ +# │ Set ONE profile variable VSS_PLATFORM and select the matching "VSS Platform" +# │ preset in every VSS 3.x pack. In 3.x ALL FIVE packs are in every profile +# │ (data-infrastructure + nims + vllm + core + ingress) — the vLLM pack is the LLM +# │ for all platforms (no NIM-LLM in 3.x), so there is no include/omit hazard. +# │ 3.x decode is NATIVE (VIOS/rt-cv); there is NO --disable-decoding. +# │ +# │ Platform LLM (vLLM model) VLM decode vss-core tags +# │ DGX-SPARK nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0-sbsa +# │ OTHER nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 +# │ H100 nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 (x86) +# │ L40S nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 (x86) +# │ RTXPRO6000BW nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 (x86) +# │ AGX-THOR/IGX-THOR nano-9b-fp8 (jetson img) rt-vlm Tegra* 3.1.0 + runtimeClassName +# │ * Tegra: runtimeClassName nvidia via extraPodSpecs; NVIDIA container runtime injects L4T. +# │ Only DGX-SPARK is the migration target; all rows UNVALIDATED (no hardware, unverified tags). +# └────────────────────────────────────────────────────────────────────────────── +# +# NVIDIA VSS 3.x NIMs pack (manifest-only). Two services: +# - cosmos-reason2-8b: the BASE Vision-Language Model NIM (/nim/ image, NO -sbsa +# variant — same multiarch tag on every platform; only env values differ). +# - rtvi-embed: the embedding microservice (vss-core/* image, multiarch 3.1.0 — NO -sbsa variant). +# Gated by the SEARCH profile only — disabled by default (rtviEmbed.enabled: false). 
+# +# Secrets: this pack REFERENCES (does not create) ngc-pull-secret (imagePullSecret), +# ngc-api-secret (NGC_CLI_API_KEY) and hf-token-secret (HF_TOKEN) — all created by the +# data-infrastructure pack (install-priority 5, before this pack's 10). +# +# DGX-SPARK is the default preset and the only migration target; it re-asserts the cosmos +# shared-GPU env defaults and changes NO image tag (neither image has a -sbsa variant). All other +# platforms are UNVALIDATED. + +pack: + namespace: nvidia-vss + spectrocloud.com/install-priority: "10" + content: + images: + # BUILD-ON-DEPLOY (0/0): only public wolfi-base is pack content. The gated cosmos + + # vss-rt-embed NIM rootfs are crane-fetched as runtime DATA at deploy and run on + # wolfi-base via the matched-ld + CUDA block validated for the vLLM pack (live GPU + # NIM validation pending a free GPU slot). + - image: cgr.dev/chainguard/wolfi-base:latest + +# Top-level namespace consumed by every manifest ({{ .Values.namespace }}). +namespace: nvidia-vss + +# Public 0/0 base that crane-fetches + runs the gated NIM binaries at deploy. +baseImage: cgr.dev/chainguard/wolfi-base:latest + +# ── cosmos-reason2-8b VLM NIM (in BASE; rendered on every platform) ────────────── +# DGX-SPARK defaults from the shared-gpu compose service + hw-DGX-SPARK-shared.env. +cosmos: + enabled: true + gatedImage: nvcr.io/nim/nvidia/cosmos-reason2-8b:1.6.0 + image: "nvcr.io/nim/nvidia/cosmos-reason2-8b" + # /nim/ image has NO -sbsa variant — same tag on every platform. + # Tag from the compose source (deployments/nim/cosmos-reason2-8b/compose.yml). + # TODO(verify): confirm 1.6.0 against an authenticated NGC registry. + imageTag: "1.6.0" + # compose user: "${UID:-1000}:${GID:-1000}" + runAsUser: 1000 + runAsGroup: 1000 + gpuCount: 1 + servicePort: 30082 + # compose: NIM_MODEL_NAME = ${VLM_CUSTOM_WEIGHTS:-} (empty in BASE). + nimModelName: "" + # env_file hw-DGX-SPARK-shared.env defaults (verbatim from source). 
+ nimKvcachePercent: "0.4" + nimMaxModelLen: "16384" + nimMaxNumSeqs: "4" + nimDisableCudaGraph: "1" + # compose shm_size: 32gb. + shmSize: "32Gi" + cacheSize: "80Gi" + # Empty -> omit storageClassName (cluster default StorageClass). + storageClass: "" + +# ── vss-rt-embed embedding microservice (SEARCH profile only; off by default) ──── +# vss-core/* image, multiarch 3.1.0 — NO -sbsa variant (per BASE-SPEC: only the VIOS +# services + rtvi-vlm have -sbsa), so the DGX-SPARK preset does NOT flip this tag. +rtviEmbed: + gatedImage: nvcr.io/nvidia/vss-core/vss-rt-embed:3.1.0 + enabled: false + decodeBackend: "nvdec" + image: "nvcr.io/nvidia/vss-core/vss-rt-embed" + imageTag: "3.1.0" + # compose user: "1001:1001" + runAsUser: 1001 + runAsGroup: 1001 + gpuCount: 1 + servicePort: 8017 + # compose: MODEL_PATH default (deployments/rtvi/rtvi-embed/rtvi-embed-docker-compose.yml). + modelPath: "git:https://huggingface.co/nvidia/Cosmos-Embed1-448p" + # compose: KAFKA_BOOTSTRAP_SERVERS = ${HOST_IP}:9092 -> kafka Service (data-infra, search only). + kafkaBootstrapServers: "kafka:9092" + # compose: REDIS_HOST = ${REDIS_HOST:-redis} -> redis Service (data-infra). + redisHost: "redis" + ngcCacheSize: "80Gi" + hfCacheSize: "40Gi" + tritonRepoSize: "40Gi" + # Empty -> omit storageClassName (cluster default StorageClass). + storageClass: ""