spectrocloud · blik616287 · Jun 13, 2026 · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
diff --git a/packs/nvidia-vss-nims-3.1.15/README.md b/packs/nvidia-vss-nims-3.1.15/README.md
@@ -0,0 +1,3 @@
+# NVIDIA VSS NIMs (cosmos VLM + embed)
+
+NVIDIA VSS 3.x NIMs — the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile.
diff --git a/packs/nvidia-vss-nims-3.1.15/charts/nvidia-vss-nims-3.1.15.tgz b/packs/nvidia-vss-nims-3.1.15/charts/nvidia-vss-nims-3.1.15.tgz
diff --git a/packs/nvidia-vss-nims-3.1.15/logo.png b/packs/nvidia-vss-nims-3.1.15/logo.png
diff --git a/packs/nvidia-vss-nims-3.1.15/pack.json b/packs/nvidia-vss-nims-3.1.15/pack.json
@@ -0,0 +1,47 @@
+{
+  "addonType": "system app",
+  "annotations": {
+    "source": "community",
+    "contributor": "spectrocloud",
+    "docsURL": "https://docs.nvidia.com/vss/latest/index.html",
+    "description": "NVIDIA VSS 3.x NIMs \u2014 the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile. rtvi-embed (search profile) supports a self-contained dGPU NVDEC decode backend (PyNvVideoCodec via libnvcuvid, chroot build-on-deploy) for DGX-Spark dGPU-mode nodes where the stock DeepStream/pyds decoder cannot load; cosmos.enabled gates the VLM to free GPU for co-located search."
+  },
+  "cloudTypes": [
+    "all"
+  ],
+  "displayName": "NVIDIA VSS NIMs (cosmos VLM + embed)",
+  "layer": "addon",
+  "name": "nvidia-vss-nims",
+  "version": "3.1.15",
+  "constraints": {
+    "dependencies": [
+      {
+        "packName": "nvidia-vss-data-infrastructure",
+        "layer": "addon",
+        "minVersion": "3.1.0",
+        "type": "required"
+      },
+      {
+        "packName": "nvidia-vss-vllm",
+        "layer": "addon",
+        "minVersion": "3.1.0",
+        "type": "optional"
+      },
+      {
+        "packName": "nvidia-vss-core",
+        "layer": "addon",
+        "minVersion": "3.1.0",
+        "type": "optional"
+      },
+      {
+        "packName": "nvidia-vss-ingress",
+        "layer": "addon",
+        "minVersion": "3.1.0",
+        "type": "optional"
+      }
+    ]
+  },
+  "charts": [
+    "charts/nvidia-vss-nims-3.1.15.tgz"
+  ]
+}
diff --git a/packs/nvidia-vss-nims-3.1.15/presets.yaml b/packs/nvidia-vss-nims-3.1.15/presets.yaml
@@ -0,0 +1,80 @@
+# VSS Platform presets for the VSS 3.x NIMs pack (cosmos VLM NIM + vss-rt-embed).
+#
+# ⚠️ OPERATOR CONTRACT: select the SAME "VSS Platform" preset here as in EVERY other
+#    VSS 3.x pack in the profile (data-infrastructure records the choice in the
+#    vss-platform ConfigMap; it has no presets and is platform-invariant). Palette does
+#    NOT coordinate presets across packs. See P1-P5-IMPLEMENTATION.md.
+#
+# This pack has NO per-platform image variance: the cosmos VLM is a /nim/ image and
+# vss-rt-embed is a vss-core/* image, and per BASE-SPEC NEITHER has a -sbsa variant
+# (only the VIOS services + rtvi-vlm do). So no preset flips an image tag. The only real
+# per-platform difference for this pack is the cosmos NIM memory env — and only the
+# DGX-SPARK (shared-GPU) values are grounded in the source (hw-DGX-SPARK-shared.env);
+# they are the values.yaml defaults. Non-Spark cosmos env is NOT grounded → those presets
+# leave the defaults and are marked (unvalidated). vss-rt-embed stays enabled:false
+# (SEARCH-only) in every preset.
+#
+# DGX-SPARK is the default and the only migration target (UNVALIDATED — nothing here has
+# been hardware-tested; verify NGC tags against an authenticated registry first).
+presets:
+  - name: "DGX-SPARK"
+    displayName: "DGX Spark (GB10, arm64) — cosmos shared-GPU env (migration target)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      # Grounded DGX-SPARK shared-GPU cosmos env (= values.yaml defaults, re-asserted).
+      cosmos:
+        nimKvcachePercent: "0.4"
+        nimMaxModelLen: "16384"
+        nimMaxNumSeqs: "4"
+        nimDisableCudaGraph: "1"
+      rtviEmbed:
+        enabled: false
+
+  - name: "OTHER"
+    displayName: "Other / generic single GPU (unvalidated)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      rtviEmbed:
+        enabled: false
+
+  - name: "H100"
+    displayName: "H100 (x86 dGPU) (unvalidated — cosmos env not grounded)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      rtviEmbed:
+        enabled: false
+
+  - name: "L40S"
+    displayName: "L40S (x86 dGPU) (unvalidated — cosmos env not grounded)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      rtviEmbed:
+        enabled: false
+
+  - name: "RTXPRO6000BW"
+    displayName: "RTX PRO 6000 Blackwell (x86 dGPU) (unvalidated)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      rtviEmbed:
+        enabled: false
+
+  - name: "AGX-THOR"
+    displayName: "Jetson AGX Thor (arm64, iGPU) (unvalidated)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      rtviEmbed:
+        enabled: false
+
+  - name: "IGX-THOR"
+    displayName: "IGX + Thor iGPU (arm64) (unvalidated)"
+    group: "VSS Platform"
+    remove: []
+    add: |
+      rtviEmbed:
+        enabled: false
diff --git a/packs/nvidia-vss-nims-3.1.15/schema.yaml b/packs/nvidia-vss-nims-3.1.15/schema.yaml
@@ -0,0 +1,24 @@
+# Value constraints for the VSS 3.x NIMs pack (full dotted paths into values.yaml).
+# No VSS Platform preset changes an image tag in this pack (neither image has a -sbsa variant); the imageTag entries below are free-form strings.
+cosmos.imageTag:
+  schema: '{{ format "${string}" | hints "cosmos-reason2-8b NIM image tag (multiarch /nim/ image; no -sbsa variant)." }}'
+cosmos.gpuCount:
+  schema: '{{ format "${number}" | hints "GPUs reserved for the cosmos VLM NIM (1 on a single-GPU node)." }}'
+cosmos.servicePort:
+  schema: '{{ format "${number}" | hints "ClusterIP port the cosmos VLM NIM publishes (agent VLM_BASE_URL); compose default 30082." }}'
+# Hint default mirrors cosmos.nimKvcachePercent in values.yaml and the DGX-SPARK preset ("0.4").
+cosmos.nimKvcachePercent:
+  schema: '{{ format "${string}" | hints "NIM_KVCACHE_PERCENT for the cosmos NIM (hw-DGX-SPARK-shared.env default 0.4)." }}'
+cosmos.nimMaxModelLen:
+  schema: '{{ format "${string}" | hints "NIM_MAX_MODEL_LEN for the cosmos NIM." }}'
+cosmos.nimMaxNumSeqs:
+  schema: '{{ format "${string}" | hints "NIM_MAX_NUM_SEQS for the cosmos NIM." }}'
+cosmos.cacheSize:
+  schema: '{{ format "${string}" | hints "PVC size for the cosmos NIM model cache (e.g. 80Gi)." }}'
+rtviEmbed.enabled:
+  schema: '{{ format "${boolean}" | hints "Render the rtvi-embed embedding microservice. SEARCH profile only; false in BASE." }}'
+# No preset sets this value; the values.yaml default ("3.1.0") applies on every platform.
+rtviEmbed.imageTag:
+  schema: '{{ format "${string}" | hints "rtvi-embed image tag (vss-core/* multiarch image; 3.1.0 on every platform — no -sbsa variant, not changed by any preset)." }}'
+rtviEmbed.gpuCount:
+  schema: '{{ format "${number}" | hints "GPUs reserved for rtvi-embed (1)." }}'
+rtviEmbed.servicePort:
+  schema: '{{ format "${number}" | hints "ClusterIP port rtvi-embed publishes (container port 8000)." }}'
diff --git a/packs/nvidia-vss-nims-3.1.15/values.yaml b/packs/nvidia-vss-nims-3.1.15/values.yaml
@@ -0,0 +1,104 @@
+# spectrocloud.com/enabled-presets: VSS Platform:DGX-SPARK
+#
+# ┌─ VSS 3.x PLATFORM MATRIX (canonical — keep identical across all VSS 3.x packs) ─
+# │ Set ONE profile variable VSS_PLATFORM and select the matching "VSS Platform"
+# │ preset in every VSS 3.x pack. In 3.x ALL FIVE packs are in every profile
+# │ (data-infrastructure + nims + vllm + core + ingress) — the vLLM pack is the LLM
+# │ for all platforms (no NIM-LLM in 3.x), so there is no include/omit hazard.
+# │ 3.x decode is NATIVE (VIOS/rt-cv); there is NO --disable-decoding.
+# │
+# │ Platform          LLM (vLLM model)        VLM            decode   vss-core tags
+# │ DGX-SPARK         nemotron-nano-9b-v2-fp8 cosmos NIM     dGPU     3.1.0-sbsa
+# │ OTHER             nemotron-nano-9b-v2-fp8 cosmos NIM     dGPU     3.1.0
+# │ H100              nemotron-nano-9b-v2-fp8 cosmos NIM     dGPU     3.1.0 (x86)
+# │ L40S              nemotron-nano-9b-v2-fp8 cosmos NIM     dGPU     3.1.0 (x86)
+# │ RTXPRO6000BW      nemotron-nano-9b-v2-fp8 cosmos NIM     dGPU     3.1.0 (x86)
+# │ AGX-THOR/IGX-THOR nano-9b-fp8 (jetson img) rt-vlm        Tegra*   3.1.0 + runtimeClassName
+# │ * Tegra: runtimeClassName nvidia via extraPodSpecs; NVIDIA container runtime injects L4T.
+# │ Only DGX-SPARK is the migration target; all rows UNVALIDATED (no hardware, unverified tags).
+# └──────────────────────────────────────────────────────────────────────────────
+#
+# NVIDIA VSS 3.x NIMs pack (manifest-only). Two services:
+#   - cosmos-reason2-8b: the BASE Vision-Language Model NIM (/nim/ image, NO -sbsa
+#     variant — same multiarch tag on every platform; only env values differ).
+#   - rtvi-embed: the embedding microservice (vss-core/* multiarch image, NO -sbsa variant).
+#     Gated by the SEARCH profile only — disabled by default (rtviEmbed.enabled: false).
+#
+# Secrets: this pack REFERENCES (does not create) ngc-pull-secret (imagePullSecret),
+# ngc-api-secret (NGC_CLI_API_KEY) and hf-token-secret (HF_TOKEN) — all created by the
+# data-infrastructure pack (install-priority 5, before this pack's 10).
+#
+# DGX-SPARK is the default preset and the only migration target; it changes no image tag
+# (neither cosmos nor rtvi-embed has a -sbsa variant) and only re-asserts the cosmos
+# shared-GPU env defaults. All other platforms are UNVALIDATED.
+
+pack:
+  namespace: nvidia-vss
+  spectrocloud.com/install-priority: "10"
+  content:
+    images:
+      # BUILD-ON-DEPLOY (0/0): only public wolfi-base is pack content. The gated cosmos +
+      # vss-rt-embed NIM rootfs are crane-fetched as runtime DATA at deploy and run on
+      # wolfi-base via the matched-ld + CUDA block validated for the vLLM pack (live GPU
+      # NIM validation pending a free GPU slot).
+      - image: cgr.dev/chainguard/wolfi-base:latest
+
+# Top-level namespace consumed by every manifest ({{ .Values.namespace }}).
+namespace: nvidia-vss
+
+# Public 0/0 base that crane-fetches + runs the gated NIM binaries at deploy.
+baseImage: cgr.dev/chainguard/wolfi-base:latest
+
+# ── cosmos-reason2-8b VLM NIM (in BASE; rendered on every platform) ──────────────
+# DGX-SPARK defaults from the shared-gpu compose service + hw-DGX-SPARK-shared.env.
+cosmos:
+  enabled: true
+  gatedImage: nvcr.io/nim/nvidia/cosmos-reason2-8b:1.6.0
+  image: "nvcr.io/nim/nvidia/cosmos-reason2-8b"
+  # /nim/ image has NO -sbsa variant — same tag on every platform.
+  # Tag from the compose source (deployments/nim/cosmos-reason2-8b/compose.yml).
+  # TODO(verify): confirm 1.6.0 against an authenticated NGC registry.
+  imageTag: "1.6.0"
+  # compose user: "${UID:-1000}:${GID:-1000}"
+  runAsUser: 1000
+  runAsGroup: 1000
+  gpuCount: 1
+  servicePort: 30082
+  # compose: NIM_MODEL_NAME = ${VLM_CUSTOM_WEIGHTS:-} (empty in BASE).
+  nimModelName: ""
+  # env_file hw-DGX-SPARK-shared.env defaults (verbatim from source).
+  nimKvcachePercent: "0.4"
+  nimMaxModelLen: "16384"
+  nimMaxNumSeqs: "4"
+  nimDisableCudaGraph: "1"
+  # compose shm_size: 32gb.
+  shmSize: "32Gi"
+  cacheSize: "80Gi"
+  # Empty -> omit storageClassName (cluster default StorageClass).
+  storageClass: ""
+
+# ── vss-rt-embed embedding microservice (SEARCH profile only; off by default) ────
+# vss-core/* image, multiarch 3.1.0 — NO -sbsa variant (per BASE-SPEC: only the VIOS
+# services + rtvi-vlm have -sbsa), so the DGX-SPARK preset does NOT flip this tag.
+rtviEmbed:
+  gatedImage: nvcr.io/nvidia/vss-core/vss-rt-embed:3.1.0
+  enabled: false
+  decodeBackend: "nvdec"
+  image: "nvcr.io/nvidia/vss-core/vss-rt-embed"
+  imageTag: "3.1.0"
+  # compose user: "1001:1001"
+  runAsUser: 1001
+  runAsGroup: 1001
+  gpuCount: 1
+  servicePort: 8017
+  # compose: MODEL_PATH default (deployments/rtvi/rtvi-embed/rtvi-embed-docker-compose.yml).
+  modelPath: "git:https://huggingface.co/nvidia/Cosmos-Embed1-448p"
+  # compose: KAFKA_BOOTSTRAP_SERVERS = ${HOST_IP}:9092 -> kafka Service (data-infra, search only).
+  kafkaBootstrapServers: "kafka:9092"
+  # compose: REDIS_HOST = ${REDIS_HOST:-redis} -> redis Service (data-infra).
+  redisHost: "redis"
+  ngcCacheSize: "80Gi"
+  hfCacheSize: "40Gi"
+  tritonRepoSize: "40Gi"
+  # Empty -> omit storageClassName (cluster default StorageClass).
+  storageClass: ""
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# NVIDIA VSS NIMs (cosmos VLM + embed)

		NVIDIA VSS 3.x NIMs — the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile.