Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions packs/nvidia-vss-nims-3.1.15/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NVIDIA VSS NIMs (cosmos VLM + embed)

NVIDIA VSS 3.x NIMs — the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile.
Binary file not shown.
Binary file added packs/nvidia-vss-nims-3.1.15/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 47 additions & 0 deletions packs/nvidia-vss-nims-3.1.15/pack.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"addonType": "system app",
"annotations": {
"source": "community",
"contributor": "spectrocloud",
"docsURL": "https://docs.nvidia.com/vss/latest/index.html",
"description": "NVIDIA VSS 3.x NIMs \u2014 the cosmos-reason2-8b Vision-Language Model NIM (in BASE) plus the rtvi-embed embedding microservice (SEARCH profile only, disabled by default). Manifest-only pack; references the ngc-pull-secret / ngc-api-secret / hf-token-secret created by the data-infrastructure pack. install-priority 10. Part of the VSS 3.x five-pack set (data-infrastructure + nims + vllm + core + ingress); all five belong in every profile. rtvi-embed (search profile) supports a self-contained dGPU NVDEC decode backend (PyNvVideoCodec via libnvcuvid, chroot build-on-deploy) for DGX-Spark dGPU-mode nodes where the stock DeepStream/pyds decoder cannot load; cosmos.enabled gates the VLM to free GPU for co-located search."
},
"cloudTypes": [
"all"
],
"displayName": "NVIDIA VSS NIMs (cosmos VLM + embed)",
"layer": "addon",
"name": "nvidia-vss-nims",
"version": "3.1.15",
"constraints": {
"dependencies": [
{
"packName": "nvidia-vss-data-infrastructure",
"layer": "addon",
"minVersion": "3.1.0",
"type": "required"
},
{
"packName": "nvidia-vss-vllm",
"layer": "addon",
"minVersion": "3.1.0",
"type": "optional"
},
{
"packName": "nvidia-vss-core",
"layer": "addon",
"minVersion": "3.1.0",
"type": "optional"
},
{
"packName": "nvidia-vss-ingress",
"layer": "addon",
"minVersion": "3.1.0",
"type": "optional"
}
]
},
"charts": [
"charts/nvidia-vss-nims-3.1.15.tgz"
]
}
80 changes: 80 additions & 0 deletions packs/nvidia-vss-nims-3.1.15/presets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# VSS Platform presets for the VSS 3.x NIMs pack (cosmos VLM NIM + vss-rt-embed).
#
# ⚠️ OPERATOR CONTRACT: select the SAME "VSS Platform" preset here as in EVERY other
# VSS 3.x pack in the profile (data-infrastructure records the choice in the
# vss-platform ConfigMap; it has no presets and is platform-invariant). Palette does
# NOT coordinate presets across packs. See P1-P5-IMPLEMENTATION.md.
#
# This pack has NO per-platform image variance: the cosmos VLM is a /nim/ image and
# vss-rt-embed is a vss-core/* image, and per BASE-SPEC NEITHER has a -sbsa variant
# (only the VIOS services + rtvi-vlm do). So no preset flips an image tag. The only real
# per-platform difference for this pack is the cosmos NIM memory env — and only the
# DGX-SPARK (shared-GPU) values are grounded in the source (hw-DGX-SPARK-shared.env);
# they are the values.yaml defaults. Non-Spark cosmos env is NOT grounded → those presets
# leave the defaults and are marked (unvalidated). vss-rt-embed stays enabled:false
# (SEARCH-only) in every preset.
#
# DGX-SPARK is the default and the only migration target (UNVALIDATED — nothing here has
# been hardware-tested; verify NGC tags against an authenticated registry first).
presets:
- name: "DGX-SPARK"
displayName: "DGX Spark (GB10, arm64) — cosmos shared-GPU env (migration target)"
group: "VSS Platform"
remove: []
add: |
# Grounded DGX-SPARK shared-GPU cosmos env (= values.yaml defaults, re-asserted).
cosmos:
nimKvcachePercent: "0.4"
nimMaxModelLen: "16384"
nimMaxNumSeqs: "4"
nimDisableCudaGraph: "1"
rtviEmbed:
enabled: false

- name: "OTHER"
displayName: "Other / generic single GPU (unvalidated)"
group: "VSS Platform"
remove: []
add: |
rtviEmbed:
enabled: false

- name: "H100"
displayName: "H100 (x86 dGPU) (unvalidated — cosmos env not grounded)"
group: "VSS Platform"
remove: []
add: |
rtviEmbed:
enabled: false

- name: "L40S"
displayName: "L40S (x86 dGPU) (unvalidated — cosmos env not grounded)"
group: "VSS Platform"
remove: []
add: |
rtviEmbed:
enabled: false

- name: "RTXPRO6000BW"
displayName: "RTX PRO 6000 Blackwell (x86 dGPU) (unvalidated)"
group: "VSS Platform"
remove: []
add: |
rtviEmbed:
enabled: false

- name: "AGX-THOR"
displayName: "Jetson AGX Thor (arm64, iGPU) (unvalidated)"
group: "VSS Platform"
remove: []
add: |
rtviEmbed:
enabled: false

- name: "IGX-THOR"
displayName: "IGX + Thor iGPU (arm64) (unvalidated)"
group: "VSS Platform"
remove: []
add: |
rtviEmbed:
enabled: false
24 changes: 24 additions & 0 deletions packs/nvidia-vss-nims-3.1.15/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Value constraints for the VSS 3.x NIMs pack (full dotted paths into values.yaml).
# Image tags are free-form strings here; no VSS Platform preset in this pack overrides them.
cosmos.imageTag:
schema: '{{ format "${string}" | hints "cosmos-reason2-8b NIM image tag (multiarch /nim/ image; no -sbsa variant)." }}'
cosmos.gpuCount:
schema: '{{ format "${number}" | hints "GPUs reserved for the cosmos VLM NIM (1 on a single-GPU node)." }}'
cosmos.servicePort:
schema: '{{ format "${number}" | hints "ClusterIP port the cosmos VLM NIM publishes (agent VLM_BASE_URL); compose default 30082." }}'
# Hint quotes the 0.4 default that values.yaml and the DGX-SPARK preset both set.
cosmos.nimKvcachePercent:
  schema: '{{ format "${string}" | hints "NIM_KVCACHE_PERCENT for the cosmos NIM (hw-DGX-SPARK-shared.env default 0.4)." }}'
cosmos.nimMaxModelLen:
schema: '{{ format "${string}" | hints "NIM_MAX_MODEL_LEN for the cosmos NIM." }}'
cosmos.nimMaxNumSeqs:
schema: '{{ format "${string}" | hints "NIM_MAX_NUM_SEQS for the cosmos NIM." }}'
cosmos.cacheSize:
schema: '{{ format "${string}" | hints "PVC size for the cosmos NIM model cache (e.g. 80Gi)." }}'
rtviEmbed.enabled:
schema: '{{ format "${boolean}" | hints "Render the rtvi-embed embedding microservice. SEARCH profile only; false in BASE." }}'
# Same tag on every platform: the presets in this pack never set rtviEmbed.imageTag.
rtviEmbed.imageTag:
  schema: '{{ format "${string}" | hints "rtvi-embed image tag (vss-core/* image; multiarch 3.1.0, no -sbsa variant — not changed by any preset)." }}'
rtviEmbed.gpuCount:
schema: '{{ format "${number}" | hints "GPUs reserved for rtvi-embed (1)." }}'
rtviEmbed.servicePort:
schema: '{{ format "${number}" | hints "ClusterIP port rtvi-embed publishes (container port 8000)." }}'
104 changes: 104 additions & 0 deletions packs/nvidia-vss-nims-3.1.15/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# spectrocloud.com/enabled-presets: VSS Platform:DGX-SPARK
#
# ┌─ VSS 3.x PLATFORM MATRIX (canonical — keep identical across all VSS 3.x packs) ─
# │ Set ONE profile variable VSS_PLATFORM and select the matching "VSS Platform"
# │ preset in every VSS 3.x pack. In 3.x ALL FIVE packs are in every profile
# │ (data-infrastructure + nims + vllm + core + ingress) — the vLLM pack is the LLM
# │ for all platforms (no NIM-LLM in 3.x), so there is no include/omit hazard.
# │ 3.x decode is NATIVE (VIOS/rt-cv); there is NO --disable-decoding.
# │
# │ Platform LLM (vLLM model) VLM decode vss-core tags
# │ DGX-SPARK nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0-sbsa
# │ OTHER nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0
# │ H100 nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 (x86)
# │ L40S nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 (x86)
# │ RTXPRO6000BW nemotron-nano-9b-v2-fp8 cosmos NIM dGPU 3.1.0 (x86)
# │ AGX-THOR/IGX-THOR nano-9b-fp8 (jetson img) rt-vlm Tegra* 3.1.0 + runtimeClassName
# │ * Tegra: runtimeClassName nvidia via extraPodSpecs; NVIDIA container runtime injects L4T.
# │ Only DGX-SPARK is the migration target; all rows UNVALIDATED (no hardware, unverified tags).
# └──────────────────────────────────────────────────────────────────────────────
#
# NVIDIA VSS 3.x NIMs pack (manifest-only). Two services:
# - cosmos-reason2-8b: the BASE Vision-Language Model NIM (/nim/ image, NO -sbsa
# variant — same multiarch tag on every platform; only env values differ).
# - rtvi-embed: the embedding microservice (vss-core/* image, multiarch 3.1.0 — NO -sbsa variant).
# Gated by the SEARCH profile only — disabled by default (rtviEmbed.enabled: false).
#
# Secrets: this pack REFERENCES (does not create) ngc-pull-secret (imagePullSecret),
# ngc-api-secret (NGC_CLI_API_KEY) and hf-token-secret (HF_TOKEN) — all created by the
# data-infrastructure pack (install-priority 5, before this pack's 10).
#
# DGX-SPARK is the default preset and the only migration target; it re-asserts the cosmos
# shared-GPU env and changes no image tag (neither cosmos nor rtvi-embed has a -sbsa
# variant). Every platform, DGX-SPARK included, is UNVALIDATED.

pack:
namespace: nvidia-vss
spectrocloud.com/install-priority: "10"
content:
images:
# BUILD-ON-DEPLOY (0/0): only public wolfi-base is pack content. The gated cosmos +
# vss-rt-embed NIM rootfs are crane-fetched as runtime DATA at deploy and run on
# wolfi-base via the matched-ld + CUDA block validated for the vLLM pack (live GPU
# NIM validation pending a free GPU slot).
- image: cgr.dev/chainguard/wolfi-base:latest

# Top-level namespace consumed by every manifest ({{ .Values.namespace }}).
namespace: nvidia-vss

# Public 0/0 base that crane-fetches + runs the gated NIM binaries at deploy.
# Same image reference as the pack.content.images entry.
# NOTE(review): ":latest" is a floating tag, so the base can change between deploys —
# consider pinning by digest; TODO confirm with the pack owners.
baseImage: cgr.dev/chainguard/wolfi-base:latest

# ── cosmos-reason2-8b VLM NIM (in BASE; rendered on every platform) ──────────────
# DGX-SPARK defaults from the shared-gpu compose service + hw-DGX-SPARK-shared.env.
cosmos:
enabled: true
gatedImage: nvcr.io/nim/nvidia/cosmos-reason2-8b:1.6.0
image: "nvcr.io/nim/nvidia/cosmos-reason2-8b"
# /nim/ image has NO -sbsa variant — same tag on every platform.
# Tag from the compose source (deployments/nim/cosmos-reason2-8b/compose.yml).
# TODO(verify): confirm 1.6.0 against an authenticated NGC registry.
imageTag: "1.6.0"
# compose user: "${UID:-1000}:${GID:-1000}"
runAsUser: 1000
runAsGroup: 1000
gpuCount: 1
servicePort: 30082
# compose: NIM_MODEL_NAME = ${VLM_CUSTOM_WEIGHTS:-} (empty in BASE).
nimModelName: ""
# env_file hw-DGX-SPARK-shared.env defaults (verbatim from source).
nimKvcachePercent: "0.4"
nimMaxModelLen: "16384"
nimMaxNumSeqs: "4"
nimDisableCudaGraph: "1"
# compose shm_size: 32gb.
shmSize: "32Gi"
cacheSize: "80Gi"
# Empty -> omit storageClassName (cluster default StorageClass).
storageClass: ""

# ── vss-rt-embed embedding microservice (SEARCH profile only; off by default) ────
# vss-core/* image, multiarch 3.1.0 — NO -sbsa variant (per BASE-SPEC: only the VIOS
# services + rtvi-vlm have -sbsa), so the DGX-SPARK preset does NOT flip this tag.
rtviEmbed:
gatedImage: nvcr.io/nvidia/vss-core/vss-rt-embed:3.1.0
enabled: false
decodeBackend: "nvdec"
image: "nvcr.io/nvidia/vss-core/vss-rt-embed"
imageTag: "3.1.0"
# compose user: "1001:1001"
runAsUser: 1001
runAsGroup: 1001
gpuCount: 1
servicePort: 8017
# compose: MODEL_PATH default (deployments/rtvi/rtvi-embed/rtvi-embed-docker-compose.yml).
modelPath: "git:https://huggingface.co/nvidia/Cosmos-Embed1-448p"
# compose: KAFKA_BOOTSTRAP_SERVERS = ${HOST_IP}:9092 -> kafka Service (data-infra, search only).
kafkaBootstrapServers: "kafka:9092"
# compose: REDIS_HOST = ${REDIS_HOST:-redis} -> redis Service (data-infra).
redisHost: "redis"
ngcCacheSize: "80Gi"
hfCacheSize: "40Gi"
tritonRepoSize: "40Gi"
# Empty -> omit storageClassName (cluster default StorageClass).
storageClass: ""
Loading