Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/renovate.json
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,17 @@
"enabled": true,
"ignoreUnstable": false
},
{
"matchPackageNames": [
"aks/aks-gpu-cuda"
],
"groupName": "nvidia-gpu-cuda-legacy",
"versioning": "regex:^(?<major>\\d+)\\.(?<minor>\\d+)\\.(?<patch>\\d+)-(?<prerelease>\\d{14})$",
"allowedVersions": "/^580\\./",
"automerge": false,
"enabled": true,
"ignoreUnstable": false
},
{
"matchPackageNames": [
"aks/aks-gpu-grid"
Expand Down
7 changes: 7 additions & 0 deletions parts/common/components.json
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,13 @@
"latestVersion": "580.159.04-20260629214430"
}
},
{
"downloadURL": "mcr.microsoft.com/aks/aks-gpu-cuda:*",
"gpuVersion": {
"renovateTag": "registry=https://mcr.microsoft.com, name=aks/aks-gpu-cuda",
"latestVersion": "580.126.09-20260430040408"
}
},
{
"downloadURL": "mcr.microsoft.com/aks/aks-gpu-grid:*",
"gpuVersion": {
Expand Down
4 changes: 2 additions & 2 deletions pkg/agent/baker.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,7 +1497,7 @@ func GetGPUDriverVersion(size string) string {
if isStandardNCv1(size) {
return datamodel.Nvidia470CudaDriverVersion
}
return datamodel.NvidiaCudaDriverVersion
return datamodel.NvidiaCudaLTSDriverVersion
}

func isStandardNCv1(size string) bool {
Expand All @@ -1522,7 +1522,7 @@ func GetAKSGPUImageSHA(size string) string {
if useGridDrivers(size) {
return datamodel.AKSGPUGridVersionSuffix
}
return datamodel.AKSGPUCudaVersionSuffix
return datamodel.AKSGPUCudaLTSVersionSuffix
Comment on lines 1522 to +1525
}

// GetGPUDriverType maps a GPU VM size to the aks-gpu image variant used to install its driver.
Expand Down
8 changes: 4 additions & 4 deletions pkg/agent/baker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,7 @@ var _ = Describe("GetGPUDriverVersion", func() {
Expect(GetGPUDriverVersion("standard_nc6")).To(Equal(datamodel.Nvidia470CudaDriverVersion))
})
It("should use cuda with nc v3", func() {
Expect(GetGPUDriverVersion("standard_nc6_v3")).To(Equal(datamodel.NvidiaCudaDriverVersion))
Expect(GetGPUDriverVersion("standard_nc6_v3")).To(Equal(datamodel.NvidiaCudaLTSDriverVersion))
})
It("should use grid with nv v5", func() {
Expect(GetGPUDriverVersion("standard_nv6ads_a10_v5")).To(Equal(datamodel.NvidiaGridDriverVersion))
Expand All @@ -958,7 +958,7 @@ var _ = Describe("GetGPUDriverVersion", func() {
})
// NV V1 SKUs were retired in September 2023; this test is kept only as a safety net.
It("should use cuda with nv v1", func() {
Expect(GetGPUDriverVersion("standard_nv6")).To(Equal(datamodel.NvidiaCudaDriverVersion))
Expect(GetGPUDriverVersion("standard_nv6")).To(Equal(datamodel.NvidiaCudaLTSDriverVersion))
})
})

Expand Down Expand Up @@ -995,8 +995,8 @@ var _ = Describe("GetAKSGPUImageSHA", func() {
Expect(GetAKSGPUImageSHA("standard_nc128ds_xl_rtxpro6000bse_v6")).To(Equal(datamodel.AKSGPUGridV20VersionSuffix))
Expect(GetAKSGPUImageSHA("standard_nc128lds_xl_rtxpro6000bse_v6")).To(Equal(datamodel.AKSGPUGridV20VersionSuffix))
})
It("should use newest AKSGPUCudaVersionSuffix with non grid SKU", func() {
Expect(GetAKSGPUImageSHA("standard_nc6_v3")).To(Equal(datamodel.AKSGPUCudaVersionSuffix))
It("should use newest AKSGPUCudaLTSVersionSuffix with non grid SKU", func() {
Expect(GetAKSGPUImageSHA("standard_nc6_v3")).To(Equal(datamodel.AKSGPUCudaLTSVersionSuffix))
})
Comment on lines +998 to 1000
})

Expand Down
8 changes: 8 additions & 0 deletions pkg/agent/datamodel/gpu_components.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ const Nvidia470CudaDriverVersion = "cuda-470.82.01"
// GPU driver versions and aks-gpu image version suffixes, one version/suffix pair per
// aks-gpu image variant. They are declared without initializers; LoadConfig assigns
// them from the GPU image entry whose repository name matches:
//   - NvidiaCudaLTSDriverVersion / AKSGPUCudaLTSVersionSuffix: "aks-gpu-cuda-lts"
//   - NvidiaCudaDriverVersion / AKSGPUCudaVersionSuffix: "aks-gpu-cuda"
//
// NOTE(review): the Grid and GridV20 pairs are presumably populated the same way from
// the aks-gpu-grid images — confirm against the rest of LoadConfig.
//
//nolint:gochecknoglobals
var (
NvidiaCudaDriverVersion string
NvidiaCudaLTSDriverVersion string
NvidiaGridDriverVersion string
NvidiaGridV20DriverVersion string
AKSGPUCudaVersionSuffix string
AKSGPUCudaLTSVersionSuffix string
AKSGPUGridVersionSuffix string
AKSGPUGridV20VersionSuffix string
)
Expand Down Expand Up @@ -63,6 +65,12 @@ func LoadConfig() error {
// confused by substring matching.
switch gpuImageRepo(image.DownloadURL) {
case "aks-gpu-cuda-lts":
NvidiaCudaLTSDriverVersion = version
AKSGPUCudaLTSVersionSuffix = suffix
case "aks-gpu-cuda":
// Pre-LTS CUDA image, pinned to the R580 line (V100-capable). Loaded so its version is
// available if a SKU is ever routed to the "cuda" image in CSE; the default managed CUDA
// image is aks-gpu-cuda-lts (see GetGPUDriverType / GetGPUDriverVersion in baker.go).
NvidiaCudaDriverVersion = version
AKSGPUCudaVersionSuffix = suffix
case "aks-gpu-grid":
Expand Down
10 changes: 10 additions & 0 deletions pkg/agent/datamodel/gpu_components_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ func TestLoadConfig(t *testing.T) {
if !suffixPattern.MatchString(AKSGPUGridV20VersionSuffix) {
t.Errorf("AKSGPUGridV20VersionSuffix '%s' does not match expected format", AKSGPUGridV20VersionSuffix)
}

// aks-gpu-cuda-lts drives the render, so its version/suffix must be loaded. aks-gpu-cuda
// (NvidiaCudaDriverVersion / AKSGPUCudaVersionSuffix, checked above) is the recognized pre-LTS
// image, available if a SKU is routed to the "cuda" image in CSE later.
if !versionPattern.MatchString(NvidiaCudaLTSDriverVersion) {
t.Errorf("NvidiaCudaLTSDriverVersion '%s' does not match expected format", NvidiaCudaLTSDriverVersion)
}
if !suffixPattern.MatchString(AKSGPUCudaLTSVersionSuffix) {
t.Errorf("AKSGPUCudaLTSVersionSuffix '%s' does not match expected format", AKSGPUCudaLTSVersionSuffix)
}
}

// TestGPUImageRepo verifies that the bare repo name is extracted via exact final
Expand Down
Loading