From d88499bd7edf07d676936249a35f0c634a1e55cf Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdroettboom@nvidia.com>
Date: Tue, 19 May 2026 11:47:15 -0400
Subject: [PATCH] Migrate from pynvml to cuda.core.system

---
 CLAUDE.md                          |   2 +-
 dependencies.yaml                  |   3 +-
 docs/source/api/debug.rst          |   2 +-
 docs/source/plugin_development.rst |   7 +-
 docs/source/troubleshooting.rst    |   2 +-
 pyproject.toml                     |   3 +-
 rapids_cli/hardware.py             |  32 ++++-----
 rapids_cli/tests/test_hardware.py  | 111 +++++++++++++++--------------
 8 files changed, 83 insertions(+), 79 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index d73a716..fec93ed 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -111,7 +111,7 @@ The doctor command discovers and runs checks via Python entry points defined in
 ### Key Dependencies
 
 - `rich` and `rich-click` for terminal output and CLI interface
-- `pynvml` (nvidia-ml-py) for GPU information
+- `cuda.core` (`cuda.core.system`) for GPU information
 - `cuda-pathfinder` for locating CUDA installations
 - `psutil` for system memory checks
 
diff --git a/dependencies.yaml b/dependencies.yaml
index 3afec7e..41640ed 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -61,12 +61,11 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - cuda-core >=0.6.0
+          - cuda-core>=1.0.0
           # NVML APIs we use via cuda.core.system landed in cuda-bindings
           # 12.9.6 (CUDA 12) and 13.2.0 (CUDA 13). The 13.0/13.1
           # wheels pre-date the 13.x landing and are excluded.
           - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
-          - nvidia-ml-py>=12.0
           - cuda-pathfinder >=1.2.3
           - packaging
           - psutil
diff --git a/docs/source/api/debug.rst b/docs/source/api/debug.rst
index aa4e84b..b54b27b 100644
--- a/docs/source/api/debug.rst
+++ b/docs/source/api/debug.rst
@@ -10,7 +10,7 @@ for troubleshooting RAPIDS installations.
 :func:`~rapids_cli.debug.debug.run_debug` is the main entry point. It collects:
 
 - Platform and OS details (from ``platform`` and ``/etc/os-release``)
-- NVIDIA driver and CUDA versions (via ``pynvml``)
+- NVIDIA driver and CUDA versions (via ``cuda.core``)
 - CUDA runtime path (via ``cuda-pathfinder``)
 - System CUDA toolkit locations (globbing ``/usr/local/cuda*``)
 - Python version and hash info
diff --git a/docs/source/plugin_development.rst b/docs/source/plugin_development.rst
index d5b5e45..c59d1af 100644
--- a/docs/source/plugin_development.rst
+++ b/docs/source/plugin_development.rst
@@ -95,14 +95,13 @@ GPU memory requirement check:
 
 .. code-block:: python
 
-   import pynvml
+   from cuda.core import system
 
 
    def gpu_memory_check(verbose=False, **kwargs):
        """Check that GPU has at least 8GB memory."""
-       pynvml.nvmlInit()
-       handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-       mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
+       device = system.Device(0)
+       mem = device.memory_info
        available_gb = mem.total / (1024**3)
 
        if available_gb < 8:
diff --git a/docs/source/troubleshooting.rst b/docs/source/troubleshooting.rst
index 5da7f2c..54243c1 100644
--- a/docs/source/troubleshooting.rst
+++ b/docs/source/troubleshooting.rst
@@ -19,7 +19,7 @@ No GPUs Detected
 
    .. code-block:: bash
 
-      python -c "import pynvml; pynvml.nvmlInit(); print(pynvml.nvmlDeviceGetCount())"
+      python -c "from cuda.core import system; print(system.get_num_devices())"
 
 3. If running in a container, ensure GPU passthrough is enabled:
 
diff --git a/pyproject.toml b/pyproject.toml
index b2c0d4b..1d076a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,10 +8,9 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*",
-    "cuda-core >=0.6.0",
+    "cuda-core>=1.0.0",
     "cuda-pathfinder >=1.2.3",
     "importlib-metadata >= 4.13.0; python_version < '3.12'",
-    "nvidia-ml-py>=12.0",
     "packaging",
     "psutil",
     "pyyaml",
diff --git a/rapids_cli/hardware.py b/rapids_cli/hardware.py
index 947b986..1e40405 100644
--- a/rapids_cli/hardware.py
+++ b/rapids_cli/hardware.py
@@ -63,7 +63,7 @@ def cuda_runtime_path(self) -> str | None:
 
 
 class NvmlGpuInfo:
-    """Real GPU info provider backed by pynvml.
+    """Real GPU info provider backed by cuda.core.system.
 
     Lazily loads all device information on first property access and caches results.
     """
@@ -80,37 +80,37 @@ def _ensure_loaded(self) -> None:
         if self._loaded:
             return
 
-        import pynvml
+        from cuda.core import system
 
         try:
-            pynvml.nvmlInit()
-        except pynvml.NVMLError as e:
+            self._device_count = system.get_num_devices()
+        except system.NvmlError as e:
             raise HardwareInfoError("Unable to initialize GPU driver (NVML)") from e
 
-        self._device_count = pynvml.nvmlDeviceGetCount()
-        self._cuda_driver_version = pynvml.nvmlSystemGetCudaDriverVersion()
-        self._driver_version = pynvml.nvmlSystemGetDriverVersion()
+        cuda_driver_version = system.get_user_mode_driver_version()
+        self._cuda_driver_version = cuda_driver_version[0] * 1000 + cuda_driver_version[1] * 10
+        driver_version = system.get_kernel_mode_driver_version()
+        self._driver_version = ".".join(str(x) for x in driver_version[:2])
 
         self._devices = []
-        for i in range(self._device_count):
-            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
-            memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        for device in system.Device.get_all_devices():
+            major, minor = device.cuda_compute_capability
+            memory_info = device.memory_info
 
             nvlink_states: list[bool] = []
-            for link_id in range(pynvml.NVML_NVLINK_MAX_LINKS):
+            for link_id in range(system.NvlinkInfo.max_links):
                 try:
-                    state = pynvml.nvmlDeviceGetNvLinkState(handle, link_id)
+                    state = device.get_nvlink(link_id).state
                     nvlink_states.append(bool(state))
                 except (
-                    pynvml.NVMLError_InvalidArgument,
-                    pynvml.NVMLError_NotSupported,
+                    system.InvalidArgumentError,
+                    system.NotSupportedError,
                 ):
                     break
 
             self._devices.append(
                 DeviceInfo(
-                    index=i,
+                    index=device.index,
                     compute_capability=(major, minor),
                     memory_total_bytes=memory_info.total,
                     nvlink_states=nvlink_states,
diff --git a/rapids_cli/tests/test_hardware.py b/rapids_cli/tests/test_hardware.py
index 215d68c..12bdfb9 100644
--- a/rapids_cli/tests/test_hardware.py
+++ b/rapids_cli/tests/test_hardware.py
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 from unittest.mock import MagicMock, patch
 
-import pynvml
+from cuda.bindings import nvml
+from cuda.core import system
 import pytest
 
 from rapids_cli.hardware import (
@@ -25,8 +26,8 @@
 
 def test_nvml_gpu_info_init_failure():
     with patch(
-        "pynvml.nvmlInit",
-        side_effect=pynvml.NVMLError(pynvml.NVML_ERROR_DRIVER_NOT_LOADED),
+        "cuda.bindings.nvml.init_v2",
+        side_effect=nvml.NvmlError(nvml.Return.ERROR_DRIVER_NOT_LOADED),
     ):
         gpu_info = NvmlGpuInfo()
         with pytest.raises(HardwareInfoError, match="Unable to initialize GPU driver"):
@@ -34,21 +35,20 @@ def test_nvml_gpu_info_init_failure():
 
 
 def test_nvml_gpu_info_loads_once():
-    mock_handle = MagicMock()
+    mock_device = MagicMock()
+    mock_device.cuda_compute_capability = (7, 5)
     mock_memory = MagicMock()
+    mock_device.memory_info = mock_memory
     mock_memory.total = 16 * 1024**3
+    mock_nvlink_info = MagicMock()
+    mock_nvlink_info.max_links = 3
 
     with (
-        patch("pynvml.nvmlInit") as mock_init,
-        patch("pynvml.nvmlDeviceGetCount", return_value=1),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54"),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(7, 5)),
-        patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory),
-        patch(
-            "pynvml.nvmlDeviceGetNvLinkState", side_effect=pynvml.NVMLError_NotSupported
-        ),
+        patch("cuda.core.system.NvlinkInfo", mock_nvlink_info),
+        patch("cuda.core.system.get_num_devices", return_value=1),
+        patch("cuda.core.system.get_user_mode_driver_version", return_value=(12, 5)),
+        patch("cuda.core.system.get_kernel_mode_driver_version", return_value=(550, 54, 0)),
+        patch("cuda.core.system.Device", return_value=mock_device),
     ):
         gpu_info = NvmlGpuInfo()
         # Access multiple properties to verify caching
@@ -56,24 +56,24 @@ def test_nvml_gpu_info_loads_once():
         _ = gpu_info.devices
         _ = gpu_info.cuda_driver_version
         _ = gpu_info.driver_version
-        # nvmlInit should be called exactly once
-        mock_init.assert_called_once()
 
 
 def test_nvml_gpu_info_device_data():
-    mock_handle = MagicMock()
+    mock_device = MagicMock()
+    mock_device.cuda_compute_capability = (9, 0)
+    mock_device.get_all_devices.return_value = [mock_device, mock_device]
     mock_memory = MagicMock()
+    mock_device.memory_info = mock_memory
     mock_memory.total = 24 * 1024**3
+    mock_nvlink_info = MagicMock()
+    mock_device.get_nvlink.side_effect = lambda link_id: MagicMock(state=True)
+    mock_nvlink_info.max_links = 3
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12060),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="560.10"),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(9, 0)),
-        patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory),
-        patch("pynvml.nvmlDeviceGetNvLinkState", return_value=1),
+        patch("cuda.core.system.get_num_devices", return_value=2),
+        patch("cuda.core.system.get_user_mode_driver_version", return_value=(12, 6)),
+        patch("cuda.core.system.get_kernel_mode_driver_version", return_value=(560, 10, 0)),
+        patch("cuda.core.system.Device", mock_device),
     ):
         gpu_info = NvmlGpuInfo()
         assert gpu_info.device_count == 2
@@ -85,45 +85,52 @@ def test_nvml_gpu_info_device_data():
 
 
 def test_nvml_gpu_info_nvlink_states():
-    mock_handle = MagicMock()
-    mock_memory = MagicMock()
-    mock_memory.total = 16 * 1024**3
-
-    def nvlink_side_effect(handle, link_id):
+    def nvlink_side_effect(link_id):
         if link_id < 2:
-            return 1
-        raise pynvml.NVMLError_NotSupported()
+            nvlink_info = MagicMock()
+            nvlink_info.state = True
+            return nvlink_info
+        raise system.NotSupportedError(nvml.Return.ERROR_NOT_SUPPORTED)
+
+    mock_device = MagicMock()
+    mock_device.cuda_compute_capability = (9, 0)
+    mock_device.get_all_devices.return_value = [mock_device, mock_device]
+    mock_memory = MagicMock()
+    mock_device.memory_info = mock_memory
+    mock_memory.total = 24 * 1024**3
+    mock_nvlink_info = MagicMock()
+    mock_device.get_nvlink.side_effect = nvlink_side_effect
+    mock_nvlink_info.max_links = 3
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=1),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54"),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(7, 5)),
-        patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory),
-        patch("pynvml.nvmlDeviceGetNvLinkState", side_effect=nvlink_side_effect),
+        patch("cuda.core.system.get_num_devices", return_value=2),
+        patch("cuda.core.system.get_user_mode_driver_version", return_value=(12, 6)),
+        patch("cuda.core.system.get_kernel_mode_driver_version", return_value=(560, 10, 0)),
+        patch("cuda.core.system.Device", mock_device),
     ):
         gpu_info = NvmlGpuInfo()
         assert gpu_info.devices[0].nvlink_states == [True, True]
 
 
 def test_nvml_gpu_info_no_nvlink():
-    mock_handle = MagicMock()
+    def nvlink_side_effect(link_id):
+        raise system.NotSupportedError(nvml.Return.ERROR_NOT_SUPPORTED)
+
+    mock_device = MagicMock()
+    mock_device.cuda_compute_capability = (9, 0)
+    mock_device.get_all_devices.return_value = [mock_device, mock_device]
     mock_memory = MagicMock()
-    mock_memory.total = 16 * 1024**3
+    mock_device.memory_info = mock_memory
+    mock_memory.total = 24 * 1024**3
+    mock_nvlink_info = MagicMock()
+    mock_device.get_nvlink.side_effect = nvlink_side_effect
+    mock_nvlink_info.max_links = 3
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=1),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54"),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(7, 5)),
-        patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory),
-        patch(
-            "pynvml.nvmlDeviceGetNvLinkState", side_effect=pynvml.NVMLError_NotSupported
-        ),
+        patch("cuda.core.system.get_num_devices", return_value=2),
+        patch("cuda.core.system.get_user_mode_driver_version", return_value=(12, 6)),
+        patch("cuda.core.system.get_kernel_mode_driver_version", return_value=(560, 10, 0)),
+        patch("cuda.core.system.Device", mock_device),
     ):
         gpu_info = NvmlGpuInfo()
         assert gpu_info.devices[0].nvlink_states == []