From 11a2124a17497beec5ce7273b1b525823ce81fc0 Mon Sep 17 00:00:00 2001 From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com> Date: Thu, 26 Mar 2026 00:29:54 -0600 Subject: [PATCH 1/8] Minor fix for potential filesystem::canonical issue (#249) --- src/header/TransferBench.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/header/TransferBench.hpp b/src/header/TransferBench.hpp index 245781f3..113de968 100644 --- a/src/header/TransferBench.hpp +++ b/src/header/TransferBench.hpp @@ -2595,9 +2595,9 @@ static bool IsConfiguredGid(union ibv_gid const& gid) } ibvDevice.busId = ""; { - std::string device_path(ibvDevice.devicePtr->dev_path); + std::string device_path = std::string(ibvDevice.devicePtr->dev_path) + "/device"; if (std::filesystem::exists(device_path)) { - std::string pciPath = std::filesystem::canonical(device_path + "/device").string(); + std::string pciPath = std::filesystem::canonical(device_path).string(); std::size_t pos = pciPath.find_last_of('/'); if (pos != std::string::npos) { ibvDevice.busId = pciPath.substr(pos + 1); @@ -2608,9 +2608,8 @@ static bool IsConfiguredGid(union ibv_gid const& gid) // Get nearest numa node for this device ibvDevice.numaNode = -1; std::filesystem::path devicePath = "/sys/bus/pci/devices/" + ibvDevice.busId + "/numa_node"; - std::string canonicalPath = std::filesystem::canonical(devicePath).string(); - - if (std::filesystem::exists(canonicalPath)) { + if (std::filesystem::exists(devicePath)) { + std::string canonicalPath = std::filesystem::canonical(devicePath).string(); std::ifstream file(canonicalPath); if (file.is_open()) { std::string numaNodeStr; @@ -2658,13 +2657,15 @@ static bool IsConfiguredGid(union ibv_gid const& gid) std::string const& description, PCIeNode& root) { - std::filesystem::path devicePath = "/sys/bus/pci/devices/" + pcieAddress; - std::string canonicalPath = std::filesystem::canonical(devicePath).string(); + std::string lowerAddress = 
pcieAddress; + std::transform(lowerAddress.begin(), lowerAddress.end(), lowerAddress.begin(), ::tolower); + std::filesystem::path devicePath = "/sys/bus/pci/devices/" + lowerAddress; if (!std::filesystem::exists(devicePath)) { return {ERR_FATAL, "Device path %s does not exist", devicePath.c_str()}; } + std::string canonicalPath = std::filesystem::canonical(devicePath).string(); std::istringstream iss(canonicalPath); std::string token; From 73427c51d512d9f7c485cac06939774bf9c9a46c Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Thu, 23 Apr 2026 17:30:36 -0500 Subject: [PATCH 2/8] Fix portability for AlmaLinux 8 / manylinux_2_28 (gcc 8) (#261) gcc 8.5 (AlmaLinux 8 base, used by quay.io/pypa/manylinux_2_28_x86_64) is older than what Ubuntu 22.04 ships and doesn't pull in some headers transitively or expose std::filesystem from the default libstdc++. - Drop unused #include <barrier> from TransferBench.hpp. <barrier> is C++20 and requires libstdc++ >= 10. std::barrier was never used. - Add explicit #include , , to Utilities.hpp, NicPeerToPeer.hpp, AllToAll.hpp, AllToAllN.hpp. Older libstdc++ does not pull these in via other standard headers. - Link libstdc++fs unconditionally for std::filesystem. gcc < 9 ships it as a separate static archive in a gcc-private libdir (e.g. /usr/lib/gcc/x86_64-redhat-linux/8/libstdc++fs.a), which CMake's find_library does not search. Bare target_link_libraries(... stdc++fs) lets the compiler driver find it. On gcc 9+ it resolves to a no-op stub archive shipped for compatibility, so it's harmless elsewhere. Verified by reproducing the build inside quay.io/pypa/manylinux_2_28_x86_64 locally. 
Co-authored-by: Claude Opus 4 --- CMakeLists.txt | 4 ++++ src/client/Presets/AllToAll.hpp | 2 ++ src/client/Presets/AllToAllN.hpp | 1 + src/client/Presets/NicPeerToPeer.hpp | 2 ++ src/client/Utilities.hpp | 1 + src/header/TransferBench.hpp | 1 - 6 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b533f01a..2da6baa9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -259,6 +259,10 @@ target_link_libraries(TransferBench PRIVATE dl) target_link_libraries(TransferBench PRIVATE ${NUMA_LIBRARY}) target_link_libraries(TransferBench PRIVATE ${HSA_LIBRARY}) +# gcc <9 ships std::filesystem in a separate library (libstdc++fs). +# Required on AlmaLinux 8 / manylinux_2_28; harmless no-op stub on newer toolchains. +target_link_libraries(TransferBench PRIVATE stdc++fs) + rocm_install(TARGETS TransferBench COMPONENT devel) rocm_setup_version(VERSION ${VERSION_STRING}) diff --git a/src/client/Presets/AllToAll.hpp b/src/client/Presets/AllToAll.hpp index 49ea5db7..2beae8af 100644 --- a/src/client/Presets/AllToAll.hpp +++ b/src/client/Presets/AllToAll.hpp @@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + int AllToAllPreset(EnvVars& ev, size_t const numBytesPerTransfer, std::string const presetName) diff --git a/src/client/Presets/AllToAllN.hpp b/src/client/Presets/AllToAllN.hpp index 82887470..7dac6b22 100644 --- a/src/client/Presets/AllToAllN.hpp +++ b/src/client/Presets/AllToAllN.hpp @@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include "EnvVars.hpp" int AllToAllRdmaPreset(EnvVars& ev, diff --git a/src/client/Presets/NicPeerToPeer.hpp b/src/client/Presets/NicPeerToPeer.hpp index 11463f53..24f5d71f 100644 --- a/src/client/Presets/NicPeerToPeer.hpp +++ b/src/client/Presets/NicPeerToPeer.hpp @@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include + // Helper functions // Returns a schedule of round robin pairing of N elements, using Circle Method diff --git a/src/client/Utilities.hpp b/src/client/Utilities.hpp index 7604c7ae..0ba93fc6 100644 --- a/src/client/Utilities.hpp +++ b/src/client/Utilities.hpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #pragma once +#include #include #include #include "TransferBench.hpp" diff --git a/src/header/TransferBench.hpp b/src/header/TransferBench.hpp index 113de968..7b74dc5b 100644 --- a/src/header/TransferBench.hpp +++ b/src/header/TransferBench.hpp @@ -25,7 +25,6 @@ THE SOFTWARE. #include #include #include -#include #include #include #include From 04e62a58f839f3190eeefaa89342d70af1146ffd Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Fri, 24 Apr 2026 13:59:20 -0500 Subject: [PATCH 3/8] CI: build relocatable packages via TheRock SDK (#262) * Add GitHub Actions workflow for relocatable package builds via TheRock Adds a CI workflow that builds DEB/RPM/TGZ packages of TransferBench against the TheRock nightly ROCm SDK, modeled on the ROCmValidationSuite packaging workflow. Packages install to /opt/rocm/extras- with $ORIGIN-relative RPATH so they are relocatable. - build_packages_local.sh: single source of truth for both local and CI builds. Detects Ubuntu vs AlmaLinux/manylinux, installs deps, fetches TheRock SDK tarball, configures CMake with relocatable RPATH and the new BUILD_RELOCATABLE_PACKAGE option, builds, and invokes CPack for DEB/RPM/TGZ. - .github/workflows/build-relocatable-packages.yml: parallel Ubuntu 22.04 + manylinux_2_28 jobs triggered on push, PR, daily cron, and workflow_dispatch. OIDC-based S3 upload gated on AWS_S3_BUCKET being set; apt/yum repo metadata generated for non-PR builds. Build report artifact summarizes S3 paths. - .github/workflows/README_BUILD_PACKAGES.md: workflow docs covering triggers, local usage, S3 layout, IAM trust policy, and apt/yum install snippets. 
- CMakeLists.txt: new BUILD_RELOCATABLE_PACKAGE option that bypasses rocm_install/rocm_create_package, names the package amdrocm-transferbench, and honors caller-set install prefix and CPACK_*_PACKAGE_RELEASE env vars. Default cmake .. behavior is unchanged. --- .github/workflows/README_BUILD_PACKAGES.md | 226 ++++++++++++++ .../workflows/build-relocatable-packages.yml | 294 ++++++++++++++++++ CMakeLists.txt | 80 ++++- build_packages_local.sh | 243 +++++++++++++++ 4 files changed, 830 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/README_BUILD_PACKAGES.md create mode 100644 .github/workflows/build-relocatable-packages.yml create mode 100755 build_packages_local.sh diff --git a/.github/workflows/README_BUILD_PACKAGES.md b/.github/workflows/README_BUILD_PACKAGES.md new file mode 100644 index 00000000..2490f1b7 --- /dev/null +++ b/.github/workflows/README_BUILD_PACKAGES.md @@ -0,0 +1,226 @@ +# Building Relocatable TransferBench Packages with GitHub Actions + +This document describes the GitHub Actions workflow for building relocatable +TransferBench packages using the ROCm SDK from +[TheRock](https://github.com/ROCm/TheRock). + +The workflow (`.github/workflows/build-relocatable-packages.yml`) and the +`build_packages_local.sh` script at the repo root produce: + +- **DEB** packages for Ubuntu/Debian +- **RPM** packages for AlmaLinux/Rocky/RHEL (built in `manylinux_2_28`) +- **TGZ** archives for any Linux distribution + +All packages install to `/opt/rocm/extras-` and use relocatable +`$ORIGIN`-relative `RPATH` so the install tree itself can be moved without +hard-coded library paths. These artifacts are **not** fully self-contained: +target systems must still provide the required ROCm/HSA runtime libraries +(declared as package dependencies: `hsa-rocr` and `numactl`). + +This workflow is modeled on the +[ROCmValidationSuite packaging workflow](https://github.com/ROCm/ROCmValidationSuite/blob/master/.github/workflows/README_BUILD_PACKAGES.md). 
+ +## Workflow Triggers + +| Trigger | Behavior | +|---------|----------| +| Push to `develop`, `mainline`, `release/**`, `candidate` | Build + upload to S3 (if configured) + regenerate apt/yum repo metadata | +| Pull request to `develop`, `mainline` | Build + upload to ref-specific S3 path (no repo metadata) | +| Schedule (daily 13:00 UTC) | Same as push, with auto-fetched latest ROCm | +| `workflow_dispatch` | Manual trigger with `rocm_version` and `gpu_family` inputs | + +### Manual trigger inputs + +- **`rocm_version`** (e.g. `7.11.0a20260121`). Empty = auto-fetch latest from TheRock. +- **`gpu_family`** — one of: + - `gfx94X-dcgpu` (MI300A/MI300X) — **default** + - `gfx950-dcgpu` (MI350X/MI355X) + - `gfx110X-all` (RX 7900 XTX, 7800 XT, 7700S, Radeon 780M) + - `gfx120X-all` (RX 9060/XT, 9070/XT) + - `gfx1151` (Strix Halo iGPU) + +## Build features enabled in CI + +The workflow always builds with: + +- `ENABLE_NIC_EXEC=OFF` — RDMA NIC executor disabled (would require libibverbs.so.1 at runtime; not bundled by TheRock SDK) +- `ENABLE_MPI_COMM=OFF` — MPI multi-node communicator disabled (would require OpenMPI at runtime; not bundled by TheRock SDK). Packages are built to run out of the box with only `numactl`/`libnuma1` from the OS. +- `DISABLE_DMABUF=OFF` — DMA-BUF support for GPU Direct RDMA +- `BUILD_RELOCATABLE_PACKAGE=ON` — RVS-style install prefix + package naming +- `GPU_TARGETS` — full data-center + consumer set (gfx906, 908, 90a, 942, 950, 1030, 1100/01/02, 1150/51, 1200/01) + +## Local builds + +The same script the workflow uses also works locally: + +```bash +# Auto-fetch latest ROCm +sudo ./build_packages_local.sh + +# Pin a specific version (use sudo -E to preserve env) +sudo -E ROCM_VERSION=7.11.0a20260121 GPU_FAMILY=gfx94X-dcgpu ./build_packages_local.sh + +# Debug build +sudo -E BUILD_TYPE=Debug ./build_packages_local.sh +``` + +`sudo` is required because the script installs system packages +(compilers, `cmake`, `libnuma-dev` / `numactl-devel`, packaging tools, etc). 
+ +After the script completes, packages live under `build/`: + +``` +build/amdrocm7-transferbench_1.66.02-<release>_amd64.deb +build/amdrocm7-transferbench-1.66.02-<release>.x86_64.rpm +build/amdrocm7-transferbench-1.66.02-Linux.tar.gz +``` + +## Installing built packages + +### Ubuntu / Debian + +```bash +sudo dpkg -i build/amdrocm7-transferbench_*.deb +/opt/rocm/extras-7/bin/TransferBench +``` + +### Rocky / RHEL / AlmaLinux + +```bash +sudo rpm -i --replacefiles --nodeps build/amdrocm7-transferbench-*.rpm +/opt/rocm/extras-7/bin/TransferBench +``` + +### Any Linux (TGZ — relocatable install tree, requires ROCm runtime on target) + +```bash +sudo mkdir -p /opt/rocm/extras-7 +sudo tar -xzf build/amdrocm7-transferbench-*.tar.gz -C /opt/rocm/extras-7 --strip-components=1 +export PATH=/opt/rocm/extras-7/bin:$PATH +TransferBench +``` + +## S3 upload (OIDC) + +S3 upload runs only when: +- The repository is `ROCm/TransferBench`, **and** +- The `AWS_S3_BUCKET` repository variable is set. + +Upload uses **AWS OIDC** — no long-term keys are stored in the repo. + +### S3 path layout + +| Trigger | Path | +|---------|------| +| `release/*` push or dispatch | `release/transferbench/{deb,rpm,tar}/` | +| Schedule, push to `develop`/`mainline`, dispatch on non-release | `nightly/transferbench/{deb,rpm,tar}/` | +| Pull request (same repo) | `transferbench/<branch>/<run_number>/{ubuntu-22.04,manylinux_2_28}/` | + +### Required repository setup + +In **Settings → Secrets and variables → Actions**: + +**Secrets tab:** +- `AWS_ROLE_ARN` — IAM role ARN with OIDC trust for this repo (e.g. `arn:aws:iam::123456789012:role/rocm-transferbench-s3-upload`) + +**Variables tab:** +- `AWS_S3_BUCKET` — bucket name (e.g. 
`rocm-transferbench-packages`) +- `RUNNER_LABEL` (optional) — override Ubuntu runner label (default `ubuntu-22.04`) +- `RUNNER_LABEL_CONTAINER` (optional) — override container-job runner label (default `ubuntu-latest`) +- `RUNNER_LABEL_UTILITY` (optional) — override summary-job runner label (default `ubuntu-latest`) + +### IAM role trust policy + +The role in `AWS_ROLE_ARN` must trust GitHub's OIDC provider: + +```json +{ + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::<account-id>:oidc-provider/token.actions.githubusercontent.com" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" }, + "StringLike": { "token.actions.githubusercontent.com:sub": "repo:ROCm/TransferBench:*" } + } + }] +} +``` + +Permissions needed: `s3:PutObject`, `s3:GetObject`, `s3:ListBucket`, `s3:DeleteObject` on the bucket. + +## Using the S3 paths as apt / yum repos + +Push and scheduled builds also publish APT / YUM metadata so the S3 paths +work directly as native package repositories. + +### apt (Ubuntu / Debian) + +```bash +echo "deb [trusted=yes] https://<bucket>.s3.amazonaws.com/nightly/transferbench/deb/ ./" \ + | sudo tee /etc/apt/sources.list.d/transferbench-nightly.list +sudo apt update +sudo apt install amdrocm7-transferbench +``` + +### yum / dnf (Rocky / RHEL / AlmaLinux) + +```bash +sudo tee /etc/yum.repos.d/transferbench-nightly.repo <<'EOF' +[transferbench-nightly] +name=TransferBench Nightly +baseurl=https://<bucket>.s3.amazonaws.com/nightly/transferbench/rpm/ +enabled=1 +gpgcheck=0 +EOF +sudo dnf install amdrocm7-transferbench +``` + +> **Note:** `[trusted=yes]` / `gpgcheck=0` skip GPG verification. For +> production deployments, sign packages and metadata with a GPG key. 
+ +## Verifying RPATH + +```bash +readelf -d /opt/rocm/extras-7/bin/TransferBench | grep -E 'RPATH|RUNPATH' +# Should contain $ORIGIN, $ORIGIN/../lib, /opt/rocm/extras-7/lib +``` + +## Troubleshooting + +### S3 step fails with "Credentials could not be loaded" + +- PR from a fork: OIDC is unavailable; the upload step is skipped. +- Same-repo: confirm `AWS_ROLE_ARN` secret is set and the role's trust + policy allows `repo:ROCm/TransferBench:*`. + +### Build fails: missing `libibverbs.h` / `mpi.h` + +The packaged builds disable both `ENABLE_NIC_EXEC` and `ENABLE_MPI_COMM`, so these +headers are not required. If you've manually re-enabled either flag for a local +build, install the dev packages yourself: + +```bash +# Ubuntu — for ENABLE_NIC_EXEC=ON +sudo apt install -y libibverbs-dev rdma-core +# Ubuntu — for ENABLE_MPI_COMM=ON +sudo apt install -y libopenmpi-dev openmpi-bin +# Rocky/RHEL +sudo dnf install -y rdma-core-devel openmpi-devel +``` + +### TheRock tarball download 404s + +Check available builds at +<https://therock-nightly-tarball.s3.amazonaws.com/index.html>. Set +`ROCM_VERSION` explicitly to a known-good version. 
+ +## References + +- [TheRock Releases](https://github.com/ROCm/TheRock/blob/main/RELEASES.md) +- [TheRock nightly tarballs](https://therock-nightly-tarball.s3.amazonaws.com/index.html) +- [ROCmValidationSuite packaging workflow](https://github.com/ROCm/ROCmValidationSuite/blob/master/.github/workflows/README_BUILD_PACKAGES.md) — reference implementation +- [TransferBench README](../../README.md) diff --git a/.github/workflows/build-relocatable-packages.yml b/.github/workflows/build-relocatable-packages.yml new file mode 100644 index 00000000..2f9d6a4b --- /dev/null +++ b/.github/workflows/build-relocatable-packages.yml @@ -0,0 +1,294 @@ +name: Build Relocatable Packages + +on: + push: + branches: [develop, mainline, 'release/**', candidate] + pull_request: + branches: [develop, mainline] + schedule: + # Daily at 13:00 UTC (5:00 AM PST) + - cron: '0 13 * * *' + workflow_dispatch: + inputs: + rocm_version: + description: 'ROCm version (empty = auto-fetch latest from TheRock)' + required: false + default: '' + gpu_family: + description: 'GPU family target' + required: false + default: 'gfx94X-dcgpu' + type: choice + options: + - gfx94X-dcgpu + - gfx950-dcgpu + - gfx110X-all + - gfx120X-all + - gfx1151 + +permissions: + contents: read + id-token: write # Required for OIDC S3 upload + +env: + ROCM_VERSION: ${{ github.event.inputs.rocm_version || '' }} + GPU_FAMILY: ${{ github.event.inputs.gpu_family || 'gfx94X-dcgpu' }} + BUILD_TYPE: Release + +jobs: + # ============================================================ + # Ubuntu 22.04 — DEB + TGZ + # ============================================================ + build-ubuntu: + name: Build (Ubuntu 22.04) + runs-on: ${{ vars.RUNNER_LABEL || 'ubuntu-22.04' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 # for branch.commit version tags + + - name: Set environment + run: | + echo "ROCM_VERSION=${ROCM_VERSION}" >> "$GITHUB_ENV" + echo "GPU_FAMILY=${GPU_FAMILY}" >> 
"$GITHUB_ENV" + echo "BUILD_TYPE=${BUILD_TYPE}" >> "$GITHUB_ENV" + + - name: Build packages + run: | + chmod +x ./build_packages_local.sh + sudo -E ROCM_VERSION="${ROCM_VERSION}" \ + GPU_FAMILY="${GPU_FAMILY}" \ + BUILD_TYPE="${BUILD_TYPE}" \ + GITHUB_RUN_NUMBER="${GITHUB_RUN_NUMBER}" \ + GITHUB_REF_NAME="${GITHUB_REF_NAME}" \ + ./build_packages_local.sh + + - name: Verify DEB package + run: | + shopt -s nullglob + for deb in build/amdrocm*-transferbench*.deb; do + echo "==> ${deb}" + dpkg-deb -I "${deb}" + dpkg-deb -c "${deb}" | head -50 + done + + - name: Upload artifacts (always, for inspection) + uses: actions/upload-artifact@v4 + with: + name: ubuntu-22.04-packages + path: | + build/amdrocm*-transferbench*.deb + build/amdrocm*-transferbench*.tar.gz + if-no-files-found: error + + - name: Configure AWS credentials (OIDC) + if: github.repository == 'ROCm/TransferBench' && vars.AWS_S3_BUCKET != '' + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: us-east-1 + + - name: Upload to S3 + if: github.repository == 'ROCm/TransferBench' && vars.AWS_S3_BUCKET != '' + env: + AWS_S3_BUCKET: ${{ vars.AWS_S3_BUCKET }} + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + S3_PATH="s3://${AWS_S3_BUCKET}/transferbench/${GITHUB_HEAD_REF//\//_}/${GITHUB_RUN_NUMBER}/ubuntu-22.04" + METADATA="skip" + elif [[ "${GITHUB_REF_NAME}" == release/* ]]; then + S3_PATH="s3://${AWS_S3_BUCKET}/release/transferbench/deb" + METADATA="generate" + else + S3_PATH="s3://${AWS_S3_BUCKET}/nightly/transferbench/deb" + METADATA="generate" + fi + echo "S3_DEB_PATH=${S3_PATH}" >> "$GITHUB_ENV" + echo "DEB_METADATA=${METADATA}" >> "$GITHUB_ENV" + aws s3 cp build/ "${S3_PATH}/" --recursive --exclude "*" \ + --include "amdrocm*-transferbench*.deb" \ + --include "amdrocm*-transferbench*.tar.gz" + echo "Uploaded to ${S3_PATH}" + + - name: Generate apt repo metadata + if: github.repository == 'ROCm/TransferBench' 
&& env.DEB_METADATA == 'generate' + env: + AWS_S3_BUCKET: ${{ vars.AWS_S3_BUCKET }} + run: | + set -euo pipefail + WORK="$(mktemp -d)" + aws s3 sync "${S3_DEB_PATH}/" "${WORK}/" --exclude "*" --include "*.deb" + pushd "${WORK}" >/dev/null + dpkg-scanpackages -m . /dev/null > Packages + gzip -kf Packages + { + echo "Origin: ROCm-TransferBench" + echo "Label: ROCm TransferBench Packages" + echo "Suite: stable" + echo "Codename: stable" + echo "Architectures: amd64" + echo "Components: main" + echo "Description: TransferBench DEB packages built from TheRock SDK" + echo "Date: $(date -Ru)" + } > Release + aws s3 cp Packages "${S3_DEB_PATH}/Packages" + aws s3 cp Packages.gz "${S3_DEB_PATH}/Packages.gz" + aws s3 cp Release "${S3_DEB_PATH}/Release" + popd >/dev/null + + # ============================================================ + # manylinux_2_28 (AlmaLinux 8) — RPM + TGZ + # ============================================================ + build-manylinux: + name: Build (manylinux_2_28) + runs-on: ${{ vars.RUNNER_LABEL_CONTAINER || 'ubuntu-latest' }} + container: + image: quay.io/pypa/manylinux_2_28_x86_64 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Set environment + run: | + echo "ROCM_VERSION=${ROCM_VERSION}" >> "$GITHUB_ENV" + echo "GPU_FAMILY=${GPU_FAMILY}" >> "$GITHUB_ENV" + echo "BUILD_TYPE=${BUILD_TYPE}" >> "$GITHUB_ENV" + + - name: Build packages + run: | + chmod +x ./build_packages_local.sh + # No sudo: container runs as root + ROCM_VERSION="${ROCM_VERSION}" \ + GPU_FAMILY="${GPU_FAMILY}" \ + BUILD_TYPE="${BUILD_TYPE}" \ + GITHUB_RUN_NUMBER="${GITHUB_RUN_NUMBER}" \ + GITHUB_REF_NAME="${GITHUB_REF_NAME}" \ + ./build_packages_local.sh + + - name: Verify RPM package + run: | + shopt -s nullglob + for rpm in build/amdrocm*-transferbench*.rpm; do + echo "==> ${rpm}" + rpm -qip "${rpm}" + rpm -qlp "${rpm}" | head -50 + done + + - name: Upload artifacts (always, for inspection) + uses: 
actions/upload-artifact@v4 + with: + name: manylinux_2_28-packages + path: | + build/amdrocm*-transferbench*.rpm + build/amdrocm*-transferbench*.tar.gz + if-no-files-found: error + + - name: Install AWS CLI + if: github.repository == 'ROCm/TransferBench' && vars.AWS_S3_BUCKET != '' + run: | + curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscli.zip + (cd /tmp && unzip -q awscli.zip && ./aws/install) + + - name: Configure AWS credentials (OIDC) + if: github.repository == 'ROCm/TransferBench' && vars.AWS_S3_BUCKET != '' + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: us-east-1 + + - name: Upload to S3 + if: github.repository == 'ROCm/TransferBench' && vars.AWS_S3_BUCKET != '' + env: + AWS_S3_BUCKET: ${{ vars.AWS_S3_BUCKET }} + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + S3_RPM="s3://${AWS_S3_BUCKET}/transferbench/${GITHUB_HEAD_REF//\//_}/${GITHUB_RUN_NUMBER}/manylinux_2_28" + S3_TAR="${S3_RPM}" + METADATA="skip" + elif [[ "${GITHUB_REF_NAME}" == release/* ]]; then + S3_RPM="s3://${AWS_S3_BUCKET}/release/transferbench/rpm" + S3_TAR="s3://${AWS_S3_BUCKET}/release/transferbench/tar" + METADATA="generate" + else + S3_RPM="s3://${AWS_S3_BUCKET}/nightly/transferbench/rpm" + S3_TAR="s3://${AWS_S3_BUCKET}/nightly/transferbench/tar" + METADATA="generate" + fi + echo "S3_RPM_PATH=${S3_RPM}" >> "$GITHUB_ENV" + echo "S3_TAR_PATH=${S3_TAR}" >> "$GITHUB_ENV" + echo "RPM_METADATA=${METADATA}" >> "$GITHUB_ENV" + aws s3 cp build/ "${S3_RPM}/" --recursive --exclude "*" --include "amdrocm*-transferbench*.rpm" + aws s3 cp build/ "${S3_TAR}/" --recursive --exclude "*" --include "amdrocm*-transferbench*.tar.gz" + echo "Uploaded RPM to ${S3_RPM}, TGZ to ${S3_TAR}" + + - name: Generate yum repo metadata + if: github.repository == 'ROCm/TransferBench' && env.RPM_METADATA == 'generate' + env: + AWS_S3_BUCKET: ${{ vars.AWS_S3_BUCKET }} + run: | + set 
-euo pipefail + dnf install -y createrepo_c || yum install -y createrepo_c + WORK="$(mktemp -d)" + aws s3 sync "${S3_RPM_PATH}/" "${WORK}/" --exclude "*" --include "*.rpm" + createrepo_c "${WORK}" + aws s3 sync "${WORK}/repodata/" "${S3_RPM_PATH}/repodata/" --delete + + # ============================================================ + # Build report — collects S3 paths for browsing + # ============================================================ + release-summary: + name: Build Report + needs: [build-ubuntu, build-manylinux] + if: always() + runs-on: ${{ vars.RUNNER_LABEL_UTILITY || 'ubuntu-latest' }} + steps: + - name: Generate report + env: + AWS_S3_BUCKET: ${{ vars.AWS_S3_BUCKET }} + run: | + set -euo pipefail + mkdir -p report + { + echo "# TransferBench Build Report" + echo "" + echo "- Event: \`${GITHUB_EVENT_NAME}\`" + echo "- Ref: \`${GITHUB_REF_NAME}\`" + echo "- Run number: \`${GITHUB_RUN_NUMBER}\`" + echo "- ROCm: \`${ROCM_VERSION:-auto}\`" + echo "- GPU family: \`${GPU_FAMILY}\`" + echo "- Ubuntu job: \`${{ needs.build-ubuntu.result }}\`" + echo "- manylinux: \`${{ needs.build-manylinux.result }}\`" + echo "" + if [[ -n "${AWS_S3_BUCKET:-}" ]]; then + echo "## S3 Upload Locations" + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + BASE="transferbench/${GITHUB_HEAD_REF//\//_}/${GITHUB_RUN_NUMBER}" + echo "- [DEB (Ubuntu)](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=${BASE}/ubuntu-22.04/)" + echo "- [RPM/TGZ (manylinux)](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=${BASE}/manylinux_2_28/)" + elif [[ "${GITHUB_REF_NAME}" == release/* ]]; then + echo "- [DEB](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=release/transferbench/deb/)" + echo "- [RPM](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=release/transferbench/rpm/)" + echo "- [TGZ](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=release/transferbench/tar/)" + else + echo "- 
[DEB](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=nightly/transferbench/deb/)" + echo "- [RPM](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=nightly/transferbench/rpm/)" + echo "- [TGZ](https://s3.console.aws.amazon.com/s3/buckets/${AWS_S3_BUCKET}?prefix=nightly/transferbench/tar/)" + fi + else + echo "_S3 upload not configured (\`AWS_S3_BUCKET\` variable not set)._" + fi + } > report/build-report.md + cat report/build-report.md >> "$GITHUB_STEP_SUMMARY" + + - name: Upload report + uses: actions/upload-artifact@v4 + with: + name: build-report + path: report/build-report.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 2da6baa9..2b6591d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on this machine option(ENABLE_NIC_EXEC "Enable RDMA NIC Executor in TransferBench" OFF) option(ENABLE_MPI_COMM "Enable MPI Communicator support" OFF) option(DISABLE_DMABUF "Disable DMA-BUF support for GPU Direct RDMA" ON) +option(BUILD_RELOCATABLE_PACKAGE "Build with RVS-style relocatable RPATH and amdrocm-transferbench package naming" OFF) # Default GPU architectures to build #================================================================================================== @@ -263,20 +264,73 @@ target_link_libraries(TransferBench PRIVATE ${HSA_LIBRARY}) # Required on AlmaLinux 8 / manylinux_2_28; harmless no-op stub on newer toolchains. target_link_libraries(TransferBench PRIVATE stdc++fs) -rocm_install(TARGETS TransferBench COMPONENT devel) -rocm_setup_version(VERSION ${VERSION_STRING}) +if(BUILD_RELOCATABLE_PACKAGE) + # RVS-style relocatable packaging: bypass rocm_install/rocm_create_package and + # drive CPack directly so CMAKE_INSTALL_PREFIX / CPACK_PACKAGING_INSTALL_PREFIX + # set by the caller (build_packages_local.sh) are honored. 
+ if(NOT DEFINED ROCM_MAJOR_VERSION) + set(ROCM_MAJOR_VERSION "7") + endif() -# Package specific CPACK vars -rocm_package_add_dependencies(DEPENDS "numactl" "hsa-rocr") + install(TARGETS TransferBench RUNTIME DESTINATION bin COMPONENT devel) + + set(CPACK_PACKAGE_NAME "amdrocm${ROCM_MAJOR_VERSION}-transferbench") + set(CPACK_PACKAGE_VERSION "${VERSION_STRING}") + set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") + set(CPACK_PACKAGE_CONTACT "RCCL Team ") + set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "TransferBench: benchmark simultaneous transfers between CPU/GPU/NIC") + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") + + # DEB + set(CPACK_DEBIAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") + set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") + set(CPACK_DEBIAN_PACKAGE_DEPENDS "numactl, libnuma1, hsa-rocr") + set(CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}") + if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + set(CPACK_DEBIAN_PACKAGE_RELEASE "$ENV{CPACK_DEBIAN_PACKAGE_RELEASE}") + endif() -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") -set(CPACK_RPM_PACKAGE_LICENSE "MIT") + # RPM + set(CPACK_RPM_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") + set(CPACK_RPM_PACKAGE_LICENSE "MIT") + set(CPACK_RPM_PACKAGE_REQUIRES "numactl, hsa-rocr") + set(CPACK_RPM_PACKAGE_VENDOR "${CPACK_PACKAGE_VENDOR}") + if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) + set(CPACK_RPM_PACKAGE_RELEASE "$ENV{CPACK_RPM_PACKAGE_RELEASE}") + endif() + # Use the actual install prefix (caller-controlled in relocatable mode) + # rather than hard-coded /opt/... paths. 
+ if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX) + set(_rpm_exclude_prefix "${CPACK_PACKAGING_INSTALL_PREFIX}") + else() + set(_rpm_exclude_prefix "${CMAKE_INSTALL_PREFIX}") + endif() + set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION + "/opt" "/opt/rocm" + "${_rpm_exclude_prefix}" + "${_rpm_exclude_prefix}/bin") -set(PACKAGE_NAME TB) -set(LIBRARY_NAME TransferBench) + # TGZ + set(CPACK_ARCHIVE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-Linux") -rocm_create_package( - NAME ${LIBRARY_NAME} - DESCRIPTION "TransferBench package" - MAINTAINER "RCCL Team " -) + set(CPACK_GENERATOR "DEB;RPM;TGZ") + include(CPack) +else() + rocm_install(TARGETS TransferBench COMPONENT devel) + rocm_setup_version(VERSION ${VERSION_STRING}) + + # Package specific CPACK vars + rocm_package_add_dependencies(DEPENDS "numactl" "hsa-rocr") + + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") + set(CPACK_RPM_PACKAGE_LICENSE "MIT") + + set(PACKAGE_NAME TB) + set(LIBRARY_NAME TransferBench) + + rocm_create_package( + NAME ${LIBRARY_NAME} + DESCRIPTION "TransferBench package" + MAINTAINER "RCCL Team " + ) +endif() diff --git a/build_packages_local.sh b/build_packages_local.sh new file mode 100755 index 00000000..d10ec86e --- /dev/null +++ b/build_packages_local.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +# +# Copyright (c) Advanced Micro Devices, Inc. All rights reserved. +# +# build_packages_local.sh — single source of truth for building relocatable +# TransferBench packages (DEB / RPM / TGZ) against TheRock ROCm SDK. +# Used by both local developers and the GitHub Actions workflow. +# +# Usage: +# sudo ./build_packages_local.sh +# sudo -E ROCM_VERSION=7.11.0a20260121 GPU_FAMILY=gfx94X-dcgpu ./build_packages_local.sh +# +# Requires root (installs system packages). 
+ +set -euo pipefail + +# -------- pretty output -------- +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m' +log() { echo -e "${BLUE}[INFO]${NC} $*"; } +ok() { echo -e "${GREEN}[ OK ]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +err() { echo -e "${RED}[FAIL]${NC} $*" >&2; } + +trap 'err "Build failed at line $LINENO"' ERR + +# -------- root check -------- +if [[ ${EUID} -ne 0 ]]; then + err "This script installs system packages and must run as root. Re-run with: sudo -E $0" + exit 1 +fi + +# -------- inputs -------- +ROCM_VERSION="${ROCM_VERSION:-}" # empty => auto-fetch latest +GPU_FAMILY="${GPU_FAMILY:-gfx94X-dcgpu}" +BUILD_TYPE="${BUILD_TYPE:-Release}" +GITHUB_RUN_NUMBER="${GITHUB_RUN_NUMBER:-1}" + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BUILD_DIR="${REPO_ROOT}/build" +SDK_DIR="${HOME}/rocm-sdk" +ROCM_PATH="${SDK_DIR}/install" + +# Default GPU targets baked into every package, regardless of GPU_FAMILY tarball. +DEFAULT_GPU_TARGETS="gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201" +GPU_TARGETS="${GPU_TARGETS:-$DEFAULT_GPU_TARGETS}" + +# -------- detect OS -------- +if [[ -f /etc/os-release ]]; then + # shellcheck disable=SC1091 + . /etc/os-release + OS_ID="${ID:-unknown}" + OS_LIKE="${ID_LIKE:-}" +else + err "/etc/os-release not found; cannot detect distro"; exit 1 +fi + +case "${OS_ID}:${OS_LIKE}" in + ubuntu:*|debian:*|*:*debian*) DISTRO="ubuntu" ;; + almalinux:*|rocky:*|rhel:*|centos:*|*:*rhel*|*:*fedora*) DISTRO="almalinux" ;; + *) + if command -v apt-get >/dev/null 2>&1; then DISTRO="ubuntu" + elif command -v yum >/dev/null 2>&1 || command -v dnf >/dev/null 2>&1; then DISTRO="almalinux" + else err "Unsupported distro: ${OS_ID}"; exit 1 + fi + ;; +esac +log "Detected distro: ${DISTRO} (${OS_ID})" + +# -------- install dependencies -------- +log "Installing build dependencies..." 
+if [[ "${DISTRO}" == "ubuntu" ]]; then + export DEBIAN_FRONTEND=noninteractive + apt-get update -y + apt-get install -y --no-install-recommends \ + build-essential cmake git curl tar xz-utils ca-certificates pkg-config \ + python3 python3-pip \ + libnuma-dev \ + dpkg-dev rpm file apt-utils + CMAKE_BIN="cmake" + CMAKE_CXX_COMPILER_OVERRIDE="" +else + # AlmaLinux / Rocky / RHEL / manylinux_2_28 + if command -v dnf >/dev/null 2>&1; then PKG="dnf"; else PKG="yum"; fi + ${PKG} install -y epel-release || true + # Enable PowerTools/CRB for createrepo_c, etc. + ${PKG} config-manager --set-enabled powertools 2>/dev/null \ + || ${PKG} config-manager --set-enabled crb 2>/dev/null || true + ${PKG} install -y \ + gcc gcc-c++ make cmake3 git curl tar xz ca-certificates pkgconfig \ + python3 python3-pip \ + numactl-devel \ + rpm-build dpkg createrepo_c file + CMAKE_BIN="cmake3" + command -v cmake3 >/dev/null 2>&1 || CMAKE_BIN="cmake" + CMAKE_CXX_COMPILER_OVERRIDE="${ROCM_PATH}/bin/hipcc" +fi +ok "Dependencies installed" + +# -------- fetch ROCm SDK from TheRock -------- +TARBALL_BASE="https://therock-nightly-tarball.s3.amazonaws.com" +TAR_PREFIX="therock-dist-linux-${GPU_FAMILY}-" + +if [[ -z "${ROCM_VERSION}" ]]; then + log "ROCM_VERSION not set; auto-fetching latest for ${GPU_FAMILY}..." + # No LATEST.txt is published; list the bucket and pick the highest version key. 
+ LIST_URL="${TARBALL_BASE}/?list-type=2&max-keys=1000&prefix=${TAR_PREFIX}" + # Filter to versioned tarballs only (skip ADHOCBUILD-* and other non-release keys); + # match: <prefix><major>.<minor>.<...>.tar.gz + LATEST_KEY="$(curl -fsSL "${LIST_URL}" 2>/dev/null \ + | tr '<' '\n' \ + | sed -n 's|^Key>||p' \ + | grep -E "^${TAR_PREFIX}[0-9]+\.[0-9]+\.[0-9a-z]+\.tar\.gz$" \ + | sort -V \ + | tail -1 || true)" + if [[ -n "${LATEST_KEY}" ]]; then + ROCM_VERSION="${LATEST_KEY#${TAR_PREFIX}}" + ROCM_VERSION="${ROCM_VERSION%.tar.gz}" + ok "Latest ROCm version for ${GPU_FAMILY}: ${ROCM_VERSION}" + else + warn "Could not list ${LIST_URL}; falling back to pinned default" + ROCM_VERSION="7.13.0a20260423" + fi +fi + +TARBALL_NAME="${TAR_PREFIX}${ROCM_VERSION}.tar.gz" +TARBALL_URL="${TARBALL_BASE}/${TARBALL_NAME}" + +mkdir -p "${SDK_DIR}" +if [[ ! -d "${ROCM_PATH}" ]] || [[ ! -f "${SDK_DIR}/.installed-${ROCM_VERSION}-${GPU_FAMILY}" ]]; then + log "Downloading ${TARBALL_URL}..." + curl -fSL "${TARBALL_URL}" -o "${SDK_DIR}/${TARBALL_NAME}" + log "Extracting to ${SDK_DIR}..."
+ rm -rf "${ROCM_PATH}" + mkdir -p "${ROCM_PATH}" + tar -xzf "${SDK_DIR}/${TARBALL_NAME}" -C "${ROCM_PATH}" --strip-components=1 \ + || tar -xzf "${SDK_DIR}/${TARBALL_NAME}" -C "${ROCM_PATH}" + rm -f "${SDK_DIR}/${TARBALL_NAME}" + touch "${SDK_DIR}/.installed-${ROCM_VERSION}-${GPU_FAMILY}" + ok "ROCm SDK installed at ${ROCM_PATH}" +else + log "Reusing cached ROCm SDK at ${ROCM_PATH}" +fi + +export ROCM_PATH +export PATH="${ROCM_PATH}/bin:${PATH}" +export LD_LIBRARY_PATH="${ROCM_PATH}/lib:${LD_LIBRARY_PATH:-}" +export CMAKE_PREFIX_PATH="${ROCM_PATH}:${CMAKE_PREFIX_PATH:-}" + +# Locate HIP device libraries (amdgcn bitcode) +for candidate in \ + "${ROCM_PATH}/amdgcn/bitcode" \ + "${ROCM_PATH}/lib/llvm/amdgcn/bitcode" \ + "${ROCM_PATH}/lib/clang/amdgcn/bitcode"; do + if [[ -d "${candidate}" ]]; then export HIP_DEVICE_LIB_PATH="${candidate}"; break; fi +done +if [[ -n "${HIP_DEVICE_LIB_PATH:-}" ]]; then + ok "HIP_DEVICE_LIB_PATH=${HIP_DEVICE_LIB_PATH}" +else + warn "amdgcn bitcode directory not found under ${ROCM_PATH}; build may fail" +fi + +# -------- compute version helpers -------- +# ROCM_MAJOR / MINOR / patch helpers (e.g. 
7.11.0a20260121 -> major=7 minor=11) +ROCM_MAJOR="$(echo "${ROCM_VERSION}" | sed -E 's/^([0-9]+)\..*/\1/')" +ROCM_MINOR="$(echo "${ROCM_VERSION}" | sed -E 's/^[0-9]+\.([0-9]+).*/\1/')" +printf -v ROCM_LIBPATCH_VERSION '%02d%02d' "${ROCM_MAJOR}" "${ROCM_MINOR}" +export ROCM_MAJOR ROCM_MINOR ROCM_LIBPATCH_VERSION +log "ROCm major=${ROCM_MAJOR} minor=${ROCM_MINOR} libpatch=${ROCM_LIBPATCH_VERSION}" + +# Package release string: branch.commit for dev, run_number for release branches +GIT_BRANCH="${GITHUB_REF_NAME:-$(git -C "${REPO_ROOT}" rev-parse --abbrev-ref HEAD 2>/dev/null || echo unknown)}" +GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)" +if [[ "${GIT_BRANCH}" == rel* ]] || [[ "${GIT_BRANCH}" == release/* ]]; then + PKG_RELEASE="${GITHUB_RUN_NUMBER}" +else + # Sanitize: DEB/RPM release fields disallow many punctuation chars. + # Collapse anything that's not [A-Za-z0-9] into a single dot, then trim. + SAFE_BRANCH="$(printf '%s' "${GIT_BRANCH}" | sed -E 's/[^[:alnum:]]+/./g; s/^\.+//; s/\.+$//')" + SAFE_BRANCH="${SAFE_BRANCH:-unknown}" + PKG_RELEASE="${SAFE_BRANCH}.${GIT_COMMIT}" +fi +export CPACK_DEBIAN_PACKAGE_RELEASE="${CPACK_DEBIAN_PACKAGE_RELEASE:-$PKG_RELEASE}" +export CPACK_RPM_PACKAGE_RELEASE="${CPACK_RPM_PACKAGE_RELEASE:-$PKG_RELEASE}" +log "Package release tag: ${PKG_RELEASE}" + +# -------- configure -------- +INSTALL_PREFIX="/opt/rocm/extras-${ROCM_MAJOR}" +# Relocatable RPATH: $ORIGIN-relative + install prefix + the conventional +# install-time ROCm locations. Do NOT embed ${ROCM_PATH} (the ephemeral +# build-time SDK download path) — that would leak CI paths into the +# packaged binary and break relocatability. +RPATH_LIST="\$ORIGIN:\$ORIGIN/../lib:${INSTALL_PREFIX}/lib:/opt/rocm/lib:/opt/rocm/lib64" + +log "Configuring CMake..." 
+rm -rf "${BUILD_DIR}" +mkdir -p "${BUILD_DIR}" + +CMAKE_ARGS=( + -B "${BUILD_DIR}" + -S "${REPO_ROOT}" + -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" + -DROCM_PATH="${ROCM_PATH}" + -DROCM_MAJOR_VERSION="${ROCM_MAJOR}" + -DHIP_PLATFORM=amd + -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" + -DCPACK_PACKAGING_INSTALL_PREFIX="${INSTALL_PREFIX}" + -DCMAKE_SKIP_RPATH=FALSE + -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE + -DCMAKE_INSTALL_RPATH="${RPATH_LIST}" + -DCMAKE_VERBOSE_MAKEFILE=ON + -DBUILD_RELOCATABLE_PACKAGE=ON + -DBUILD_LOCAL_GPU_TARGET_ONLY=OFF + -DENABLE_NIC_EXEC=OFF + -DENABLE_MPI_COMM=OFF + -DDISABLE_DMABUF=OFF + -DGPU_TARGETS="${GPU_TARGETS}" +) +if [[ -n "${CMAKE_CXX_COMPILER_OVERRIDE}" ]]; then + CMAKE_ARGS+=(-DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER_OVERRIDE}") +fi + +"${CMAKE_BIN}" "${CMAKE_ARGS[@]}" +ok "CMake configured" + +# -------- build -------- +log "Building TransferBench (-j$(nproc))..." +"${CMAKE_BIN}" --build "${BUILD_DIR}" -- -j"$(nproc)" +ok "Build complete" + +# -------- package -------- +log "Packaging (DEB / RPM / TGZ via CPack)..." 
+pushd "${BUILD_DIR}" >/dev/null +if [[ "${DISTRO}" == "ubuntu" ]]; then + cpack -G DEB + cpack -G TGZ +else + cpack -G RPM + cpack -G TGZ +fi +popd >/dev/null + +ok "Packages written under ${BUILD_DIR}:" +ls -lh "${BUILD_DIR}"/amdrocm*-transferbench* 2>/dev/null || ls -lh "${BUILD_DIR}"/*.deb "${BUILD_DIR}"/*.rpm "${BUILD_DIR}"/*.tar.gz 2>/dev/null || true From 5965fa5a11c5df81ea434f23d54a4b03d5646624 Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Fri, 24 Apr 2026 15:23:07 -0500 Subject: [PATCH 4/8] ci: fix S3 metadata sync without requiring DeleteObject permission (#265) * ci: fix S3 metadata sync without requiring DeleteObject permission Follow RVS pattern for apt/yum repo metadata generation: - Download existing packages from S3 first - Regenerate metadata for all packages (existing + new) - Sync everything back (metadata files overwrite naturally) - Remove --delete flag that required s3:DeleteObject permission This fixes the AccessDenied error when syncing yum repodata: "User: arn:aws:sts::317668459450:assumed-role/therock-rocm-transferbench-releases-s3-oidc/GitHubActions is not authorized to perform: s3:DeleteObject" Changes: - apt metadata: Use aws s3 sync instead of individual cp commands - yum metadata: Remove --delete flag, sync down existing RPMs first - Both: Add --no-progress flag and human-readable output - Both: Copy current build packages explicitly before metadata generation Reference: ROCm/ROCmValidationSuite workflow (lines 175-204, 342-370) * ci: address Copilot review feedback - Remove || true from S3 sync commands to catch real errors - Restrict apt sync to *.deb only (exclude .tar.gz downloads) - Update RPM log message to reflect that RPMs + repodata are synced Copilot feedback rationale: 1. Silent failures (|| true) can lead to incomplete metadata generation 2. Empty S3 prefixes typically succeed anyway, so explicit handling unnecessary 3. Downloading only *.deb files is more efficient and faster 4. 
Log message should accurately reflect what's being synced --- .../workflows/build-relocatable-packages.yml | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-relocatable-packages.yml b/.github/workflows/build-relocatable-packages.yml index 2f9d6a4b..af90b466 100644 --- a/.github/workflows/build-relocatable-packages.yml +++ b/.github/workflows/build-relocatable-packages.yml @@ -120,10 +120,14 @@ jobs: run: | set -euo pipefail WORK="$(mktemp -d)" - aws s3 sync "${S3_DEB_PATH}/" "${WORK}/" --exclude "*" --include "*.deb" - pushd "${WORK}" >/dev/null - dpkg-scanpackages -m . /dev/null > Packages - gzip -kf Packages + echo "Downloading existing DEBs from ${S3_DEB_PATH}/ ..." + aws s3 sync "${S3_DEB_PATH}/" "${WORK}/" \ + --exclude "*" --include "*.deb" \ + --no-progress + cp build/amdrocm*-transferbench*.deb "${WORK}/" 2>/dev/null || true + cd "${WORK}" + dpkg-scanpackages --multiversion . /dev/null > Packages + gzip -k -f Packages { echo "Origin: ROCm-TransferBench" echo "Label: ROCm TransferBench Packages" @@ -134,10 +138,9 @@ jobs: echo "Description: TransferBench DEB packages built from TheRock SDK" echo "Date: $(date -Ru)" } > Release - aws s3 cp Packages "${S3_DEB_PATH}/Packages" - aws s3 cp Packages.gz "${S3_DEB_PATH}/Packages.gz" - aws s3 cp Release "${S3_DEB_PATH}/Release" - popd >/dev/null + aws s3 sync "${WORK}/" "${S3_DEB_PATH}/" --no-progress + echo "=== DEB repo metadata uploaded to ${S3_DEB_PATH}/ ===" + aws s3 ls "${S3_DEB_PATH}/" --human-readable # ============================================================ # manylinux_2_28 (AlmaLinux 8) — RPM + TGZ @@ -236,9 +239,13 @@ jobs: set -euo pipefail dnf install -y createrepo_c || yum install -y createrepo_c WORK="$(mktemp -d)" - aws s3 sync "${S3_RPM_PATH}/" "${WORK}/" --exclude "*" --include "*.rpm" + echo "Downloading existing RPMs from s3://${AWS_S3_BUCKET}/${S3_RPM_PATH#s3://${AWS_S3_BUCKET}/}/ ..." 
+ aws s3 sync "${S3_RPM_PATH}/" "${WORK}/" --exclude "repodata/*" --no-progress + cp build/amdrocm*-transferbench*.rpm "${WORK}/" 2>/dev/null || true createrepo_c "${WORK}" - aws s3 sync "${WORK}/repodata/" "${S3_RPM_PATH}/repodata/" --delete + aws s3 sync "${WORK}/" "${S3_RPM_PATH}/" --no-progress + echo "=== RPM repository contents (RPMs + repodata) synced to ${S3_RPM_PATH}/ ===" + aws s3 ls "${S3_RPM_PATH}/repodata/" --human-readable # ============================================================ # Build report — collects S3 paths for browsing From a76e516802569f10bac3713864e095d3dc014318 Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Fri, 24 Apr 2026 15:43:55 -0500 Subject: [PATCH 5/8] ci: add CodeQL security scanning workflow (#264) * ci: add CodeQL security scanning workflow Add CodeQL static analysis workflow following ROCm project standards (amdsmi/aqlprofile pattern). Scans C/C++ code for security vulnerabilities. - Runs on develop/mainline branch pushes and PRs - Weekly scheduled scan on Fridays - Uses security-extended query suite - Builds with minimal dependencies (no NIC/MPI) for faster analysis Part of TheRock component onboarding requirements. * fix: use ROCm container for CodeQL build CodeQL analysis needs ROCm/HIP installed to build TransferBench. Switch to rocm/dev-ubuntu-22.04 container following aqlprofile pattern. - Add git installation in container - Configure git safe directory - Add CMAKE_PREFIX_PATH=/opt/rocm for hip-config.cmake discovery * ci: add 'candidate' branch to CodeQL trigger list * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * ci: address Copilot review feedback on CodeQL workflow - Add -y flag to apt-add-repository to avoid interactive prompts - Replace safe.directory wildcard with GITHUB_WORKSPACE for minimal permissions Rationale: 1. Interactive prompts can hang CI jobs waiting for user input 2. 
Using '*' for safe.directory is unnecessarily permissive; GITHUB_WORKSPACE provides sufficient access while maintaining defense-in-depth Note: Container image intentionally remains unpinned per maintainer preference --------- Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .github/workflows/codeql.yml | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..6d3ba2f9 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,71 @@ +name: "CodeQL Security Scanning" + +on: + push: + branches: + - develop + - mainline + - candidate + pull_request: + branches: + - develop + - mainline + schedule: + # Weekly scan on Fridays at 6:34 PM UTC + - cron: '34 18 * * 5' + +jobs: + analyze: + name: Analyze (C/C++) + runs-on: ubuntu-latest + container: rocm/dev-ubuntu-22.04:latest + permissions: + security-events: write + packages: read + actions: read + contents: read + + steps: + - name: Install OS requirements + timeout-minutes: 10 + run: | + apt update + apt install -y software-properties-common + apt-add-repository -y ppa:git-core/ppa + apt-get update + apt install -y git + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Configure Git Safe Directory + run: | + git config --global --add safe.directory "${GITHUB_WORKSPACE}" + + - name: Install dependencies + run: | + apt-get update + apt-get install -y build-essential cmake libnuma-dev + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: c-cpp + build-mode: manual + queries: security-extended + + - name: Build TransferBench + run: | + mkdir -p build + cd build + cmake .. 
-DCMAKE_BUILD_TYPE=Release \ + -DBUILD_LOCAL_GPU_TARGET_ONLY=ON \ + -DENABLE_NIC_EXEC=OFF \ + -DENABLE_MPI_COMM=OFF \ + -DCMAKE_PREFIX_PATH=/opt/rocm + make -j $(nproc) + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:c-cpp" From 1b56fdc28e0ec075b46593c19665ab469f076a9e Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Tue, 5 May 2026 13:05:27 -0500 Subject: [PATCH 6/8] ci: align relocatable package flow with RVS reference (#287) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ci: align relocatable package flow with RVS reference Brings TransferBench's TheRock-SDK-based package build into closer parity with the ROCmValidationSuite reference flow: - CMakeLists.txt: auto-compute patch version from `git describe --tags --match v<major>.<minor>.*` (commits since the last matching tag), falling back to the hardcoded patch when no tag is reachable. No behavior change today since no v1.66.* tag exists yet; once one is created, builds become 1.66.<commits-since-tag>. - CMakeLists.txt: move relocatable RPATH defaults out of build_packages_local.sh and into the BUILD_RELOCATABLE_PACKAGE block so plain `cmake -DBUILD_RELOCATABLE_PACKAGE=ON ..` produces the same RPATH as a packaged build. RPATH list now includes /opt/rocm/lib/llvm, /opt/rocm/core-<major>/lib, and the matching llvm path. - build_packages_local.sh: package release tag now follows the RVS format (`r<libpatch>.<date>` for default builds, `r<libpatch>.<date>.<branch>.<commit>` for PRs, GITHUB_RUN_NUMBER for release/* branches). UTC date captured once so long builds can't straddle midnight. Drops the over-permissive `rel*` prefix matcher. - build_packages_local.sh: drop the now-redundant -DCMAKE_INSTALL_RPATH / -DCMAKE_SKIP_RPATH / -DCMAKE_INSTALL_RPATH_USE_LINK_PATH flags (CMakeLists.txt is now the single source of truth).
- docs/install/INSTALL_TGZ.rst: new user-facing TGZ install guide covering ROCm pre-install, runtime deps, extraction, PATH / LD_LIBRARY_PATH setup, and persistent profile.d configuration. - docs/install/install.rst: cross-link to the new TGZ doc. - README_BUILD_PACKAGES.md: replace the inline TGZ install snippet with a link to the new doc plus a minimal smoke test for CI maintainers. * docs: drop --help (does not exist on develop) for plain TransferBench Address @gilbertlee-amd review on PR #287: --help doesn't exist. In candidate we already introduce the "help" preset [...]. For CI moving forward, we likely want to use ./TransferBench smoketest. However, that's also in candidate branch still. On develop today, running TransferBench with no arguments prints version, usage, the available preset list, and the detected topology then exits 0 (Client.cpp:41 — `if (argc <= 1)` branch). That is a valid load-time smoke test for the binary. Replace `TransferBench --help` with `TransferBench` in: - .github/workflows/README_BUILD_PACKAGES.md (CI maintainer smoke test) - docs/install/INSTALL_TGZ.rst (end-user verify step) Add a forward-looking note in the README that once the `help` and `smoketest` presets graduate from candidate to develop, the smoke test should switch to `TransferBench smoketest`. 
--- .github/workflows/README_BUILD_PACKAGES.md | 13 ++ CMakeLists.txt | 52 +++++++- build_packages_local.sh | 38 +++--- docs/install/INSTALL_TGZ.rst | 136 +++++++++++++++++++++ docs/install/install.rst | 6 + 5 files changed, 228 insertions(+), 17 deletions(-) create mode 100644 docs/install/INSTALL_TGZ.rst diff --git a/.github/workflows/README_BUILD_PACKAGES.md b/.github/workflows/README_BUILD_PACKAGES.md index 2490f1b7..bbaecd83 100644 --- a/.github/workflows/README_BUILD_PACKAGES.md +++ b/.github/workflows/README_BUILD_PACKAGES.md @@ -93,13 +93,26 @@ sudo rpm -i --replacefiles --nodeps build/amdrocm7-transferbench-*.rpm ### Any Linux (TGZ — relocatable install tree, requires ROCm runtime on target) +End-user instructions (pre-install ROCm, runtime dependencies, extract, +`PATH` / `LD_LIBRARY_PATH`, troubleshooting) live in the project docs at +[docs/install/INSTALL_TGZ.rst](../../docs/install/INSTALL_TGZ.rst). + +Quick smoke test from the repo root after a successful build: + ```bash sudo mkdir -p /opt/rocm/extras-7 sudo tar -xzf build/amdrocm7-transferbench-*.tar.gz -C /opt/rocm/extras-7 --strip-components=1 export PATH=/opt/rocm/extras-7/bin:$PATH +# With no args, TransferBench prints version, usage, available presets, +# and detected topology — a fast end-to-end check that the binary loads +# its ROCm libs correctly. TransferBench ``` +> Once the `help` and `smoketest` presets land on `develop` (currently on +> `candidate`), prefer `TransferBench help` for usage and `TransferBench +> smoketest` for a real correctness check. 
+ ## S3 upload (OIDC) S3 upload runs only when: diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b6591d3..23eb07a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,41 @@ if (NOT CMAKE_TOOLCHAIN_FILE) message(STATUS "CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}") endif() -set(VERSION_STRING "1.66.02") +set(TRANSFERBENCH_VERSION_MAJOR 1) +set(TRANSFERBENCH_VERSION_MINOR 66) +set(TRANSFERBENCH_VERSION_PATCH_FALLBACK "02") + +# Auto-compute patch from git: count commits since the last v<major>.<minor>.* tag. +# Falls back to TRANSFERBENCH_VERSION_PATCH_FALLBACK when git is unavailable, +# this is not a git checkout, or no matching tag exists. Mirrors the RVS flow. +set(TRANSFERBENCH_VERSION_PATCH "${TRANSFERBENCH_VERSION_PATCH_FALLBACK}") +find_package(Git QUIET) +if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") + execute_process( + COMMAND "${GIT_EXECUTABLE}" describe --tags --abbrev=0 --match + "v${TRANSFERBENCH_VERSION_MAJOR}.${TRANSFERBENCH_VERSION_MINOR}.*" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + OUTPUT_VARIABLE _tb_last_tag + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _tb_describe_rc + ERROR_QUIET) + if(_tb_describe_rc EQUAL 0 AND _tb_last_tag) + execute_process( + COMMAND "${GIT_EXECUTABLE}" rev-list --count "${_tb_last_tag}..HEAD" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + OUTPUT_VARIABLE _tb_commit_count + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _tb_count_rc + ERROR_QUIET) + if(_tb_count_rc EQUAL 0 AND _tb_commit_count MATCHES "^[0-9]+$") + set(TRANSFERBENCH_VERSION_PATCH "${_tb_commit_count}") + endif() + endif() +endif() + +set(VERSION_STRING + "${TRANSFERBENCH_VERSION_MAJOR}.${TRANSFERBENCH_VERSION_MINOR}.${TRANSFERBENCH_VERSION_PATCH}") +message(STATUS "TransferBench version: ${VERSION_STRING}") project(TransferBench VERSION ${VERSION_STRING} LANGUAGES CXX) ## Load CMake modules @@ -272,6 +306,22 @@ if(BUILD_RELOCATABLE_PACKAGE) set(ROCM_MAJOR_VERSION "7") endif() + # Relocatable RPATH (matches the RVS
reference flow). Mirrors what + # build_packages_local.sh used to inject via -DCMAKE_INSTALL_RPATH=, so + # plain `cmake -DBUILD_RELOCATABLE_PACKAGE=ON ..` now produces the same + # RPATH as a CI/packaged build. + set(CMAKE_SKIP_RPATH FALSE) + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) + set(CMAKE_INSTALL_RPATH + "\$ORIGIN:\$ORIGIN/../lib:/opt/rocm/extras-${ROCM_MAJOR_VERSION}/lib:/opt/rocm/lib:/opt/rocm/lib/llvm/lib:/opt/rocm/core-${ROCM_MAJOR_VERSION}/lib:/opt/rocm/core-${ROCM_MAJOR_VERSION}/lib/llvm/lib") + set(CMAKE_BUILD_RPATH "${CMAKE_INSTALL_RPATH}") + # Strip implicit SDK-from-build-host paths on install so the ephemeral + # $HOME/rocm-sdk/install path the CI script uses does not leak into the + # packaged binary's RPATH. + if(NOT CMAKE_VERSION VERSION_LESS "3.16") + set(CMAKE_INSTALL_REMOVE_ENVIRONMENT_RPATH TRUE) + endif() + install(TARGETS TransferBench RUNTIME DESTINATION bin COMPONENT devel) set(CPACK_PACKAGE_NAME "amdrocm${ROCM_MAJOR_VERSION}-transferbench") diff --git a/build_packages_local.sh b/build_packages_local.sh index d10ec86e..cf12b831 100755 --- a/build_packages_local.sh +++ b/build_packages_local.sh @@ -167,17 +167,29 @@ printf -v ROCM_LIBPATCH_VERSION '%02d%02d' "${ROCM_MAJOR}" "${ROCM_MINOR}" export ROCM_MAJOR ROCM_MINOR ROCM_LIBPATCH_VERSION log "ROCm major=${ROCM_MAJOR} minor=${ROCM_MINOR} libpatch=${ROCM_LIBPATCH_VERSION}" -# Package release string: branch.commit for dev, run_number for release branches +# Package release string. Format mirrors the RVS reference flow: +# default (push/schedule/dispatch/local): r<libpatch>.<date> +# pull request: r<libpatch>.<date>.<branch>.<commit>
+# release/* branch (non-PR): ${GITHUB_RUN_NUMBER} (fallback 1) GIT_BRANCH="${GITHUB_REF_NAME:-$(git -C "${REPO_ROOT}" rev-parse --abbrev-ref HEAD 2>/dev/null || echo unknown)}" GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)" -if [[ "${GIT_BRANCH}" == rel* ]] || [[ "${GIT_BRANCH}" == release/* ]]; then - PKG_RELEASE="${GITHUB_RUN_NUMBER}" +BUILD_DATE_UTC="$(date -u +%Y%m%d)" + +# Collapse non-alphanumerics into single dots and trim — DEB/RPM release +# fields reject most punctuation. +sanitize_release() { + local s + s="$(printf '%s' "$1" | sed -E 's/[^[:alnum:]]+/./g; s/^\.+//; s/\.+$//')" + printf '%s' "${s:-unknown}" +} + +if [[ "${GITHUB_EVENT_NAME:-}" == "pull_request" ]]; then + PR_BRANCH="$(sanitize_release "${GITHUB_HEAD_REF:-${GIT_BRANCH}}")" + PKG_RELEASE="r${ROCM_LIBPATCH_VERSION}.${BUILD_DATE_UTC}.${PR_BRANCH}.${GIT_COMMIT}" +elif [[ "${GIT_BRANCH}" == release/* ]]; then + PKG_RELEASE="${GITHUB_RUN_NUMBER:-1}" else - # Sanitize: DEB/RPM release fields disallow many punctuation chars. - # Collapse anything that's not [A-Za-z0-9] into a single dot, then trim. - SAFE_BRANCH="$(printf '%s' "${GIT_BRANCH}" | sed -E 's/[^[:alnum:]]+/./g; s/^\.+//; s/\.+$//')" - SAFE_BRANCH="${SAFE_BRANCH:-unknown}" - PKG_RELEASE="${SAFE_BRANCH}.${GIT_COMMIT}" + PKG_RELEASE="r${ROCM_LIBPATCH_VERSION}.${BUILD_DATE_UTC}" fi export CPACK_DEBIAN_PACKAGE_RELEASE="${CPACK_DEBIAN_PACKAGE_RELEASE:-$PKG_RELEASE}" export CPACK_RPM_PACKAGE_RELEASE="${CPACK_RPM_PACKAGE_RELEASE:-$PKG_RELEASE}" @@ -185,11 +197,8 @@ log "Package release tag: ${PKG_RELEASE}" # -------- configure -------- INSTALL_PREFIX="/opt/rocm/extras-${ROCM_MAJOR}" -# Relocatable RPATH: $ORIGIN-relative + install prefix + the conventional -# install-time ROCm locations. Do NOT embed ${ROCM_PATH} (the ephemeral -# build-time SDK download path) — that would leak CI paths into the -# packaged binary and break relocatability. 
-RPATH_LIST="\$ORIGIN:\$ORIGIN/../lib:${INSTALL_PREFIX}/lib:/opt/rocm/lib:/opt/rocm/lib64" +# Relocatable RPATH defaults live in CMakeLists.txt under +# if(BUILD_RELOCATABLE_PACKAGE); enabling that option below activates them. log "Configuring CMake..." rm -rf "${BUILD_DIR}" @@ -204,9 +213,6 @@ CMAKE_ARGS=( -DHIP_PLATFORM=amd -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" -DCPACK_PACKAGING_INSTALL_PREFIX="${INSTALL_PREFIX}" - -DCMAKE_SKIP_RPATH=FALSE - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE - -DCMAKE_INSTALL_RPATH="${RPATH_LIST}" -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_RELOCATABLE_PACKAGE=ON -DBUILD_LOCAL_GPU_TARGET_ONLY=OFF diff --git a/docs/install/INSTALL_TGZ.rst b/docs/install/INSTALL_TGZ.rst new file mode 100644 index 00000000..b365cef4 --- /dev/null +++ b/docs/install/INSTALL_TGZ.rst @@ -0,0 +1,136 @@ +:orphan: + +.. meta:: + :description: Install the relocatable TransferBench TGZ archive on any Linux distribution + :keywords: TransferBench, TGZ, tarball, install, relocatable + +.. _install-transferbench-tgz: + +------------------------------------------------ +Installing TransferBench from the TGZ archive +------------------------------------------------ + +The TransferBench TGZ archive (``amdrocm-transferbench-*.tar.gz``) is a +relocatable install tree that works on any Linux distribution where a +compatible ROCm runtime is already present. Use it when you cannot or do not +want to install the DEB or RPM package — for example on a distribution +without a native ROCm package, or inside a non-root container. + +The TGZ ships only the ``TransferBench`` binary and its supporting files. It +does **not** bundle ROCm; the host system must already provide the ROCm +runtime libraries (``hsa-rocr`` and the HIP runtime). + +Pre-install: ROCm +----------------- + +Install ROCm on the target system before extracting the TGZ. 
Follow the +official AMD documentation: + +* `ROCm documentation <https://rocm.docs.amd.com/>`_ +* `Linux install guide <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/>`_ + +After installing, ``ROCM_PATH`` (typically ``/opt/rocm``) must be set +correctly and the ROCm libraries must be loadable by the dynamic linker. + +Install runtime dependencies +---------------------------- + +The DEB and RPM packages declare these runtime dependencies; TGZ users must +install them manually on the target host. + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Family + - Required packages + * - Debian / Ubuntu + - ``numactl``, ``libnuma1``, plus the ROCm runtime (``hsa-rocr``) + * - RHEL / Rocky / AlmaLinux + - ``numactl``, plus the ROCm runtime (``hsa-rocr``) + +Install commands: + +.. code-block:: bash + + # Ubuntu / Debian + sudo apt update && sudo apt install -y numactl libnuma1 + + # RHEL / Rocky / AlmaLinux + sudo dnf install -y numactl + +The ROCm packages (``hsa-rocr`` and friends) come from the ROCm repo +configured in the pre-install step above. + +Extract the TGZ +--------------- + +Extract the archive into ``/opt/rocm/extras-<major>``, where ``<major>`` is +the ROCm major version the package was built against (encoded in the package +name, for example ``amdrocm7-transferbench-*.tar.gz`` → major ``7``). + +.. code-block:: bash + + # Example for ROCm major 7 — match your package + sudo mkdir -p /opt/rocm/extras-7 + sudo tar -xzf amdrocm7-transferbench-*.tar.gz -C /opt/rocm/extras-7 --strip-components=1 + +The ``--strip-components=1`` option discards the top-level directory inside +the tarball so files land directly under ``/opt/rocm/extras-7/{bin,lib,...}``. + +Configure ``PATH`` and ``LD_LIBRARY_PATH`` +------------------------------------------ + +Point the shell at the extracted prefix and your ROCm install. Copy and paste +the block as one unit (replace paths with your real ``ROCM_PATH`` and major +version): + +..
code-block:: bash + + export ROCM_PATH=/opt/rocm # or your real ROCm root + export PATH=/opt/rocm/extras-7/bin:$ROCM_PATH/bin:$PATH + export LD_LIBRARY_PATH=/opt/rocm/extras-7/lib:$ROCM_PATH/lib:$ROCM_PATH/lib/llvm/lib:$LD_LIBRARY_PATH + +The ``TransferBench`` binary embeds an ``RPATH`` covering ``$ORIGIN``, +``$ORIGIN/../lib``, ``/opt/rocm/extras-<major>/lib``, ``/opt/rocm/lib``, +``/opt/rocm/lib/llvm/lib``, ``/opt/rocm/core-<major>/lib``, and +``/opt/rocm/core-<major>/lib/llvm/lib``. The ``LD_LIBRARY_PATH`` export above +is mainly defensive — useful if your ROCm tree lives somewhere non-standard +or if you want to override which copy of a library is loaded for +troubleshooting. + +Verify the install +------------------ + +.. code-block:: bash + + TransferBench + +Run with no arguments, ``TransferBench`` prints its version, usage, the +list of available preset benchmarks, and the detected GPU/CPU topology, +then exits. Seeing that output confirms the binary loaded its ROCm +libraries correctly. + +If the binary fails to load a shared library, inspect: + +.. code-block:: bash + + ldd /opt/rocm/extras-7/bin/TransferBench + readelf -d /opt/rocm/extras-7/bin/TransferBench | grep -E 'RPATH|RUNPATH' + +Make a persistent shell setup +----------------------------- + +To avoid re-exporting every shell, drop the variables into a profile script: + +.. code-block:: bash + + sudo tee /etc/profile.d/transferbench.sh >/dev/null <<'EOF' + export ROCM_PATH=/opt/rocm + export PATH=/opt/rocm/extras-7/bin:$ROCM_PATH/bin:$PATH + export LD_LIBRARY_PATH=/opt/rocm/extras-7/lib:$ROCM_PATH/lib:$ROCM_PATH/lib/llvm/lib:$LD_LIBRARY_PATH + EOF + sudo chmod 0644 /etc/profile.d/transferbench.sh + +Log out and back in (or ``source /etc/profile.d/transferbench.sh``) for the +changes to apply.
diff --git a/docs/install/install.rst b/docs/install/install.rst index 4a44ff59..367fc307 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -83,3 +83,9 @@ TransferBench looks for NVCC in ``/usr/local/cuda`` by default. To modify the lo .. code-block:: bash CUDA_PATH=/usr/local/cuda make + +Installing from the relocatable TGZ archive +------------------------------------------- + +If you want to install a pre-built TransferBench binary on a system where +you cannot install the DEB or RPM package, see :ref:`install-transferbench-tgz`. From 0e824cce615c48324b81d13a946306be622ab361 Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Wed, 13 May 2026 16:16:56 -0500 Subject: [PATCH 7/8] ci: fix package versioning across DEB/RPM/TGZ builds (#299) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ci: fix package versioning across DEB/RPM/TGZ builds Two issues surfaced from run 25393634415 on release/test_artifact: 1. manylinux container falls back to TRANSFERBENCH_VERSION_PATCH="02" because git's "dubious ownership" guard rejects the host-UID-owned workspace when the container runs as root. Same commit produced 1.66.11 on Ubuntu and 1.66.02 on manylinux. Mark the repo safe for the container's root user before CMake's git probe runs. 2. CPack writes <name>-<version>-Linux.{deb,rpm,tar.gz}, omitting the release tag. Successive release/* runs overwrite each other in S3 and the run number is invisible from the filename. Switch DEB/RPM to canonical CPack filenames (which embed release + arch) and thread the release tag into CPACK_ARCHIVE_FILE_NAME for the TGZ. * ci: pass package release as -D arg, not just env Run 25819267330 confirmed the version-sync fix works (both jobs report 1.66.13) and that DEB/RPM filenames now embed the release. But the Ubuntu TGZ filename still came out as `…-1.66.13-Linux.tar.gz` — no release tag — while manylinux's TGZ embedded it correctly.
Root cause: CMakeLists derived the release tag from `ENV{CPACK_RPM_PACKAGE_RELEASE}` at configure time. Ubuntu invokes the script via `sudo -E …`, and although the script then `export`s those env vars, they don't reliably reach CMake's environment on the sudo path. The DEB filename still worked because `DEB-DEFAULT` is processed by CPack at package time, not configure time, so the env var propagated to the `cpack -G DEB` child directly. Fix: pass `PKG_RELEASE` as `-DTRANSFERBENCH_PACKAGE_RELEASE=…` from the build script. CMake reads it directly from the cache, bypassing env propagation entirely. Env-var fallbacks remain so direct cmake invocations (without the wrapper script) keep working. * ci: temporary debug print to diagnose Ubuntu TGZ release-tag miss * ci: dump CPackConfig.cmake archive name to diagnose Ubuntu TGZ * ci: also set CPACK_PACKAGE_FILE_NAME for TGZ on older CMake CMake 3.26 (manylinux container) honors CPACK_ARCHIVE_FILE_NAME for the TGZ generator. CMake 3.22 (Ubuntu 22.04 system cmake) falls back to CPACK_PACKAGE_FILE_NAME instead, ignoring my ARCHIVE_FILE_NAME override. Diagnostic dump confirmed both jobs wrote identical CPackConfig.cmake — the divergence is purely in cpack runtime behavior between the two CMake versions. Set both to the same suffixed value. DEB/RPM are unaffected because they use the explicit DEB-DEFAULT / RPM-DEFAULT canonical-naming tokens, which take precedence over CPACK_PACKAGE_FILE_NAME. Also remove the temporary debug prints from CMakeLists.txt and the CPackConfig.cmake dump from build_packages_local.sh. * ci: address Copilot PR review on packaging changes - Replace `git config --global --add safe.directory` with the GIT_CONFIG_* env-var triple so the safe.directory entry is scoped to this build (and inherited by CMake's `execute_process(git …)` children) instead of being written to root's ~/.gitconfig under sudo. 
Also drops the `|| true` so a setup failure surfaces immediately instead of letting the version probe silently fall back. - Quote `"${VAR}"` in `STREQUAL ""` comparisons. CMake's `if(AND ...)` does not short-circuit, and bare-identifier dereferencing inside `if()` depends on policy CMP0054. Quoting makes the comparison unambiguous. - Fail fast when BUILD_RELOCATABLE_PACKAGE is requested on CMake older than 3.13. The block uses CPACK_ARCHIVE_FILE_NAME (3.13+) and the DEB-DEFAULT / RPM-DEFAULT canonical-naming sentinels (3.6+); on a 3.5 CMake those would silently produce a literal "DEB-DEFAULT" filename. The project-wide cmake_minimum_required of 3.5 is left alone (out of scope here); the gate is local to the relocatable-package path. --- CMakeLists.txt | 52 +++++++++++++++++++++++++++++++++++++---- build_packages_local.sh | 14 +++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23eb07a6..f8d1c747 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -302,6 +302,18 @@ if(BUILD_RELOCATABLE_PACKAGE) # RVS-style relocatable packaging: bypass rocm_install/rocm_create_package and # drive CPack directly so CMAKE_INSTALL_PREFIX / CPACK_PACKAGING_INSTALL_PREFIX # set by the caller (build_packages_local.sh) are honored. + # + # The CPack flow below uses CPACK_ARCHIVE_FILE_NAME (3.13+) and the + # DEB-DEFAULT / RPM-DEFAULT canonical-naming sentinels (3.6+). The + # project-wide cmake_minimum_required is 3.5 for the non-packaging build, so + # gate the relocatable path locally to avoid silently producing a literal + # "DEB-DEFAULT" filename on a too-old CMake. 
+ if(CMAKE_VERSION VERSION_LESS 3.13) + message(FATAL_ERROR + "BUILD_RELOCATABLE_PACKAGE requires CMake >= 3.13 " + "(found ${CMAKE_VERSION}); needed for CPACK_ARCHIVE_FILE_NAME and " + "the DEB-DEFAULT / RPM-DEFAULT canonical-naming sentinels.") + endif() if(NOT DEFINED ROCM_MAJOR_VERSION) set(ROCM_MAJOR_VERSION "7") endif() @@ -331,23 +343,45 @@ if(BUILD_RELOCATABLE_PACKAGE) set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "TransferBench: benchmark simultaneous transfers between CPU/GPU/NIC") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") + # Per-build release tag, threaded into DEB/RPM metadata AND the TGZ filename. + # Prefer the explicit -D from build_packages_local.sh; fall back to the env + # vars CPack itself reads (so direct cmake invocations still work). + # Quote variable references throughout so the comparison is unambiguous + # under any CMP0054 setting. + set(_tb_pkg_release "") + if(NOT "${TRANSFERBENCH_PACKAGE_RELEASE}" STREQUAL "") + set(_tb_pkg_release "${TRANSFERBENCH_PACKAGE_RELEASE}") + elseif(NOT "$ENV{CPACK_RPM_PACKAGE_RELEASE}" STREQUAL "") + set(_tb_pkg_release "$ENV{CPACK_RPM_PACKAGE_RELEASE}") + elseif(NOT "$ENV{CPACK_DEBIAN_PACKAGE_RELEASE}" STREQUAL "") + set(_tb_pkg_release "$ENV{CPACK_DEBIAN_PACKAGE_RELEASE}") + endif() + # DEB set(CPACK_DEBIAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") set(CPACK_DEBIAN_PACKAGE_DEPENDS "numactl, libnuma1, hsa-rocr") set(CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}") - if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + if(NOT "${_tb_pkg_release}" STREQUAL "") + set(CPACK_DEBIAN_PACKAGE_RELEASE "${_tb_pkg_release}") + elseif(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) set(CPACK_DEBIAN_PACKAGE_RELEASE "$ENV{CPACK_DEBIAN_PACKAGE_RELEASE}") endif() + # Canonical filename: <name>_<version>-<release>_<arch>.deb + set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") # RPM set(CPACK_RPM_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") set(CPACK_RPM_PACKAGE_LICENSE "MIT")
set(CPACK_RPM_PACKAGE_REQUIRES "numactl, hsa-rocr") set(CPACK_RPM_PACKAGE_VENDOR "${CPACK_PACKAGE_VENDOR}") - if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) + if(NOT "${_tb_pkg_release}" STREQUAL "") + set(CPACK_RPM_PACKAGE_RELEASE "${_tb_pkg_release}") + elseif(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) set(CPACK_RPM_PACKAGE_RELEASE "$ENV{CPACK_RPM_PACKAGE_RELEASE}") endif() + # Canonical filename: <name>-<version>-<release>.<arch>.rpm + set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") # Use the actual install prefix (caller-controlled in relocatable mode) # rather than hard-coded /opt/... paths. if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX) @@ -360,8 +394,18 @@ if(BUILD_RELOCATABLE_PACKAGE) "${_rpm_exclude_prefix}" "${_rpm_exclude_prefix}/bin") - # TGZ - set(CPACK_ARCHIVE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-Linux") + # TGZ — embed release tag so successive runs do not collide on the same key. + # CMake 3.13+ honors CPACK_ARCHIVE_FILE_NAME for archive generators, but + # CMake 3.22 (Ubuntu 22.04) falls back to CPACK_PACKAGE_FILE_NAME for TGZ. + # Set both to the same suffixed value. DEB/RPM are unaffected because they + # use the explicit DEB-DEFAULT / RPM-DEFAULT canonical-naming tokens above. + if("${_tb_pkg_release}" STREQUAL "") + set(_tb_archive_name "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-Linux") + else() + set(_tb_archive_name "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${_tb_pkg_release}-Linux") + endif() + set(CPACK_ARCHIVE_FILE_NAME "${_tb_archive_name}") + set(CPACK_PACKAGE_FILE_NAME "${_tb_archive_name}") set(CPACK_GENERATOR "DEB;RPM;TGZ") include(CPack) diff --git a/build_packages_local.sh b/build_packages_local.sh index cf12b831..91ace56b 100755 --- a/build_packages_local.sh +++ b/build_packages_local.sh @@ -40,6 +40,19 @@ BUILD_DIR="${REPO_ROOT}/build" SDK_DIR="${HOME}/rocm-sdk" ROCM_PATH="${SDK_DIR}/install" +# Containerized builds (e.g.
manylinux on a host-mounted workspace) hit git's +# "dubious ownership" guard because the checkout is host-UID-owned but we run +# as root. Without this, `git describe` in CMakeLists.txt silently fails and +# TRANSFERBENCH_VERSION_PATCH falls back to its hard-coded default. +# +# Use GIT_CONFIG_* env vars (git >= 2.31) so the scoped safe.directory entry +# is inherited by CMake's `execute_process(git …)` children without touching +# the user's persistent ~/.gitconfig (especially harmful under sudo, where +# the modification would land in root's global config). +export GIT_CONFIG_COUNT=1 +export GIT_CONFIG_KEY_0="safe.directory" +export GIT_CONFIG_VALUE_0="${REPO_ROOT}" + # Default GPU targets baked into every package, regardless of GPU_FAMILY tarball. DEFAULT_GPU_TARGETS="gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1150;gfx1151;gfx1200;gfx1201" GPU_TARGETS="${GPU_TARGETS:-$DEFAULT_GPU_TARGETS}" @@ -220,6 +233,7 @@ CMAKE_ARGS=( -DENABLE_MPI_COMM=OFF -DDISABLE_DMABUF=OFF -DGPU_TARGETS="${GPU_TARGETS}" + -DTRANSFERBENCH_PACKAGE_RELEASE="${PKG_RELEASE}" ) if [[ -n "${CMAKE_CXX_COMPILER_OVERRIDE}" ]]; then CMAKE_ARGS+=(-DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER_OVERRIDE}") From a4cd6e5bb2688984b46514bf7e1bcee918c3a9ca Mon Sep 17 00:00:00 2001 From: Arm Patinyasakdikul Date: Fri, 15 May 2026 08:56:27 -0500 Subject: [PATCH 8/8] pkg: drop hsa-rocr dep, warn at install time instead (#300) The relocatable DEB/RPM hard-depended on hsa-rocr, but ROCm 7.13 no longer ships a package by that name -- install fails on TheRock 7.13 with "amdrocm7-transferbench depends on hsa-rocr; however: Package hsa-rocr is not configured yet" (ROCM-24669). The dep also never made sense for the relocatable artifact: it is built from the TheRock SDK and meant to install on systems where ROCm came from a tarball and is not tracked by apt/dpkg at all. Any apt dep on a ROCm component breaks that audience. 
Drop hsa-rocr from CPACK_DEBIAN_PACKAGE_DEPENDS, CPACK_RPM_PACKAGE_REQUIRES, and the legacy rocm_package_add_dependencies call. Wire a shared scriptlet (packaging/postinst-check-hsa.sh) as the DEB postinst and the RPM %post that probes ldconfig + the standard /opt/rocm{,-*,/extras-*, /core-*}/lib prefixes for libhsa-runtime64.so.1 and prints a stderr warning when none of them have it. Always exits 0 so install never fails on its account -- the warning surfaces the missing runtime at install time instead of as a dynamic-linker error on first run. numactl / libnuma1 stay in the dep list since those are real OS packages that exist independent of any ROCm install. --- CMakeLists.txt | 17 +++++++++-- packaging/postinst-check-hsa.sh | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) create mode 100755 packaging/postinst-check-hsa.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index f8d1c747..0acb9c43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -360,7 +360,7 @@ if(BUILD_RELOCATABLE_PACKAGE) # DEB set(CPACK_DEBIAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") - set(CPACK_DEBIAN_PACKAGE_DEPENDS "numactl, libnuma1, hsa-rocr") + set(CPACK_DEBIAN_PACKAGE_DEPENDS "numactl, libnuma1") set(CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_CONTACT}") if(NOT "${_tb_pkg_release}" STREQUAL "") set(CPACK_DEBIAN_PACKAGE_RELEASE "${_tb_pkg_release}") @@ -373,7 +373,7 @@ if(BUILD_RELOCATABLE_PACKAGE) # RPM set(CPACK_RPM_PACKAGE_NAME "${CPACK_PACKAGE_NAME}") set(CPACK_RPM_PACKAGE_LICENSE "MIT") - set(CPACK_RPM_PACKAGE_REQUIRES "numactl, hsa-rocr") + set(CPACK_RPM_PACKAGE_REQUIRES "numactl") set(CPACK_RPM_PACKAGE_VENDOR "${CPACK_PACKAGE_VENDOR}") if(NOT "${_tb_pkg_release}" STREQUAL "") set(CPACK_RPM_PACKAGE_RELEASE "${_tb_pkg_release}") @@ -394,6 +394,17 @@ if(BUILD_RELOCATABLE_PACKAGE) "${_rpm_exclude_prefix}" "${_rpm_exclude_prefix}/bin") + # Advisory install-time check for libhsa-runtime64.so.1. 
The package declares + # no hard ROCm dep so it can install on TheRock-tarball systems where no ROCm + # component is tracked by apt/dpkg; the postinst warns (never fails) when the + # HSA runtime is not discoverable, so a missing runtime surfaces at install + # time instead of as a dynamic-linker error on first invocation. + set(_tb_postinst_src "${CMAKE_CURRENT_SOURCE_DIR}/packaging/postinst-check-hsa.sh") + set(_tb_postinst_deb "${CMAKE_BINARY_DIR}/packaging/postinst") + configure_file("${_tb_postinst_src}" "${_tb_postinst_deb}" COPYONLY) + set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${_tb_postinst_deb}") + set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${_tb_postinst_src}") + # TGZ — embed release tag so successive runs do not collide on the same key. # CMake 3.13+ honors CPACK_ARCHIVE_FILE_NAME for archive generators, but # CMake 3.22 (Ubuntu 22.04) falls back to CPACK_PACKAGE_FILE_NAME for TGZ. @@ -414,7 +425,7 @@ else() rocm_setup_version(VERSION ${VERSION_STRING}) # Package specific CPACK vars - rocm_package_add_dependencies(DEPENDS "numactl" "hsa-rocr") + rocm_package_add_dependencies(DEPENDS "numactl") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md") set(CPACK_RPM_PACKAGE_LICENSE "MIT") diff --git a/packaging/postinst-check-hsa.sh b/packaging/postinst-check-hsa.sh new file mode 100755 index 00000000..9b0b2505 --- /dev/null +++ b/packaging/postinst-check-hsa.sh @@ -0,0 +1,50 @@ +#!/bin/sh +# Advisory check used as both the DEB postinst and the RPM %post scriptlet. +# TransferBench links against libhsa-runtime64.so.1; without it the binary +# fails at first run with a dynamic-linker error. The relocatable package +# declares no hard dep on hsa-rocr (or any ROCm component) because it is +# expected to install on TheRock-tarball systems where no ROCm package is +# tracked by apt/dpkg. Warn here so the user diagnoses a missing runtime +# at install time, not at TransferBench launch. +# +# Always exits 0 — this is advisory, never fatal. 
+
+set -e
+
+found=0
+# Probe 1: ask the dynamic loader cache, when ldconfig is available.
+if command -v ldconfig >/dev/null 2>&1; then
+    if ldconfig -p 2>/dev/null | grep -q 'libhsa-runtime64\.so\.1'; then
+        found=1
+    fi
+fi
+# Probe 2: fall back to the ROCm install prefixes named in the warning below.
+if [ "$found" -eq 0 ]; then
+    for d in /opt/rocm/lib /opt/rocm/lib64 /opt/rocm-*/lib /opt/rocm/extras-*/lib /opt/rocm/core-*/lib; do
+        if [ -e "$d/libhsa-runtime64.so.1" ]; then
+            found=1
+            break
+        fi
+    done
+fi
+# Advisory only: `|| true` keeps a failed write to stderr from tripping `set -e`.
+if [ "$found" -eq 0 ]; then
+    cat >&2 <<'EOF' || true
+====================================================================
+TransferBench: WARNING
+
+libhsa-runtime64.so.1 was not found on the dynamic loader path or under
+/opt/rocm/{lib,lib64}, /opt/rocm-*/lib, or /opt/rocm/{extras,core}-*/lib.
+
+TransferBench requires the ROCm HSA runtime at run time. Install a
+ROCm 7.x stack (system packages or TheRock SDK) before invoking
+TransferBench, or set LD_LIBRARY_PATH to a directory containing
+libhsa-runtime64.so.1.
+
+Without it, TransferBench will fail at startup with:
+    error while loading shared libraries: libhsa-runtime64.so.1
+====================================================================
+EOF
+fi
+
+exit 0