From 6d314ce236694b0cb90c5cef78512b415664a530 Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Wed, 24 Jun 2026 00:23:37 +0200 Subject: [PATCH] Fix direct transpose output offset Signed-off-by: Minh Vu --- include/internal/transpose.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/internal/transpose.h b/include/internal/transpose.h index 76e07c3..8b5a93c 100644 --- a/include/internal/transpose.h +++ b/include/internal/transpose.h @@ -481,10 +481,10 @@ static void cudecompTranspose_(int ax, int dir, const cudecompHandle_t handle, c size_t shift_b = offsets_b[src_rank]; for (int i = 0; i < 3; ++i) { if (pinfo_b_h.order[i] == ax_b) break; - shift *= shape_g_b_h[pinfo_b_h.order[i]]; + shift_b *= shape_g_b_h[pinfo_b_h.order[i]]; } - dst = o1 + shift + getPencilPtrOffset(pinfo_b_h, output_halo_extents); + dst = o1 + shift_b + getPencilPtrOffset(pinfo_b_h, output_halo_extents); } for (int i = 0; i < 3; ++i) {