From 658d9831d9aa14e8313a882a045aba236d12c79c Mon Sep 17 00:00:00 2001 From: Josh Romero Date: Mon, 4 May 2026 10:10:22 -0700 Subject: [PATCH 1/3] Use RAII cudaEvent wrapper to simplify event creation/cleanup. Signed-off-by: Josh Romero --- include/internal/common.h | 29 ++++++------ include/internal/cuda_event.h | 84 +++++++++++++++++++++++++++++++++++ src/autotune.cc | 18 ++------ src/cudecomp.cc | 40 ++--------------- src/performance.cc | 16 +++---- 5 files changed, 114 insertions(+), 73 deletions(-) create mode 100644 include/internal/cuda_event.h diff --git a/include/internal/common.h b/include/internal/common.h index d763cdd..a083b7b 100644 --- a/include/internal/common.h +++ b/include/internal/common.h @@ -45,6 +45,7 @@ #include "cudecomp.h" #include "internal/checks.h" +#include "internal/cuda_event.h" #include "internal/graph.h" namespace cudecomp { @@ -158,10 +159,10 @@ struct cudecompCommInfo { // Structure to contain data for transpose performance sample struct cudecompTransposePerformanceSample { - cudaEvent_t transpose_start_event; - cudaEvent_t transpose_end_event; - std::vector alltoall_start_events; - std::vector alltoall_end_events; + cudecomp::cudaEvent transpose_start_event; + cudecomp::cudaEvent transpose_end_event; + std::vector alltoall_start_events; + std::vector alltoall_end_events; int32_t alltoall_timing_count = 0; size_t alltoall_bytes = 0; bool valid = false; @@ -176,10 +177,10 @@ struct cudecompTransposePerformanceSampleCollection { // Structure to contain data for halo performance sample struct cudecompHaloPerformanceSample { - cudaEvent_t halo_start_event; - cudaEvent_t halo_end_event; - cudaEvent_t sendrecv_start_event; - cudaEvent_t sendrecv_end_event; + cudecomp::cudaEvent halo_start_event; + cudecomp::cudaEvent halo_end_event; + cudecomp::cudaEvent sendrecv_start_event; + cudecomp::cudaEvent sendrecv_end_event; size_t sendrecv_bytes = 0; bool valid = false; }; @@ -202,8 +203,8 @@ struct cudecompGridDesc { cudecompCommInfo row_comm_info; // row communicator information cudecompCommInfo col_comm_info; // column communicator information - std::vector events{nullptr}; // CUDA events used for scheduling - cudaEvent_t nvshmem_sync_event = nullptr; // NVSHMEM event used for synchronization + std::vector events; // CUDA events used for scheduling + cudecomp::cudaEvent nvshmem_sync_event; // NVSHMEM event used for synchronization #ifdef ENABLE_NVSHMEM int* nvshmem_block_counters = nullptr; // device memory counters for SM alltoallv last-block detection @@ -217,11 +218,11 @@ struct cudecompGridDesc { nccl_local_comm; // NCCL communicator (intra-node, or intra-clique on MNNVL systems), shared from handle // Performance reporting related entries - std::vector alltoall_start_events; // events for alltoall timing - std::vector alltoall_end_events; // events for alltoall timing + std::vector alltoall_start_events; // events for alltoall timing + std::vector alltoall_end_events; // events for alltoall timing int32_t alltoall_timing_count = 0; // count of alltoall timing events pairs (for pipelined alltoall) - cudaEvent_t transpose_start_event; // event for transpose timing - cudaEvent_t transpose_end_event; // event for transpose timing + cudecomp::cudaEvent transpose_start_event; // event for transpose timing + cudecomp::cudaEvent transpose_end_event; // event for transpose timing std::unordered_map, std::array, std::array, std::array, bool, bool, cudecompDataType_t>, diff --git a/include/internal/cuda_event.h b/include/internal/cuda_event.h new file mode 100644 index 0000000..31554a8 --- /dev/null +++ b/include/internal/cuda_event.h @@ -0,0 +1,84 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CUDECOMP_CUDA_EVENT_H +#define CUDECOMP_CUDA_EVENT_H + +#include + +#include + +#include "internal/checks.h" + +namespace cudecomp { + +class cudaEvent { +public: + cudaEvent() = default; + ~cudaEvent() noexcept { resetNoThrow(); } + + cudaEvent(const cudaEvent&) = delete; + cudaEvent& operator=(const cudaEvent&) = delete; + + cudaEvent(cudaEvent&& other) noexcept : event_(std::exchange(other.event_, nullptr)) {} + + cudaEvent& operator=(cudaEvent&& other) noexcept { + if (this != &other) { + resetNoThrow(); + event_ = std::exchange(other.event_, nullptr); + } + return *this; + } + + void create() { + reset(); + cudaEvent_t event = nullptr; + CHECK_CUDA(cudaEventCreate(&event)); + event_ = event; + } + + void createWithFlags(unsigned int flags) { + reset(); + cudaEvent_t event = nullptr; + CHECK_CUDA(cudaEventCreateWithFlags(&event, flags)); + event_ = event; + } + + void reset() { + if (event_) { + CHECK_CUDA(cudaEventDestroy(event_)); + event_ = nullptr; + } + } + + void resetNoThrow() noexcept { + if (event_) { + cudaEventDestroy(event_); + event_ = nullptr; + } + } + + cudaEvent_t get() const noexcept { return event_; } + operator cudaEvent_t() const noexcept { return event_; } + +private: + cudaEvent_t event_ = nullptr; +}; + +} // namespace cudecomp + +#endif // CUDECOMP_CUDA_EVENT_H diff --git a/src/autotune.cc b/src/autotune.cc index a1ce48f..ae12edb 100644 --- a/src/autotune.cc +++ b/src/autotune.cc @@ -101,9 +101,9 @@ void autotuneTransposeBackend(cudecompHandle_t handle, cudecompGridDesc_t grid_d double t_start = MPI_Wtime(); // Create cuda_events for intermediate timings (5 events per trial: start + 4 op boundaries) - std::vector events(5 * options->n_trials); + std::vector events(5 * options->n_trials); for (auto& event : events) { - CHECK_CUDA(cudaEventCreate(&event)); + event.create(); } bool autotune_comm = options->autotune_transpose_backend; @@ -536,11 +536,6 @@ void autotuneTransposeBackend(cudecompHandle_t handle, cudecompGridDesc_t grid_d CHECK_CUDA(cudaFree(data)); if (need_data2) { CHECK_CUDA(cudaFree(data2)); } - // Delete cuda events - for (auto& event : events) { - CHECK_CUDA(cudaEventDestroy(event)); - } - // Set handle to best option (broadcast from rank 0 for consistency) CHECK_MPI(MPI_Bcast(&comm_backend_best, sizeof(cudecompTransposeCommBackend_t), MPI_CHAR, 0, handle->mpi_comm)); CHECK_MPI(MPI_Bcast(pdims_best, 2 * sizeof(int), MPI_INT, 0, handle->mpi_comm)); @@ -587,9 +582,9 @@ void autotuneHaloBackend(cudecompHandle_t handle, cudecompGridDesc_t grid_desc, double t_start = MPI_Wtime(); // Create cuda events for timing (one per trial boundary: n_trials + 1 total) - std::vector events(options->n_trials + 1); + std::vector events(options->n_trials + 1); for (auto& event : events) { - CHECK_CUDA(cudaEventCreate(&event)); + event.create(); } bool autotune_comm = options->autotune_halo_backend; @@ -901,11 +896,6 @@ void autotuneHaloBackend(cudecompHandle_t handle, cudecompGridDesc_t grid_desc, CHECK_CUDA(cudaFree(data)); - // Delete cuda events - for (auto& event : events) { - CHECK_CUDA(cudaEventDestroy(event)); - } - // Set handle to best option (broadcast from rank 0 for consistency) CHECK_MPI(MPI_Bcast(&comm_backend_best, sizeof(cudecompHaloCommBackend_t), MPI_CHAR, 0, handle->mpi_comm)); CHECK_MPI(MPI_Bcast(pdims_best, 2 * sizeof(int), MPI_INT, 0, handle->mpi_comm)); diff --git a/src/cudecomp.cc b/src/cudecomp.cc index 53b71b2..8437d28 100644 --- a/src/cudecomp.cc +++ b/src/cudecomp.cc @@ -805,10 +805,10 @@ cudecompResult_t cudecompGridDescCreate(cudecompHandle_t handle, cudecompGridDes // Create CUDA events for scheduling grid_desc->events.resize(handle->nranks); for (auto& event : grid_desc->events) { - CHECK_CUDA(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); + event.createWithFlags(cudaEventDisableTiming); } #ifdef ENABLE_NVSHMEM - CHECK_CUDA(cudaEventCreateWithFlags(&grid_desc->nvshmem_sync_event, cudaEventDisableTiming)); + grid_desc->nvshmem_sync_event.createWithFlags(cudaEventDisableTiming); #endif // Run autotuning if requested @@ -922,43 +922,9 @@ cudecompResult_t cudecompGridDescDestroy(cudecompHandle_t handle, cudecompGridDe CHECK_MPI(MPI_Comm_free(&grid_desc->col_comm_info.mpi_comm)); } - for (auto e : grid_desc->events) { - if (e) { CHECK_CUDA(cudaEventDestroy(e)); } - } - -#ifdef ENABLE_NVSHMEM - if (grid_desc->nvshmem_sync_event) { CHECK_CUDA(cudaEventDestroy(grid_desc->nvshmem_sync_event)); } -#endif - + // Print performance report if enabled if (handle->performance_report_enable) { - // Print performance report before destroying events printPerformanceReport(handle, grid_desc); - - // Destroy all transpose performance sample events in the map - for (auto& entry : grid_desc->transpose_perf_samples_map) { - auto& collection = entry.second; - for (auto& sample : collection.samples) { - CHECK_CUDA(cudaEventDestroy(sample.transpose_start_event)); - CHECK_CUDA(cudaEventDestroy(sample.transpose_end_event)); - for (auto& event : sample.alltoall_start_events) { - CHECK_CUDA(cudaEventDestroy(event)); - } - for (auto& event : sample.alltoall_end_events) { - CHECK_CUDA(cudaEventDestroy(event)); - } - } - } - - // Destroy all halo performance sample events in the map - for (auto& entry : grid_desc->halo_perf_samples_map) { - auto& collection = entry.second; - for (auto& sample : collection.samples) { - CHECK_CUDA(cudaEventDestroy(sample.halo_start_event)); - CHECK_CUDA(cudaEventDestroy(sample.halo_end_event)); - CHECK_CUDA(cudaEventDestroy(sample.sendrecv_start_event)); - CHECK_CUDA(cudaEventDestroy(sample.sendrecv_end_event)); - } - } } if (transposeBackendRequiresNccl(grid_desc->config.transpose_comm_backend) || diff --git a/src/performance.cc b/src/performance.cc index 3a91b76..8256a3b 100644 --- a/src/performance.cc +++ b/src/performance.cc @@ -211,15 +211,15 @@ getOrCreateTransposePerformanceSamples(const cudecompHandle_t handle, cudecompGr // Create events for each sample for (auto& sample : collection.samples) { - CHECK_CUDA(cudaEventCreate(&sample.transpose_start_event)); - CHECK_CUDA(cudaEventCreate(&sample.transpose_end_event)); + sample.transpose_start_event.create(); + sample.transpose_end_event.create(); sample.alltoall_start_events.resize(handle->nranks); sample.alltoall_end_events.resize(handle->nranks); for (auto& event : sample.alltoall_start_events) { - CHECK_CUDA(cudaEventCreate(&event)); + event.create(); } for (auto& event : sample.alltoall_end_events) { - CHECK_CUDA(cudaEventCreate(&event)); + event.create(); } sample.valid = false; } @@ -244,10 +244,10 @@ cudecompHaloPerformanceSampleCollection& getOrCreateHaloPerformanceSamples(const // Create events for each sample for (auto& sample : collection.samples) { - CHECK_CUDA(cudaEventCreate(&sample.halo_start_event)); - CHECK_CUDA(cudaEventCreate(&sample.halo_end_event)); - CHECK_CUDA(cudaEventCreate(&sample.sendrecv_start_event)); - CHECK_CUDA(cudaEventCreate(&sample.sendrecv_end_event)); + sample.halo_start_event.create(); + sample.halo_end_event.create(); + sample.sendrecv_start_event.create(); + sample.sendrecv_end_event.create(); sample.valid = false; } From 3736b996edd61901a0d68183e8bd9be871530911 Mon Sep 17 00:00:00 2001 From: Josh Romero Date: Mon, 4 May 2026 10:17:20 -0700 Subject: [PATCH 2/3] Formatting. Signed-off-by: Josh Romero --- include/internal/common.h | 10 +++++----- src/cudecomp.cc | 4 +--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/include/internal/common.h b/include/internal/common.h index a083b7b..ca50247 100644 --- a/include/internal/common.h +++ b/include/internal/common.h @@ -203,8 +203,8 @@ struct cudecompGridDesc { cudecompCommInfo row_comm_info; // row communicator information cudecompCommInfo col_comm_info; // column communicator information - std::vector events; // CUDA events used for scheduling - cudecomp::cudaEvent nvshmem_sync_event; // NVSHMEM event used for synchronization + std::vector events; // CUDA events used for scheduling + cudecomp::cudaEvent nvshmem_sync_event; // NVSHMEM event used for synchronization #ifdef ENABLE_NVSHMEM int* nvshmem_block_counters = nullptr; // device memory counters for SM alltoallv last-block detection @@ -220,9 +220,9 @@ struct cudecompGridDesc { // Performance reporting related entries std::vector alltoall_start_events; // events for alltoall timing std::vector alltoall_end_events; // events for alltoall timing - int32_t alltoall_timing_count = 0; // count of alltoall timing events pairs (for pipelined alltoall) - cudecomp::cudaEvent transpose_start_event; // event for transpose timing - cudecomp::cudaEvent transpose_end_event; // event for transpose timing + int32_t alltoall_timing_count = 0; // count of alltoall timing events pairs (for pipelined alltoall) + cudecomp::cudaEvent transpose_start_event; // event for transpose timing + cudecomp::cudaEvent transpose_end_event; // event for transpose timing std::unordered_map, std::array, std::array, std::array, bool, bool, cudecompDataType_t>, diff --git a/src/cudecomp.cc b/src/cudecomp.cc index 8437d28..31624b3 100644 --- a/src/cudecomp.cc +++ b/src/cudecomp.cc @@ -923,9 +923,7 @@ cudecompResult_t cudecompGridDescDestroy(cudecompHandle_t handle, cudecompGridDe } // Print performance report if enabled - if (handle->performance_report_enable) { - printPerformanceReport(handle, grid_desc); - } + if (handle->performance_report_enable) { printPerformanceReport(handle, grid_desc); } if (transposeBackendRequiresNccl(grid_desc->config.transpose_comm_backend) || haloBackendRequiresNccl(grid_desc->config.halo_comm_backend)) { From d7a9afdff586df198deb598049a778c1639b3193 Mon Sep 17 00:00:00 2001 From: Josh Romero Date: Mon, 4 May 2026 10:59:54 -0700 Subject: [PATCH 3/3] Remove explicit create calls. Signed-off-by: Josh Romero --- include/internal/common.h | 26 ++++++++++---------- include/internal/cuda_event.h | 46 +++++++++++------------------------ src/autotune.cc | 10 ++------ src/cudecomp.cc | 6 ----- src/performance.cc | 22 +---------------- 5 files changed, 30 insertions(+), 80 deletions(-) diff --git a/include/internal/common.h b/include/internal/common.h index ca50247..514cf0a 100644 --- a/include/internal/common.h +++ b/include/internal/common.h @@ -159,10 +159,10 @@ struct cudecompCommInfo { // Structure to contain data for transpose performance sample struct cudecompTransposePerformanceSample { - cudecomp::cudaEvent transpose_start_event; - cudecomp::cudaEvent transpose_end_event; - std::vector alltoall_start_events; - std::vector alltoall_end_events; + cudecomp::cudaEventTimed transpose_start_event; + cudecomp::cudaEventTimed transpose_end_event; + std::vector alltoall_start_events; + std::vector alltoall_end_events; int32_t alltoall_timing_count = 0; size_t alltoall_bytes = 0; bool valid = false; @@ -177,10 +177,10 @@ struct cudecompTransposePerformanceSampleCollection { // Structure to contain data for halo performance sample struct cudecompHaloPerformanceSample { - cudecomp::cudaEvent halo_start_event; - cudecomp::cudaEvent halo_end_event; - cudecomp::cudaEvent sendrecv_start_event; - cudecomp::cudaEvent sendrecv_end_event; + cudecomp::cudaEventTimed halo_start_event; + cudecomp::cudaEventTimed halo_end_event; + cudecomp::cudaEventTimed sendrecv_start_event; + cudecomp::cudaEventTimed sendrecv_end_event; size_t sendrecv_bytes = 0; bool valid = false; }; @@ -218,11 +218,11 @@ struct cudecompGridDesc { nccl_local_comm; // NCCL communicator (intra-node, or intra-clique on MNNVL systems), shared from handle // Performance reporting related entries - std::vector alltoall_start_events; // events for alltoall timing - std::vector alltoall_end_events; // events for alltoall timing - int32_t alltoall_timing_count = 0; // count of alltoall timing events pairs (for pipelined alltoall) - cudecomp::cudaEvent transpose_start_event; // event for transpose timing - cudecomp::cudaEvent transpose_end_event; // event for transpose timing + std::vector alltoall_start_events; // events for alltoall timing + std::vector alltoall_end_events; // events for alltoall timing + int32_t alltoall_timing_count = 0; // count of alltoall timing events pairs (for pipelined alltoall) + cudecomp::cudaEventTimed transpose_start_event; // event for transpose timing + cudecomp::cudaEventTimed transpose_end_event; // event for transpose timing std::unordered_map, std::array, std::array, std::array, bool, bool, cudecompDataType_t>, diff --git a/include/internal/cuda_event.h b/include/internal/cuda_event.h index 31554a8..27e7100 100644 --- a/include/internal/cuda_event.h +++ b/include/internal/cuda_event.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,17 +26,17 @@ namespace cudecomp { -class cudaEvent { +template class cudaEventBase { public: - cudaEvent() = default; - ~cudaEvent() noexcept { resetNoThrow(); } + cudaEventBase() { CHECK_CUDA(cudaEventCreateWithFlags(&event_, flags)); } + ~cudaEventBase() noexcept { resetNoThrow(); } - cudaEvent(const cudaEvent&) = delete; - cudaEvent& operator=(const cudaEvent&) = delete; + cudaEventBase(const cudaEventBase&) = delete; + cudaEventBase& operator=(const cudaEventBase&) = delete; - cudaEvent(cudaEvent&& other) noexcept : event_(std::exchange(other.event_, nullptr)) {} + cudaEventBase(cudaEventBase&& other) noexcept : event_(std::exchange(other.event_, nullptr)) {} - cudaEvent& operator=(cudaEvent&& other) noexcept { + cudaEventBase& operator=(cudaEventBase&& other) noexcept { if (this != &other) { resetNoThrow(); event_ = std::exchange(other.event_, nullptr); @@ -44,27 +44,10 @@ class cudaEvent { return *this; } - void create() { - reset(); - cudaEvent_t event = nullptr; - CHECK_CUDA(cudaEventCreate(&event)); - event_ = event; - } - - void createWithFlags(unsigned int flags) { - reset(); - cudaEvent_t event = nullptr; - CHECK_CUDA(cudaEventCreateWithFlags(&event, flags)); - event_ = event; - } - - void reset() { - if (event_) { - CHECK_CUDA(cudaEventDestroy(event_)); - event_ = nullptr; - } - } + cudaEvent_t get() const noexcept { return event_; } + operator cudaEvent_t() const noexcept { return event_; } +private: void resetNoThrow() noexcept { if (event_) { cudaEventDestroy(event_); @@ -72,13 +55,12 @@ class cudaEvent { } } - cudaEvent_t get() const noexcept { return event_; } - operator cudaEvent_t() const noexcept { return event_; } - -private: cudaEvent_t event_ = nullptr; }; +using cudaEvent = cudaEventBase; +using cudaEventTimed = cudaEventBase; + } // namespace cudecomp #endif // CUDECOMP_CUDA_EVENT_H diff --git a/src/autotune.cc b/src/autotune.cc index ae12edb..03b7f70 100644 --- a/src/autotune.cc +++ b/src/autotune.cc @@ -101,10 +101,7 @@ void autotuneTransposeBackend(cudecompHandle_t handle, cudecompGridDesc_t grid_d double t_start = MPI_Wtime(); // Create cuda_events for intermediate timings (5 events per trial: start + 4 op boundaries) - std::vector events(5 * options->n_trials); - for (auto& event : events) { - event.create(); - } + std::vector events(5 * options->n_trials); bool autotune_comm = options->autotune_transpose_backend; bool autotune_pdims = (grid_desc->config.pdims[0] == 0 && grid_desc->config.pdims[1] == 0); @@ -582,10 +579,7 @@ void autotuneHaloBackend(cudecompHandle_t handle, cudecompGridDesc_t grid_desc, double t_start = MPI_Wtime(); // Create cuda events for timing (one per trial boundary: n_trials + 1 total) - std::vector events(options->n_trials + 1); - for (auto& event : events) { - event.create(); - } + std::vector events(options->n_trials + 1); bool autotune_comm = options->autotune_halo_backend; bool autotune_pdims = (grid_desc->config.pdims[0] == 0 && grid_desc->config.pdims[1] == 0); diff --git a/src/cudecomp.cc b/src/cudecomp.cc index 31624b3..a10a32e 100644 --- a/src/cudecomp.cc +++ b/src/cudecomp.cc @@ -804,12 +804,6 @@ cudecompResult_t cudecompGridDescCreate(cudecompHandle_t handle, cudecompGridDes // Create CUDA events for scheduling grid_desc->events.resize(handle->nranks); - for (auto& event : grid_desc->events) { - event.createWithFlags(cudaEventDisableTiming); - } -#ifdef ENABLE_NVSHMEM - grid_desc->nvshmem_sync_event.createWithFlags(cudaEventDisableTiming); -#endif // Run autotuning if requested if (options) { diff --git a/src/performance.cc b/src/performance.cc index 8256a3b..0b65027 100644 --- a/src/performance.cc +++ b/src/performance.cc @@ -207,21 +207,11 @@ getOrCreateTransposePerformanceSamples(const cudecompHandle_t handle, cudecompGr // Create new sample collection for this configuration cudecompTransposePerformanceSampleCollection collection; collection.samples.resize(handle->performance_report_samples); - collection.sample_idx = 0; - // Create events for each sample + // Create per-rank alltoall events for each sample for (auto& sample : collection.samples) { - sample.transpose_start_event.create(); - sample.transpose_end_event.create(); sample.alltoall_start_events.resize(handle->nranks); sample.alltoall_end_events.resize(handle->nranks); - for (auto& event : sample.alltoall_start_events) { - event.create(); - } - for (auto& event : sample.alltoall_end_events) { - event.create(); - } - sample.valid = false; } samples_map[config] = std::move(collection); @@ -240,16 +230,6 @@ cudecompHaloPerformanceSampleCollection& getOrCreateHaloPerformanceSamples(const // Create new sample collection for this configuration cudecompHaloPerformanceSampleCollection collection; collection.samples.resize(handle->performance_report_samples); - collection.sample_idx = 0; - - // Create events for each sample - for (auto& sample : collection.samples) { - sample.halo_start_event.create(); - sample.halo_end_event.create(); - sample.sendrecv_start_event.create(); - sample.sendrecv_end_event.create(); - sample.valid = false; - } samples_map[config] = std::move(collection); }