triton-inference-server · yinggeh · Mar 12, 2026 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/src/backend_model.cc b/src/backend_model.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -825,10 +825,14 @@ TritonModel::SetConfiguredScheduler(
   for (const auto& input : config_.input()) {
     if (input.is_shape_tensor()) {
       enforce_equal_shape_tensors.insert({input.name(), true});
-    } else if (
-        !input.allow_ragged_batch() &&
-        (triton::common::GetElementCount(input) == -1)) {
-      enforce_equal_shape_tensors.insert({input.name(), false});
+    } else {
+      int64_t element_count = 0;
+      RETURN_IF_ERROR(
+          GetElementCount(input.dims(), input.name(), &element_count));
+      if (!input.allow_ragged_batch() &&
+          (element_count == triton::common::WILDCARD_SIZE)) {
+        enforce_equal_shape_tensors.insert({input.name(), false});
+      }
     }
   }
 

diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -372,23 +372,18 @@ TritonModelInstance::GenerateWarmupData()
     int64_t max_zero_byte_size = 0;
     int64_t max_random_byte_size = 0;
     for (const auto& input_meta : warmup_setting.inputs()) {
-      auto element_count =
-          triton::common::GetElementCount(input_meta.second.dims());
-      if (element_count == -1) {
+      int64_t batch_byte_size = 0;
+      RETURN_IF_ERROR(GetByteSize(
+          input_meta.second.data_type(), input_meta.second.dims(),
+          input_meta.first, &batch_byte_size));
+      if (batch_byte_size == triton::common::WILDCARD_SIZE) {
         return Status(
             Status::Code::INVALID_ARG,
             "warmup setting expects all variable-size dimensions are specified "
             "for input '" +
                 input_meta.first + "'");
       }
 
-      int64_t batch_byte_size =
-          element_count *
-          triton::common::GetDataTypeByteSize(input_meta.second.data_type());
-      if (batch_byte_size == 0) {
-        batch_byte_size = element_count * sizeof(int32_t);
-      }
-
       switch (input_meta.second.input_data_type_case()) {
         case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData:
           max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size);
@@ -443,14 +438,11 @@ TritonModelInstance::GenerateWarmupData()
       // Second pass to prepare original inputs.
       std::vector<std::shared_ptr<InferenceRequest::Input>> input_sps;
       for (const auto& input_meta : warmup_setting.inputs()) {
-        auto batch1_element_count =
-            triton::common::GetElementCount(input_meta.second.dims());
-        auto batch_byte_size =
-            batch1_element_count *
-            triton::common::GetDataTypeByteSize(input_meta.second.data_type());
-        if (batch_byte_size == 0) {
-          batch_byte_size = batch1_element_count * sizeof(int32_t);
-        }
+        int64_t batch_byte_size_signed = 0;
+        RETURN_IF_ERROR(GetByteSize(
+            input_meta.second.data_type(), input_meta.second.dims(),
+            input_meta.first, &batch_byte_size_signed));
+        size_t batch_byte_size = static_cast<size_t>(batch_byte_size_signed);
 
         const char* allocated_ptr;
         switch (input_meta.second.input_data_type_case()) {
@@ -476,10 +468,11 @@ TritonModelInstance::GenerateWarmupData()
                     {model_->LocalizedModelPath(), kWarmupDataFolder,
                      input_meta.second.input_data_file()}),
                 input_data));
+
             if (input_meta.second.data_type() ==
                 inference::DataType::TYPE_STRING) {
               batch_byte_size = input_data->size();
-            } else if (((size_t)batch_byte_size) > input_data->size()) {
+            } else if (batch_byte_size > input_data->size()) {
               return Status(
                   Status::Code::INVALID_ARG,
                   lrequest->LogRequest() + "warmup setting expects " +

diff --git a/src/infer_request.cc b/src/infer_request.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -515,9 +515,15 @@ InferenceRequest::Release(
   return Status::Success;
 }
 
-InferenceRequest*
-InferenceRequest::CopyAsNull(const InferenceRequest& from)
+Status
+InferenceRequest::CopyAsNull(
+    const InferenceRequest& from, std::unique_ptr<InferenceRequest>* to)
 {
+  if (to == nullptr) {
+    return Status(
+        Status::Code::INVALID_ARG, "InferenceRequest 'to' must not be null");
+  }
+
   // Create a copy of 'from' request with artificial inputs and no requested
   // outputs. Maybe more efficient to share inputs and other metadata,
   // but that binds the Null request with 'from' request's lifecycle.
@@ -587,10 +593,11 @@ InferenceRequest::CopyAsNull(const InferenceRequest& from)
     }
 
     if (input.second.DType() == inference::DataType::TYPE_STRING) {
-      int64_t element_count =
-          triton::common::GetElementCount(input.second.Shape());
-
-      size_t str_byte_size = static_cast<size_t>(4 * element_count);
+      int64_t str_byte_size_signed = 0;
+      RETURN_IF_ERROR(GetByteSize(
+          inference::DataType::TYPE_STRING, input.second.Shape(), input.first,
+          &str_byte_size_signed));
+      size_t str_byte_size = static_cast<size_t>(str_byte_size_signed);
       max_str_byte_size = std::max(str_byte_size, max_str_byte_size);
       if (str_byte_size > max_byte_size) {
         max_byte_size = str_byte_size;
@@ -638,11 +645,12 @@ InferenceRequest::CopyAsNull(const InferenceRequest& from)
     if (input.first == *max_input_name) {
       new_input->SetData(data);
     } else {
-      if (inference::DataType::TYPE_STRING == input.second.DType()) {
-        new_input->AppendData(
-            data_base,
-            triton::common::GetElementCount(input.second.Shape()) * 4, mem_type,
-            mem_id);
+      if (input.second.DType() == inference::DataType::TYPE_STRING) {
+        int64_t str_byte_size = 0;
+        RETURN_IF_ERROR(GetByteSize(
+            inference::DataType::TYPE_STRING, input.second.Shape(), input.first,
+            &str_byte_size));
+        new_input->AppendData(data_base, str_byte_size, mem_type, mem_id);
       } else {
         new_input->AppendData(
             data_base, input.second.Data()->TotalByteSize(), mem_type, mem_id);
@@ -662,7 +670,8 @@ InferenceRequest::CopyAsNull(const InferenceRequest& from)
         std::make_pair(pr.second.Name(), std::addressof(pr.second)));
   }
 
-  return lrequest.release();
+  *to = std::move(lrequest);
+  return Status::Success;
 }
 
 Status
@@ -844,8 +853,8 @@ InferenceRequest::LoadInputStates()
   // Add the input states to the inference request.
   if (sequence_states_ != nullptr) {
     if (sequence_states_->IsNullRequest()) {
-      sequence_states_ =
-          SequenceStates::CopyAsNull(sequence_states_->NullSequenceStates());
+      RETURN_IF_ERROR(SequenceStates::CopyAsNull(
+          sequence_states_->NullSequenceStates(), &sequence_states_));
     }
     for (auto& input_state_pair : sequence_states_->InputStates()) {
       auto& input_state = input_state_pair.second;
@@ -1173,14 +1182,14 @@ InferenceRequest::Normalize()
     if (input_config->has_reshape()) {
       std::deque<int64_t> variable_size_values;
       for (int64_t idx = 0; idx < input_config->dims_size(); idx++) {
-        if (input_config->dims(idx) == -1) {
+        if (input_config->dims(idx) == triton::common::WILDCARD_DIM) {
           variable_size_values.push_back((*shape)[idx]);
         }
       }
 
       shape->clear();
       for (const auto& dim : input_config->reshape().shape()) {
-        if (dim == -1) {
+        if (dim == triton::common::WILDCARD_DIM) {
           shape->push_back(variable_size_values.front());
           variable_size_values.pop_front();
         } else {
@@ -1219,8 +1228,9 @@ InferenceRequest::Normalize()
           const std::vector<int64_t>& input_dims =
               input.IsShapeTensor() ? input.OriginalShape()
                                     : input.ShapeWithBatchDim();
-          int64_t expected_byte_size =
-              triton::common::GetByteSize(data_type, input_dims);
+          int64_t expected_byte_size = 0;
+          RETURN_IF_ERROR(GetByteSize(
+              data_type, input_dims, input_name, &expected_byte_size));
           const size_t& byte_size = input.Data()->TotalByteSize();
           if ((byte_size > LLONG_MAX) ||
               (static_cast<int64_t>(byte_size) != expected_byte_size)) {
@@ -1311,7 +1321,7 @@ InferenceRequest::ValidateBytesInputs(
 {
   const auto& input_dims = input.ShapeWithBatchDim();
 
-  int64_t element_count = triton::common::GetElementCount(input_dims);
+  int64_t element_count = 0;
   int64_t element_checked = 0;
   size_t remaining_element_size = 0;
 
@@ -1322,6 +1332,8 @@ InferenceRequest::ValidateBytesInputs(
   size_t remaining_buffer_size = 0;
   int64_t buffer_memory_id;
 
+  RETURN_IF_ERROR(GetElementCount(input_dims, input_name, &element_count));
+
   // Validate elements until all buffers have been fully processed.
   while (remaining_buffer_size || buffer_next_idx < buffer_count) {
     // Get the next buffer if not currently processing one.

diff --git a/src/infer_request.h b/src/infer_request.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -632,7 +632,8 @@ class InferenceRequest {
   // required for the direct sequence batcher. The returned copy will
   // contain only the minimum content required for a null request.
   // The statistics of the copy will not be collected.
-  static InferenceRequest* CopyAsNull(const InferenceRequest& from);
+  static Status CopyAsNull(
+      const InferenceRequest& from, std::unique_ptr<InferenceRequest>* to);
 
   uint64_t QueueStartNs() const { return queue_start_ns_; }
   uint64_t CaptureQueueStartNs()

diff --git a/src/model_config_utils.cc b/src/model_config_utils.cc
@@ -353,9 +353,12 @@ ValidateIOShape(
       }
     }
 
-    const int64_t dims_size = triton::common::GetElementCount(io.dims());
-    const int64_t reshape_size =
-        triton::common::GetElementCount(io.reshape().shape());
+    int64_t dims_size = 0;
+    int64_t reshape_size = 0;
+    RETURN_IF_ERROR(
+        GetElementCount(io.dims(), io.name() + " dims", &dims_size));
+    RETURN_IF_ERROR(GetElementCount(
+        io.reshape().shape(), io.name() + " reshape", &reshape_size));
 
     // dims and reshape must both have same element count
     // or both have variable-size dimension.
@@ -372,12 +375,12 @@ ValidateIOShape(
     // each pair of the trunks separated by variable-size dimension has
     // the same element count. For instance, from [2, 4, -1, 6] to [8, -1, 1, 6]
     // is valid reshape as 2 * 4 = 8 and 6 = 1 * 6.
-    if (dims_size == -1) {
+    if (dims_size == triton::common::WILDCARD_SIZE) {
       std::vector<int64_t> dim_element_cnts;
       std::vector<int64_t> reshape_element_cnts;
       int64_t current_cnt = 1;
       for (const auto& dim : io.dims()) {
-        if (dim != -1) {
+        if (dim != triton::common::WILDCARD_DIM) {
           current_cnt *= dim;
         } else {
           dim_element_cnts.push_back(current_cnt);
@@ -388,7 +391,7 @@ ValidateIOShape(
 
       current_cnt = 1;
       for (const auto& dim : io.reshape().shape()) {
-        if (dim != -1) {
+        if (dim != triton::common::WILDCARD_DIM) {
           current_cnt *= dim;
         } else {
           reshape_element_cnts.push_back(current_cnt);

diff --git a/src/model_config_utils.h b/src/model_config_utils.h
@@ -25,12 +25,13 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #pragma once
 
+#include <cstdint>
+
 #include "filesystem/api.h"
 #include "model_config.pb.h"
 #include "status.h"
 #include "triton/common/model_config.h"
 #include "tritonserver_apis.h"
-
 namespace triton { namespace core {
 
 /// Enumeration for the different backend types.
@@ -319,4 +320,105 @@ bool EquivalentInInstanceConfig(
 std::string InstanceConfigSignature(
     const inference::ModelInstanceGroup& instance_config);
 
+/// Compute the number of elements described by a shape, reporting invalid
+/// shapes and overflow through a Status instead of a sentinel return value.
+///
+/// \param dims The shape. Any type accepted by both
+/// triton::common::GetElementCount() and triton::common::DimsListToString().
+/// \param name The tensor name, used only to build error messages.
+/// \param cnt Returns the element count. A triton::common::WILDCARD_SIZE
+/// result is passed through unchanged. Not written when an error is returned.
+/// \return INTERNAL if 'cnt' is nullptr, INVALID_ARG if the underlying count
+/// is triton::common::INVALID_SIZE or triton::common::OVERFLOW_SIZE, success
+/// otherwise.
+template <typename T>
+Status
+GetElementCount(const T& dims, const std::string& name, int64_t* cnt)
+{
+  if (cnt == nullptr) {
+    return Status(Status::Code::INTERNAL, "argument `cnt` cannot be nullptr");
+  }
+
+  const int64_t element_count = triton::common::GetElementCount(dims);
+  if (element_count == triton::common::INVALID_SIZE) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "tensor '" + name + "' contains an invalid dimension in shape " +
+            triton::common::DimsListToString(dims));
+  } else if (element_count == triton::common::OVERFLOW_SIZE) {
+    return Status(
+        Status::Code::INVALID_ARG, "element count for tensor '" + name +
+                                       "' exceeds maximum size of " +
+                                       std::to_string(INT64_MAX));
+  }
+
+  *cnt = element_count;
+  return Status::Success;
+}
+
+/// Compute the byte size of a tensor from its data type and shape, reporting
+/// invalid shapes and overflow through a Status.
+///
+/// \param dtype The tensor data type. TYPE_STRING is sized as sizeof(int32_t)
+/// bytes per element; every other type is delegated to
+/// triton::common::GetByteSize().
+/// \param dims The shape.
+/// \param name The tensor name, used only to build error messages.
+/// \param size Returns the byte size. For TYPE_STRING it is
+/// triton::common::WILDCARD_SIZE when the element count is WILDCARD_SIZE.
+/// Not written when an error is returned.
+/// \return INTERNAL if 'size' is nullptr, INVALID_ARG if the shape is invalid
+/// or the byte size exceeds INT64_MAX, success otherwise.
+template <typename T>
+Status
+GetByteSize(
+    const inference::DataType& dtype, const T& dims, const std::string& name,
+    int64_t* size)
+{
+  if (size == nullptr) {
+    return Status(Status::Code::INTERNAL, "argument `size` cannot be nullptr");
+  }
+
+  int64_t byte_size = 0;
+  if (dtype == inference::DataType::TYPE_STRING) {
+    // Signed constant so the overflow check and the multiplication below stay
+    // in int64_t instead of being converted to unsigned by sizeof().
+    constexpr int64_t kStringElementByteSize = sizeof(int32_t);
+
+    int64_t element_count = 0;
+    RETURN_IF_ERROR(GetElementCount(dims, name, &element_count));
+
+    if (element_count == triton::common::WILDCARD_SIZE) {
+      *size = triton::common::WILDCARD_SIZE;
+      return Status::Success;
+    }
+
+    // Total number of bytes required is equal to the element count
+    // multiplied by 4. Check against the largest safe count first so the
+    // multiplication cannot overflow.
+    if (element_count > INT64_MAX / kStringElementByteSize) {
+      return Status(
+          Status::Code::INVALID_ARG, "byte size for tensor '" + name +
+                                         "' exceeds maximum size of " +
+                                         std::to_string(INT64_MAX));
+    }
+    byte_size = kStringElementByteSize * element_count;
+  } else {
+    byte_size = triton::common::GetByteSize(dtype, dims);
+    if (byte_size == triton::common::INVALID_SIZE) {
+      return Status(
+          Status::Code::INVALID_ARG,
+          "tensor '" + name + "' contains an invalid dimension " +
+              triton::common::DimsListToString(dims));
+    } else if (byte_size == triton::common::OVERFLOW_SIZE) {
+      return Status(
+          Status::Code::INVALID_ARG, "byte size for tensor '" + name +
+                                         "' exceeds maximum size of " +
+                                         std::to_string(INT64_MAX));
+    }
+  }
+  *size = byte_size;
+  return Status::Success;
+}
+
 }}  // namespace triton::core