diff --git a/src/backend_model.cc b/src/backend_model.cc
index cefce6b59..c3b0fc2dc 100644
--- a/src/backend_model.cc
+++ b/src/backend_model.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -825,10 +825,14 @@ TritonModel::SetConfiguredScheduler(
   for (const auto& input : config_.input()) {
     if (input.is_shape_tensor()) {
       enforce_equal_shape_tensors.insert({input.name(), true});
-    } else if (
-        !input.allow_ragged_batch() &&
-        (triton::common::GetElementCount(input) == -1)) {
-      enforce_equal_shape_tensors.insert({input.name(), false});
+    } else {
+      int64_t element_count = 0;
+      RETURN_IF_ERROR(
+          GetElementCount(input.dims(), input.name(), &element_count));
+      if (!input.allow_ragged_batch() &&
+          (element_count == triton::common::WILDCARD_SIZE)) {
+        enforce_equal_shape_tensors.insert({input.name(), false});
+      }
     }
   }
 
diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc
index b5f595c87..fd8d5eb52 100644
--- a/src/backend_model_instance.cc
+++ b/src/backend_model_instance.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -372,9 +372,11 @@ TritonModelInstance::GenerateWarmupData()
     int64_t max_zero_byte_size = 0;
     int64_t max_random_byte_size = 0;
     for (const auto& input_meta : warmup_setting.inputs()) {
-      auto element_count =
-          triton::common::GetElementCount(input_meta.second.dims());
-      if (element_count == -1) {
+      int64_t batch_byte_size = 0;
+      RETURN_IF_ERROR(GetByteSize(
+          input_meta.second.data_type(), input_meta.second.dims(),
+          input_meta.first, &batch_byte_size));
+      if (batch_byte_size == triton::common::WILDCARD_SIZE) {
         return Status(
             Status::Code::INVALID_ARG,
             "warmup setting expects all variable-size dimensions are specified "
@@ -382,13 +384,6 @@ TritonModelInstance::GenerateWarmupData()
                 input_meta.first + "'");
       }
 
-      int64_t batch_byte_size =
-          element_count *
-          triton::common::GetDataTypeByteSize(input_meta.second.data_type());
-      if (batch_byte_size == 0) {
-        batch_byte_size = element_count * sizeof(int32_t);
-      }
-
       switch (input_meta.second.input_data_type_case()) {
         case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData:
           max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size);
@@ -443,14 +438,11 @@ TritonModelInstance::GenerateWarmupData()
       // Second pass to prepare original inputs.
       std::vector<std::shared_ptr<InferenceRequest::Input>> input_sps;
       for (const auto& input_meta : warmup_setting.inputs()) {
-        auto batch1_element_count =
-            triton::common::GetElementCount(input_meta.second.dims());
-        auto batch_byte_size =
-            batch1_element_count *
-            triton::common::GetDataTypeByteSize(input_meta.second.data_type());
-        if (batch_byte_size == 0) {
-          batch_byte_size = batch1_element_count * sizeof(int32_t);
-        }
+        int64_t batch_byte_size_signed = 0;
+        RETURN_IF_ERROR(GetByteSize(
+            input_meta.second.data_type(), input_meta.second.dims(),
+            input_meta.first, &batch_byte_size_signed));
+        size_t batch_byte_size = static_cast<size_t>(batch_byte_size_signed);
 
         const char* allocated_ptr;
         switch (input_meta.second.input_data_type_case()) {
@@ -476,10 +468,11 @@ TritonModelInstance::GenerateWarmupData()
                     {model_->LocalizedModelPath(), kWarmupDataFolder,
                      input_meta.second.input_data_file()}),
                 input_data));
+
             if (input_meta.second.data_type() ==
                 inference::DataType::TYPE_STRING) {
               batch_byte_size = input_data->size();
-            } else if (((size_t)batch_byte_size) > input_data->size()) {
+            } else if (batch_byte_size > input_data->size()) {
               return Status(
                   Status::Code::INVALID_ARG,
                   lrequest->LogRequest() + "warmup setting expects " +
diff --git a/src/infer_request.cc b/src/infer_request.cc
index 41074effc..bdcc8e031 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -515,9 +515,15 @@ InferenceRequest::Release(
   return Status::Success;
 }
 
-InferenceRequest*
-InferenceRequest::CopyAsNull(const InferenceRequest& from)
+Status
+InferenceRequest::CopyAsNull(
+    const InferenceRequest& from, std::unique_ptr<InferenceRequest>* to)
 {
+  if (to == nullptr) {
+    return Status(
+        Status::Code::INVALID_ARG, "InferenceRequest 'to' must not be null");
+  }
+
   // Create a copy of 'from' request with artificial inputs and no requested
   // outputs. Maybe more efficient to share inputs and other metadata,
   // but that binds the Null request with 'from' request's lifecycle.
@@ -587,10 +593,11 @@ InferenceRequest::CopyAsNull(const InferenceRequest& from)
     }
 
     if (input.second.DType() == inference::DataType::TYPE_STRING) {
-      int64_t element_count =
-          triton::common::GetElementCount(input.second.Shape());
-
-      size_t str_byte_size = static_cast<size_t>(4 * element_count);
+      int64_t str_byte_size_signed = 0;
+      RETURN_IF_ERROR(GetByteSize(
+          inference::DataType::TYPE_STRING, input.second.Shape(), input.first,
+          &str_byte_size_signed));
+      size_t str_byte_size = static_cast<size_t>(str_byte_size_signed);
       max_str_byte_size = std::max(str_byte_size, max_str_byte_size);
       if (str_byte_size > max_byte_size) {
         max_byte_size = str_byte_size;
@@ -638,11 +645,12 @@ InferenceRequest::CopyAsNull(const InferenceRequest& from)
     if (input.first == *max_input_name) {
       new_input->SetData(data);
     } else {
-      if (inference::DataType::TYPE_STRING == input.second.DType()) {
-        new_input->AppendData(
-            data_base,
-            triton::common::GetElementCount(input.second.Shape()) * 4, mem_type,
-            mem_id);
+      if (input.second.DType() == inference::DataType::TYPE_STRING) {
+        int64_t str_byte_size = 0;
+        RETURN_IF_ERROR(GetByteSize(
+            inference::DataType::TYPE_STRING, input.second.Shape(), input.first,
+            &str_byte_size));
+        new_input->AppendData(data_base, str_byte_size, mem_type, mem_id);
       } else {
         new_input->AppendData(
             data_base, input.second.Data()->TotalByteSize(), mem_type, mem_id);
@@ -662,7 +670,8 @@ InferenceRequest::CopyAsNull(const InferenceRequest& from)
         std::make_pair(pr.second.Name(), std::addressof(pr.second)));
   }
 
-  return lrequest.release();
+  *to = std::move(lrequest);
+  return Status::Success;
 }
 
 Status
@@ -844,8 +853,8 @@ InferenceRequest::LoadInputStates()
   // Add the input states to the inference request.
   if (sequence_states_ != nullptr) {
     if (sequence_states_->IsNullRequest()) {
-      sequence_states_ =
-          SequenceStates::CopyAsNull(sequence_states_->NullSequenceStates());
+      RETURN_IF_ERROR(SequenceStates::CopyAsNull(
+          sequence_states_->NullSequenceStates(), &sequence_states_));
     }
     for (auto& input_state_pair : sequence_states_->InputStates()) {
       auto& input_state = input_state_pair.second;
@@ -1173,14 +1182,14 @@ InferenceRequest::Normalize()
     if (input_config->has_reshape()) {
       std::deque<int64_t> variable_size_values;
       for (int64_t idx = 0; idx < input_config->dims_size(); idx++) {
-        if (input_config->dims(idx) == -1) {
+        if (input_config->dims(idx) == triton::common::WILDCARD_DIM) {
           variable_size_values.push_back((*shape)[idx]);
         }
       }
 
       shape->clear();
       for (const auto& dim : input_config->reshape().shape()) {
-        if (dim == -1) {
+        if (dim == triton::common::WILDCARD_DIM) {
           shape->push_back(variable_size_values.front());
           variable_size_values.pop_front();
         } else {
@@ -1219,8 +1228,9 @@ InferenceRequest::Normalize()
           const std::vector<int64_t>& input_dims =
               input.IsShapeTensor() ? input.OriginalShape()
                                     : input.ShapeWithBatchDim();
-          int64_t expected_byte_size =
-              triton::common::GetByteSize(data_type, input_dims);
+          int64_t expected_byte_size = 0;
+          RETURN_IF_ERROR(GetByteSize(
+              data_type, input_dims, input_name, &expected_byte_size));
           const size_t& byte_size = input.Data()->TotalByteSize();
           if ((byte_size > LLONG_MAX) ||
               (static_cast<int64_t>(byte_size) != expected_byte_size)) {
@@ -1311,7 +1321,7 @@ InferenceRequest::ValidateBytesInputs(
 {
   const auto& input_dims = input.ShapeWithBatchDim();
 
-  int64_t element_count = triton::common::GetElementCount(input_dims);
+  int64_t element_count = 0;
   int64_t element_checked = 0;
   size_t remaining_element_size = 0;
 
@@ -1322,6 +1332,8 @@ InferenceRequest::ValidateBytesInputs(
   size_t remaining_buffer_size = 0;
   int64_t buffer_memory_id;
 
+  RETURN_IF_ERROR(GetElementCount(input_dims, input_name, &element_count));
+
   // Validate elements until all buffers have been fully processed.
   while (remaining_buffer_size || buffer_next_idx < buffer_count) {
     // Get the next buffer if not currently processing one.
diff --git a/src/infer_request.h b/src/infer_request.h
index 1c7e83d6d..02ab5a4f0 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -632,7 +632,8 @@ class InferenceRequest {
   // required for the direct sequence batcher. The returned copy will
   // contain only the minimum content required for a null request.
   // The statistics of the copy will not be collected.
-  static InferenceRequest* CopyAsNull(const InferenceRequest& from);
+  static Status CopyAsNull(
+      const InferenceRequest& from, std::unique_ptr<InferenceRequest>* to);
 
   uint64_t QueueStartNs() const { return queue_start_ns_; }
   uint64_t CaptureQueueStartNs()
diff --git a/src/model_config_utils.cc b/src/model_config_utils.cc
index 79f0c53aa..8920c589e 100644
--- a/src/model_config_utils.cc
+++ b/src/model_config_utils.cc
@@ -353,9 +353,12 @@ ValidateIOShape(
       }
     }
 
-    const int64_t dims_size = triton::common::GetElementCount(io.dims());
-    const int64_t reshape_size =
-        triton::common::GetElementCount(io.reshape().shape());
+    int64_t dims_size = 0;
+    int64_t reshape_size = 0;
+    RETURN_IF_ERROR(
+        GetElementCount(io.dims(), io.name() + " dims", &dims_size));
+    RETURN_IF_ERROR(GetElementCount(
+        io.reshape().shape(), io.name() + " reshape", &reshape_size));
 
     // dims and reshape must both have same element count
     // or both have variable-size dimension.
@@ -372,12 +375,12 @@ ValidateIOShape(
     // each pair of the trunks separated by variable-size dimension has
     // the same element count. For instance, from [2, 4, -1, 6] to [8, -1, 1, 6]
     // is valid reshape as 2 * 4 = 8 and 6 = 1 * 6.
-    if (dims_size == -1) {
+    if (dims_size == triton::common::WILDCARD_SIZE) {
       std::vector<int64_t> dim_element_cnts;
       std::vector<int64_t> reshape_element_cnts;
       int64_t current_cnt = 1;
       for (const auto& dim : io.dims()) {
-        if (dim != -1) {
+        if (dim != triton::common::WILDCARD_DIM) {
           current_cnt *= dim;
         } else {
           dim_element_cnts.push_back(current_cnt);
@@ -388,7 +391,7 @@ ValidateIOShape(
 
       current_cnt = 1;
       for (const auto& dim : io.reshape().shape()) {
-        if (dim != -1) {
+        if (dim != triton::common::WILDCARD_DIM) {
           current_cnt *= dim;
         } else {
           reshape_element_cnts.push_back(current_cnt);
diff --git a/src/model_config_utils.h b/src/model_config_utils.h
index 44bccabe7..61547077b 100644
--- a/src/model_config_utils.h
+++ b/src/model_config_utils.h
@@ -25,12 +25,13 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #pragma once
 
+#include <cstdint>
+
 #include "filesystem/api.h"
 #include "model_config.pb.h"
 #include "status.h"
 #include "triton/common/model_config.h"
 #include "tritonserver_apis.h"
-
 namespace triton { namespace core {
 
 /// Enumeration for the different backend types.
@@ -319,4 +320,77 @@ bool EquivalentInInstanceConfig(
 std::string InstanceConfigSignature(
     const inference::ModelInstanceGroup& instance_config);
 
+/// Store the element count of 'dims' in '*cnt'. Returns an error Status when
+/// 'cnt' is null or the count is the INVALID_SIZE / OVERFLOW_SIZE sentinel.
+template <typename T>
+Status
+GetElementCount(const T& dims, const std::string& name, int64_t* cnt)
+{
+  if (cnt == nullptr) {
+    return Status(Status::Code::INTERNAL, "argument `cnt` cannot be nullptr");
+  }
+  const int64_t count = triton::common::GetElementCount(dims);
+  if (count == triton::common::INVALID_SIZE) {
+    return Status(
+        Status::Code::INVALID_ARG,
+        "tensor '" + name + "' contains an invalid dimension in shape " +
+            triton::common::DimsListToString(dims));
+  }
+  if (count == triton::common::OVERFLOW_SIZE) {
+    return Status(
+        Status::Code::INVALID_ARG, "element count for tensor '" + name +
+                                       "' exceeds maximum size of " +
+                                       std::to_string(INT64_MAX));
+  }
+  *cnt = count;
+  return Status::Success;
+}
+
+/// Store the byte size of a 'dtype' tensor with shape 'dims' in '*size'.
+/// TYPE_STRING counts sizeof(int32_t) bytes per element and reports a wildcard
+/// element count as WILDCARD_SIZE. Invalid dims or overflow give INVALID_ARG.
+template <typename T>
+Status
+GetByteSize(
+    const inference::DataType& dtype, const T& dims, const std::string& name,
+    int64_t* size)
+{
+  if (size == nullptr) {
+    return Status(Status::Code::INTERNAL, "argument `size` cannot be nullptr");
+  }
+  if (dtype != inference::DataType::TYPE_STRING) {
+    const int64_t total = triton::common::GetByteSize(dtype, dims);
+    if (total == triton::common::INVALID_SIZE) {
+      return Status(
+          Status::Code::INVALID_ARG,
+          "tensor '" + name + "' contains an invalid dimension " +
+              triton::common::DimsListToString(dims));
+    }
+    if (total == triton::common::OVERFLOW_SIZE) {
+      return Status(
+          Status::Code::INVALID_ARG, "byte size for tensor '" + name +
+                                         "' exceeds maximum size of " +
+                                         std::to_string(INT64_MAX));
+    }
+    *size = total;
+    return Status::Success;
+  }
+
+  // Strings: sizeof(int32_t) bytes are counted for every element.
+  int64_t num_elements = 0;
+  RETURN_IF_ERROR(GetElementCount(dims, name, &num_elements));
+  if (num_elements == triton::common::WILDCARD_SIZE) {
+    *size = triton::common::WILDCARD_SIZE;
+    return Status::Success;
+  }
+  if (num_elements > static_cast<int64_t>(INT64_MAX / sizeof(int32_t))) {
+    return Status(
+        Status::Code::INVALID_ARG, "byte size for tensor '" + name +
+                                       "' exceeds maximum size of " +
+                                       std::to_string(INT64_MAX));
+  }
+  *size = sizeof(int32_t) * num_elements;
+  return Status::Success;
+}
+
 }}  // namespace triton::core
diff --git a/src/sequence_batch_scheduler/sequence_batch_scheduler.cc b/src/sequence_batch_scheduler/sequence_batch_scheduler.cc
index 45e9c037c..f51877301 100644
--- a/src/sequence_batch_scheduler/sequence_batch_scheduler.cc
+++ b/src/sequence_batch_scheduler/sequence_batch_scheduler.cc
@@ -1,4 +1,4 @@
-// Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2018-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -384,13 +384,14 @@ SequenceBatchScheduler::GenerateInitialStateData(
   auto state_dim = state.dims().begin();
   for (; initial_state_dim != initial_state.dims().end();
        initial_state_dim++, state_dim++) {
-    if (*initial_state_dim == -1) {
+    if (*initial_state_dim == triton::common::WILDCARD_DIM) {
       return Status(
           Status::Code::INVALID_ARG,
           std::string("'initial_state' field for state input name '") +
               state.input_name() + "' contains variable dimensions.");
     } else {
-      if (*state_dim != -1 && *initial_state_dim != *state_dim) {
+      if (*state_dim != triton::common::WILDCARD_DIM &&
+          *initial_state_dim != *state_dim) {
         return Status(
             Status::Code::INVALID_ARG,
             std::string("'initial_state' dim for input name '") +
@@ -404,15 +405,11 @@ SequenceBatchScheduler::GenerateInitialStateData(
   }
 
   // Calculate total memory byte size
-  auto element_count = triton::common::GetElementCount(initial_state.dims());
-  size_t dtype_byte_size =
-      triton::common::GetDataTypeByteSize(initial_state.data_type());
-  size_t total_byte_size = element_count * dtype_byte_size;
-
-  // Custom handling for TYPE_BYTES
-  if (dtype_byte_size == 0) {
-    total_byte_size = sizeof(int32_t) * element_count;
-  }
+  int64_t total_byte_size_signed = 0;
+  RETURN_IF_ERROR(GetByteSize(
+      initial_state.data_type(), initial_state.dims(), state.input_name(),
+      &total_byte_size_signed));
+  size_t total_byte_size = static_cast<size_t>(total_byte_size_signed);
 
   switch (initial_state.state_data_case()) {
     case inference::ModelSequenceBatching_InitialState::StateDataCase::
@@ -1757,8 +1754,12 @@ DirectSequenceBatch::BatcherThread(const int nice)
           // Use null-request if necessary otherwise use the next
           // request in the queue...
           if (use_null_request) {
-            std::unique_ptr<InferenceRequest> ni(
-                InferenceRequest::CopyAsNull(*null_irequest));
+            std::unique_ptr<InferenceRequest> ni = nullptr;
+            Status status = InferenceRequest::CopyAsNull(*null_irequest, &ni);
+            if (!status.IsOk()) {
+              LOG_ERROR << "internal: unexpected failure copying null request: "
+                        << status.Message();
+            }
             // Note that when the not-ready control input of the
             // request is "true" the model can't assume that any
             // other inputs are meaningful, including CORRID. So we
diff --git a/src/sequence_batch_scheduler/sequence_utils.cc b/src/sequence_batch_scheduler/sequence_utils.cc
index c916ccca7..96abf62bf 100644
--- a/src/sequence_batch_scheduler/sequence_utils.cc
+++ b/src/sequence_batch_scheduler/sequence_utils.cc
@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -43,8 +43,8 @@ IterativeSequencer::RescheduleRequest(
   else if (!request->IsCancelled()) {
     // Use a null request to trigger sequence batcher cancellation so
     // additional request manipulation won't affect the actual request.
-    std::unique_ptr<InferenceRequest> ni(
-        InferenceRequest::CopyAsNull(*request));
+    std::unique_ptr<InferenceRequest> ni = nullptr;
+    RETURN_IF_ERROR(InferenceRequest::CopyAsNull(*request, &ni));
     ni->SetCorrelationId(request->CorrelationId());
     ni->SetFlags(TRITONSERVER_REQUEST_FLAG_SEQUENCE_END);
     ni->Cancel();
diff --git a/src/sequence_state.cc b/src/sequence_state.cc
index e1c4dc13d..66e03b9f8 100644
--- a/src/sequence_state.cc
+++ b/src/sequence_state.cc
@@ -1,4 +1,4 @@
-// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -26,8 +26,11 @@
 
 #include "sequence_state.h"
 
+#include <cstdint>
+
 #include "cuda_utils.h"
 #include "memory.h"
+#include "model_config_utils.h"
 #include "triton/common/logging.h"
 
 namespace triton { namespace core {
@@ -159,7 +162,7 @@ SequenceStates::Initialize(
 
     // Convert the variable dimensions to 1 for the first request.
     for (auto& dim : state_config.dims()) {
-      if (dim == -1) {
+      if (dim == triton::common::WILDCARD_DIM) {
         dims.push_back(1);
       } else {
         dims.push_back(dim);
@@ -209,16 +212,10 @@ SequenceStates::Initialize(
             initial_state_it->second.data_->TotalByteSize());
       }
     } else {
-      size_t state_size;
-      if (state.second.data_type() == inference::DataType::TYPE_STRING) {
-        auto element_count = triton::common::GetElementCount(dims);
-        // Total number of bytes required is equal to the element count
-        // multiplied by 4.
-        state_size = 4 * element_count;
-      } else {
-        state_size =
-            triton::common::GetByteSize(state.second.data_type(), dims);
-      }
+      int64_t state_size = 0;
+      RETURN_IF_ERROR(GetByteSize(
+          state.second.data_type(), dims, state_config.input_name(),
+          &state_size));
       if (use_growable_memory) {
         std::unique_ptr<GrowableMemory> growable_memory;
         RETURN_IF_ERROR(GrowableMemory::Create(
@@ -374,9 +371,16 @@ SequenceStates::OutputState(
   return OutputState(name, datatype, shape.data(), shape.size(), output_state);
 }
 
-std::shared_ptr<SequenceStates>
-SequenceStates::CopyAsNull(const std::shared_ptr<SequenceStates>& from)
+Status
+SequenceStates::CopyAsNull(
+    const std::shared_ptr<SequenceStates>& from,
+    std::shared_ptr<SequenceStates>* to)
 {
+  if (to == nullptr) {
+    return Status(
+        Status::Code::INVALID_ARG, "SequenceStates 'to' must not be null");
+  }
+
   std::shared_ptr<SequenceStates> lsequence_states;
   if (from != nullptr) {
     lsequence_states.reset(new SequenceStates);
@@ -394,10 +398,10 @@ SequenceStates::CopyAsNull(const std::shared_ptr<SequenceStates>& from)
       std::shared_ptr<AllocatedMemory> data;
       if (from_input_state_tensor->DType() ==
           inference::DataType::TYPE_STRING) {
-        // Use all-zero input states for null requests.
-        auto element_count =
-            triton::common::GetElementCount(from_input_state_tensor->Shape());
-        auto state_size = 4 * element_count;
+        int64_t state_size = 0;
+        RETURN_IF_ERROR(GetByteSize(
+            inference::DataType::TYPE_STRING, from_input_state_tensor->Shape(),
+            from_input_state_tensor->Name(), &state_size));
         data = std::make_shared<AllocatedMemory>(
             state_size, TRITONSERVER_MEMORY_CPU, 0);
       } else {
@@ -424,6 +428,7 @@ SequenceStates::CopyAsNull(const std::shared_ptr<SequenceStates>& from)
               false /* use_growable_memory */)));
     }
   }
-  return lsequence_states;
+  *to = std::move(lsequence_states);
+  return Status::Success;
 }
 }}  // namespace triton::core
diff --git a/src/sequence_state.h b/src/sequence_state.h
index 7faba3429..c2e9fe909 100644
--- a/src/sequence_state.h
+++ b/src/sequence_state.h
@@ -1,4 +1,4 @@
-// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -160,8 +160,10 @@ class SequenceStates {
       const std::vector<int64_t>& shape, SequenceState** output_state);
 
   // Create a copy of the 'from' sequence states for NULL requests.
-  static std::shared_ptr<SequenceStates> CopyAsNull(
-      const std::shared_ptr<SequenceStates>& from);
+  // On success, sets *to and returns Status::Success; on failure returns error.
+  static Status CopyAsNull(
+      const std::shared_ptr<SequenceStates>& from,
+      std::shared_ptr<SequenceStates>* to);
 
   const std::map<std::string, std::shared_ptr<SequenceState>>& InputStates()
   {