Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions cloud_pipelines_backend/component_library_api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,19 @@ def calculate_digest_for_component_text(text: str) -> str:
return digest


# Size limits applied to raw component text before it is parsed.
# NOTE(review): this page is a diff view reporting 3 deletions and 8 additions;
# MAX_COMPONENT_SIZE looks like the removed (pre-change) constant and
# MAX_COMPONENT_SIZE_BYTES its replacement - confirm against the merged file.
# Limit compared against len(text), i.e. a count of characters.
MAX_COMPONENT_SIZE = 300_000
# Baseline: MySQL TEXT column maximum (65,535 bytes).
# Limit compared against the UTF-8 encoded length of the text, in bytes.
MAX_COMPONENT_SIZE_BYTES = 65_535
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @morgan-wowk for actioning this issue!

Tangle is supposed to be DB-agnostic, and this limit is specific to MySQL, while Postgres and SQLite don't have it. I'm wondering if we should:

  1. Move this change to oasis-backend, since that would be proper for KateSQL (MySQL) only.
  2. Create a dialect-aware helper class in Tangle (too complex IMHO, and I don't think we have a precedent for that, nor would it be wise to create a precedent now).
  3. Something else?

TEXT Column Size Limits by Database

| Database | TEXT Max Size | Effectively Unlimited? |
| --- | --- | --- |
| MySQL | 65,535 bytes (~64 KB) | No |
| PostgreSQL | ~1 GB | Yes |
| SQLite | ~1 GB (default) | Yes |

MySQL

"A TEXT column with a maximum length of 65,535 (2^16 − 1) bytes. The effective maximum length is less if the value contains multibyte characters."

Source: MySQL 8.4 Reference Manual - String Data Type Syntax

PostgreSQL

"In addition, PostgreSQL provides the text type, which stores strings of any length."

"the longest possible character string that can be stored is about 1 GB"

Source: PostgreSQL Docs - Character Types

SQLite

"The maximum length of a TEXT or BLOB in bytes."
#define SQLITE_MAX_LENGTH 1000000000

Source (source header): sqliteLimit.h

"The maximum number of bytes in a string or BLOB in SQLite is defined by the preprocessor macro SQLITE_MAX_LENGTH. The default value of this macro is 1 billion (1,000,000,000)."

Source (docs): SQLite Implementation Limits



# Parse component text as YAML and validate it as a ComponentSpec, rejecting
# oversized input before any parsing happens.
#
# Args:
#   text: the raw component definition text.
# Returns:
#   Whatever load_component_spec_from_dict_and_validate returns for the parsed
#   document (annotated as component_structures.ComponentSpec).
# Raises:
#   ValueError: when len(text) exceeds MAX_COMPONENT_SIZE.
#   errors.ApiValidationError: when the UTF-8 encoded text exceeds
#     MAX_COMPONENT_SIZE_BYTES.
#
# NOTE(review): this body comes from a diff view (8 additions & 3 deletions).
# The character-count check looks like the removed side of the hunk and the
# byte-count check like the added side - confirm which one is live in the
# merged file before relying on both.
def load_component_spec_from_text_and_validate(
text: str,
) -> component_structures.ComponentSpec:
# Character-count guard: measures len(text), so multibyte characters count once.
if len(text) > MAX_COMPONENT_SIZE:
raise ValueError(f"Component size {len(text)} > {MAX_COMPONENT_SIZE=}.")
# Byte-count guard: measures the encoded size, so multibyte characters count
# for every byte they occupy in UTF-8.
text_bytes = len(text.encode("utf-8"))
if text_bytes > MAX_COMPONENT_SIZE_BYTES:
raise errors.ApiValidationError(
f"Component text is too large: {text_bytes} bytes"
f" (maximum allowed: {MAX_COMPONENT_SIZE_BYTES} bytes)."
)
# Size checks passed: parse with the safe YAML loader and hand the result to
# the dict-based validator.
component_dict = yaml.safe_load(text)
return load_component_spec_from_dict_and_validate(component_dict)

Expand Down
29 changes: 29 additions & 0 deletions tests/test_component_library_api_server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from sqlalchemy import orm
import pydantic
import yaml
import pytest

Expand Down Expand Up @@ -314,5 +315,33 @@ def test_component_library_service():
assert pins_11b == pins_11


def test_component_text_at_byte_limit_is_accepted():
    """Text of exactly MAX_COMPONENT_SIZE_BYTES bytes must pass the size guard.

    The payload is a run of ASCII "a" characters, so it is not a valid
    component spec. YAML or pydantic errors raised after the size check are
    therefore tolerated; only ApiValidationError (or any other unexpected
    exception type) fails the test.
    """
    byte_limit = components_api.MAX_COMPONENT_SIZE_BYTES
    payload = "a" * byte_limit
    # Sanity check: ASCII characters encode to one byte each in UTF-8.
    encoded_size = len(payload.encode("utf-8"))
    assert encoded_size == byte_limit

    try:
        components_api.load_component_spec_from_text_and_validate(payload)
    except errors.ApiValidationError:
        pytest.fail(
            "ApiValidationError should not be raised for text at the byte limit"
        )
    except (yaml.YAMLError, pydantic.ValidationError):
        # Expected: the synthetic payload is not valid YAML / ComponentSpec.
        pass
    except Exception as exc:
        pytest.fail(f"Unexpected exception raised: {type(exc).__name__}: {exc}")


def test_component_text_one_byte_over_limit_raises_api_validation_error():
    """Text one byte longer than MAX_COMPONENT_SIZE_BYTES must be rejected.

    The loader is expected to raise ApiValidationError, and the error message
    must contain the configured byte limit.
    """
    byte_limit = components_api.MAX_COMPONENT_SIZE_BYTES
    oversized = "a" * (byte_limit + 1)
    # Sanity check: ASCII characters encode to one byte each in UTF-8.
    assert len(oversized.encode("utf-8")) == byte_limit + 1

    with pytest.raises(errors.ApiValidationError) as raised:
        components_api.load_component_spec_from_text_and_validate(oversized)

    # Checked after the context manager exits, once the exception is captured.
    assert str(byte_limit) in str(raised.value)


# Run the tests through pytest when this file is executed directly as a script.
if __name__ == "__main__":
pytest.main()
Loading