Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ Hard rules:
3. **UI uses proxy model**
- Map indices proxy↔source before touching tree items.

4. **Base64 auto-inference has a persisted minimum-length guard**
- Automatic string→`BYTES`/`ZLIB`/`GZIP` inference only runs when the string length meets the current
`edit_limits/base64_min_length_chars` threshold (default `100`).
- Short valid base64 stays `STRING` unless the type is pinned explicitly or the threshold is lowered.

## 4) Isolation constraints (must hold)

- `editors/inline/*`, `editors/windowed/*` must not import `app/`, `documents/`, `tree/`.
Expand Down
3 changes: 3 additions & 0 deletions ai-memory/repo-map.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ Model".
primitive in `tree_actions/anchors.py`. This ensures consistency across different UI interactions.
- **Type-Centric**: Type inference (`tree/types.py`) and coercion (`tree/item_coercion.py`) are the source of truth for
how data is handled. Don't scatter type logic in the UI.
- **Base64 inference guard**: Automatic base64-family detection in `tree/types.py` uses valid syntax + strict decode,
but also a persisted minimum string length from `QSettings` (`edit_limits/base64_min_length_chars`, default `100`) to
reduce false positives on short human-readable strings.
- **Surgical Model Updates**: The `DiffApplier` (`undo/diff.py`) is used during Undo/Redo to emit minimal Qt signals.
This preserves UI state like selection and expansion that would be lost on a full model reset.
**Important**: `DiffApplier.apply()` bypasses `JsonTreeItem.set_data()` — special type handling (e.g., `RAW_FLOAT`
Expand Down
23 changes: 23 additions & 0 deletions app/app_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
from app.dialogs.secret_prefixes_dlg import SecretPrefixesDialog
from state.edit_limits import (
get_attach_file_warning_limit_bytes,
get_base64_inference_min_length_chars,
get_binary_edit_warning_limit_bytes,
get_multiline_edit_warning_limit_chars,
get_string_edit_warning_limit_chars,
set_attach_file_warning_limit_bytes,
set_base64_inference_min_length_chars,
set_binary_edit_warning_limit_bytes,
set_multiline_edit_warning_limit_chars,
set_string_edit_warning_limit_chars,
Expand Down Expand Up @@ -60,16 +62,19 @@ def _build_edit_limits_menu(self) -> None:
self.limit_multiline_action = QAction(win)
self.limit_binary_action = QAction(win)
self.limit_attach_action = QAction(win)
self.limit_base64_min_length_action = QAction(win)

self.limit_string_action.triggered.connect(self._set_string_warning_limit)
self.limit_multiline_action.triggered.connect(self._set_multiline_warning_limit)
self.limit_binary_action.triggered.connect(self._set_binary_warning_limit)
self.limit_attach_action.triggered.connect(self._set_attach_warning_limit)
self.limit_base64_min_length_action.triggered.connect(self._set_base64_inference_min_length)

self.limits_menu.addAction(self.limit_string_action)
self.limits_menu.addAction(self.limit_multiline_action)
self.limits_menu.addAction(self.limit_binary_action)
self.limits_menu.addAction(self.limit_attach_action)
self.limits_menu.addAction(self.limit_base64_min_length_action)
self.limits_menu.aboutToShow.connect(self.refresh_edit_limits_menu_entries)
self.refresh_edit_limits_menu_entries()

Expand All @@ -82,6 +87,7 @@ def refresh_edit_limits_menu_entries(self) -> None:
multiline_limit = get_multiline_edit_warning_limit_chars()
binary_limit = get_binary_edit_warning_limit_bytes()
attach_limit = get_attach_file_warning_limit_bytes()
base64_min_length = get_base64_inference_min_length_chars()

self.limit_string_action.setText(
win.tr("String edit limit... ({value} chars)").format(value=counts(string_limit))
Expand All @@ -95,6 +101,9 @@ def refresh_edit_limits_menu_entries(self) -> None:
self.limit_attach_action.setText(
win.tr("Attach file size limit... ({value})").format(value=format_bytes(attach_limit))
)
self.limit_base64_min_length_action.setText(
win.tr("Base64 inference minimum length... ({value} chars)").format(value=counts(base64_min_length))
)

def _prompt_limit_value(self, *, title: str, label: str, current: int) -> int | None:
value, ok = QInputDialog.getInt(self._win, title, label, current, 1, 2_147_483_647, 1)
Expand Down Expand Up @@ -157,3 +166,17 @@ def _set_attach_warning_limit(self) -> None:
set_attach_file_warning_limit_bytes(value)
self.refresh_edit_limits_menu_entries()
win.statusBar.showMessage(win.tr("Updated attach file warning limit"), 2000)

def _set_base64_inference_min_length(self) -> None:
    """Ask the user for a new base64 inference minimum length and store it.

    The dialog is pre-filled with the currently stored value. If the prompt
    returns ``None`` nothing is changed; otherwise the value is saved, the
    edit-limits menu labels are refreshed, and a status-bar message is shown
    for 2000 ms.
    """
    win = self._win
    stored_min_length = get_base64_inference_min_length_chars()
    chosen = self._prompt_limit_value(
        title=win.tr("Base64 Inference Minimum Length"),
        label=win.tr("Interpret strings as base64 starting at length (chars):"),
        current=stored_min_length,
    )
    if chosen is not None:
        set_base64_inference_min_length_chars(chosen)
        self.refresh_edit_limits_menu_entries()
        win.statusBar.showMessage(win.tr("Updated base64 inference minimum length"), 2000)
6 changes: 3 additions & 3 deletions documents/composition/demo_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def build_demo_data() -> dict[str, Any]:
"utf8-text": "Line 1\nLine 2\n\u03a9",
"password": "plainsecret",
"private_key": "-----BEGIN KEY-----\nabc\n-----END KEY-----",
"bytes": base64.b64encode(b"hello " * 10).decode(),
"zlib": base64.b64encode(zlib.compress(b"hello " * 10)).decode(),
"gzip": base64.b64encode(gzip.compress(b"hello " * 10)).decode(),
"bytes": base64.b64encode(b"hello " * 20).decode(),
"zlib": base64.b64encode(zlib.compress(bytes(range(256)))).decode(),
"gzip": base64.b64encode(gzip.compress(bytes(range(256)))).decode(),
"date": "2024-06-01",
"time": "12:34",
"datetime": "2024-06-01 12:34:56",
Expand Down
6 changes: 4 additions & 2 deletions settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
BINARY_EDIT_WARNING_LIMIT_BYTES = 100 * 1024
STRING_EDIT_WARNING_LIMIT_CHARS = 10_000
MULTILINE_EDIT_WARNING_LIMIT_CHARS = 100_000
BASE64_INFERENCE_MIN_LENGTH_CHARS = 100

# Secret field detection and masking defaults.
SECRET_WORD_PREFIXES: tuple[str, ...] = (
Expand Down Expand Up @@ -55,8 +56,9 @@
# Design decisions:
# - No INFERENCE_MAX_TOTAL_CHARS: individual gates (datetime, affix, color)
# effectively skip all unnecessary checks; a top-level fast path is redundant.
# - No INFERENCE_MAX_BASE64_PROBE_CHARS: base64 has no performance-driven
#   length cap; it uses content-based syntax validation (len mod 4 + alphabet
#   regex) plus a separate minimum-length false-positive guard.
# - No EDITABLE_DECODE_LIMIT_BYTES: if base64 syntax is valid, decode is allowed.

# parse_datetime_text() regex and datetime conversion.
Expand Down
10 changes: 10 additions & 0 deletions state/edit_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from settings import (
APPLICATION_ID,
BASE64_INFERENCE_MIN_LENGTH_CHARS,
BINARY_ATTACH_WARNING_LIMIT_BYTES,
BINARY_EDIT_WARNING_LIMIT_BYTES,
MULTILINE_EDIT_WARNING_LIMIT_CHARS,
Expand All @@ -14,6 +15,7 @@
_MULTILINE_LIMIT_KEY = "edit_limits/multiline_chars"
_BINARY_EDIT_LIMIT_KEY = "edit_limits/binary_bytes"
_ATTACH_LIMIT_KEY = "edit_limits/attach_bytes"
_BASE64_MIN_LENGTH_KEY = "edit_limits/base64_min_length_chars"


def _settings() -> QSettings:
Expand Down Expand Up @@ -58,3 +60,11 @@ def get_attach_file_warning_limit_bytes() -> int:

def set_attach_file_warning_limit_bytes(limit: int) -> None:
    """Store the attach-file warning limit under ``_ATTACH_LIMIT_KEY``.

    ``limit`` is passed through ``_coerce_positive_int`` with
    ``BINARY_ATTACH_WARNING_LIMIT_BYTES`` as the default before it is written.
    """
    store = _settings()
    sanitized = _coerce_positive_int(limit, default=BINARY_ATTACH_WARNING_LIMIT_BYTES)
    store.setValue(_ATTACH_LIMIT_KEY, sanitized)


def get_base64_inference_min_length_chars() -> int:
    """Return the stored base64 inference minimum length, in characters.

    The raw value read from ``_BASE64_MIN_LENGTH_KEY`` is passed through
    ``_coerce_positive_int`` with ``BASE64_INFERENCE_MIN_LENGTH_CHARS`` as the
    default.
    """
    stored = _settings().value(_BASE64_MIN_LENGTH_KEY)
    return _coerce_positive_int(stored, default=BASE64_INFERENCE_MIN_LENGTH_CHARS)


def set_base64_inference_min_length_chars(limit: int) -> None:
    """Store the base64 inference minimum length under ``_BASE64_MIN_LENGTH_KEY``.

    ``limit`` is passed through ``_coerce_positive_int`` with
    ``BASE64_INFERENCE_MIN_LENGTH_CHARS`` as the default before it is written.
    """
    store = _settings()
    sanitized = _coerce_positive_int(limit, default=BASE64_INFERENCE_MIN_LENGTH_CHARS)
    store.setValue(_BASE64_MIN_LENGTH_KEY, sanitized)
23 changes: 17 additions & 6 deletions tests/perf/test_decode_amplification.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

import pytest

import settings
from state.edit_limits import set_base64_inference_min_length_chars
from tests.perf.harness import classify_rows, measure_call, scaling_rows
from tests.perf.string_corpus import DEFAULT_SIZES, base64_like, plain_ascii

Expand All @@ -31,28 +33,37 @@
_collected_rows: list = []


@pytest.fixture(autouse=True)
def _lower_base64_inference_threshold_for_perf_valid_fixtures():
    """Run each test in this module with the base64 minimum length set to 20.

    NOTE: teardown writes back the shipped default
    (``settings.BASE64_INFERENCE_MIN_LENGTH_CHARS``), not whatever value was
    stored before the test started.
    """
    shipped_default = settings.BASE64_INFERENCE_MIN_LENGTH_CHARS
    set_base64_inference_min_length_chars(20)
    try:
        yield
    finally:
        # Runs even when the test body raises, so the lowered threshold never leaks.
        set_base64_inference_min_length_chars(shipped_default)


# ---------------------------------------------------------------------------
# Valid small fixtures for successful decode paths
# ---------------------------------------------------------------------------


def _make_valid_bytes_fixture() -> str:
"""Create a valid base64-encoded BYTES fixture (at least 20 chars for _B64_RE)."""
# Need at least 20 chars to match _B64_RE pattern
"""Create a valid base64-encoded BYTES fixture long enough to pass the default minimum-length guard."""
raw = b"Hello, World! This is a longer test message for base64 encoding."
return base64.b64encode(raw).decode("ascii")


def _make_valid_zlib_fixture() -> str:
"""Create a valid base64-encoded ZLIB fixture."""
raw = b"Hello, World! This is compressed data."
raw = b"Hello, World! This is compressed data. " * 8
compressed = zlib.compress(raw)
return base64.b64encode(compressed).decode("ascii")


def _make_valid_gzip_fixture() -> str:
"""Create a valid base64-encoded GZIP fixture."""
raw = b"Hello, World! This is gzip compressed data."
raw = b"Hello, World! This is gzip compressed data. " * 8
compressed = gzip.compress(raw)
return base64.b64encode(compressed).decode("ascii")

Expand Down Expand Up @@ -135,13 +146,13 @@ def test_valid_zlib_fixture_decodes(self):
"""Valid ZLIB fixture should decompress successfully."""
fixture = _make_valid_zlib_fixture()
result = decode_bytes(fixture, JsonType.ZLIB)
assert result == b"Hello, World! This is compressed data."
assert result == (b"Hello, World! This is compressed data. " * 8)

def test_valid_gzip_fixture_decodes(self):
    """Valid GZIP fixture should decompress successfully."""
    fixture = _make_valid_gzip_fixture()
    result = decode_bytes(fixture, JsonType.GZIP)
    # The fixture payload is the sentence repeated 8 times, so only the
    # repeated form is asserted; the single-copy comparison was stale.
    assert result == (b"Hello, World! This is gzip compressed data. " * 8)

def test_parse_json_type_detects_bytes(self):
"""parse_json_type should detect valid base64 as BYTES."""
Expand Down
6 changes: 3 additions & 3 deletions tests/test_context_menu_base64_file_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _value_at(tab: JsonTab, path: tuple[int, ...]) -> str:


def test_attach_base64_from_file_replaces_value(qtbot, tmp_path, monkeypatch):
initial = base64.b64encode(b"seed payload for bytes").decode("ascii")
initial = base64.b64encode(b"seed payload for bytes " * 5).decode("ascii")
tab = _make_tab(qtbot, {"blob": initial})
_select_value_cell(tab, (0,))

Expand All @@ -47,7 +47,7 @@ def test_attach_base64_from_file_replaces_value(qtbot, tmp_path, monkeypatch):


def test_attach_base64_from_file_warns_and_can_cancel_large_file(qtbot, tmp_path, monkeypatch):
initial = base64.b64encode(b"seed payload for bytes").decode("ascii")
initial = base64.b64encode(b"seed payload for bytes " * 5).decode("ascii")
tab = _make_tab(qtbot, {"blob": initial})
_select_value_cell(tab, (0,))

Expand All @@ -74,7 +74,7 @@ def _warn(*_args, **_kwargs):


def test_save_base64_as_file_writes_decoded_payload(qtbot, tmp_path, monkeypatch):
payload = b"content to save"
payload = b"content to save " * 5
encoded = base64.b64encode(payload).decode("ascii")
tab = _make_tab(qtbot, {"blob": encoded})
_select_value_cell(tab, (0,))
Expand Down
9 changes: 8 additions & 1 deletion tests/test_edit_limits_menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
from settings import APPLICATION_ID
from state.edit_limits import (
get_attach_file_warning_limit_bytes,
get_base64_inference_min_length_chars,
get_binary_edit_warning_limit_bytes,
get_multiline_edit_warning_limit_chars,
get_string_edit_warning_limit_chars,
set_attach_file_warning_limit_bytes,
set_base64_inference_min_length_chars,
set_binary_edit_warning_limit_bytes,
set_multiline_edit_warning_limit_chars,
set_string_edit_warning_limit_chars,
Expand All @@ -26,7 +28,7 @@ def test_file_menu_limit_actions_persist_updates(qtbot, monkeypatch):
win = MainWindow(yaml_filename="")
qtbot.addWidget(win)

picks = iter([111, 222, 333, 444])
picks = iter([111, 222, 333, 444, 555])

def _pick(*_args, **_kwargs):
return next(picks), True
Expand All @@ -37,11 +39,13 @@ def _pick(*_args, **_kwargs):
win._app_settings.limit_multiline_action.trigger()
win._app_settings.limit_binary_action.trigger()
win._app_settings.limit_attach_action.trigger()
win._app_settings.limit_base64_min_length_action.trigger()

assert get_string_edit_warning_limit_chars() == 111
assert get_multiline_edit_warning_limit_chars() == 222
assert get_binary_edit_warning_limit_bytes() == 333
assert get_attach_file_warning_limit_bytes() == 444
assert get_base64_inference_min_length_chars() == 555


def test_file_menu_limit_actions_restore_after_restart(qtbot):
Expand All @@ -50,6 +54,7 @@ def test_file_menu_limit_actions_restore_after_restart(qtbot):
set_multiline_edit_warning_limit_chars(2002)
set_binary_edit_warning_limit_bytes(3003)
set_attach_file_warning_limit_bytes(4004)
set_base64_inference_min_length_chars(5005)

first = MainWindow(yaml_filename="")
qtbot.addWidget(first)
Expand All @@ -58,6 +63,7 @@ def test_file_menu_limit_actions_restore_after_restart(qtbot):
assert "2.00K" in first._app_settings.limit_multiline_action.text()
assert "KiB" in first._app_settings.limit_binary_action.text()
assert "KiB" in first._app_settings.limit_attach_action.text()
assert "5.00K" in first._app_settings.limit_base64_min_length_action.text()
first.close()
first.deleteLater()

Expand All @@ -68,3 +74,4 @@ def test_file_menu_limit_actions_restore_after_restart(qtbot):
assert "2.00K" in second._app_settings.limit_multiline_action.text()
assert "KiB" in second._app_settings.limit_binary_action.text()
assert "KiB" in second._app_settings.limit_attach_action.text()
assert "5.00K" in second._app_settings.limit_base64_min_length_action.text()
24 changes: 24 additions & 0 deletions tests/test_inference_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
import base64

import settings
from state.edit_limits import set_base64_inference_min_length_chars
from tree.inference_limits import (
affix_inference_allowed,
base64_syntax_valid,
color_inference_allowed,
datetime_inference_allowed,
format_preview_decode_allowed,
)
from tree.types import JsonType, _looks_like_base64, parse_json_type


class TestDatetimeInferenceAllowed:
Expand Down Expand Up @@ -129,6 +131,28 @@ def test_too_much_padding(self):
assert base64_syntax_valid("Y===") is False


class TestBase64InferenceMinimumLength:
    """Checks that the stored minimum length gates automatic base64 inference."""

    # 24-character base64 encoding of b"my lovely bytes!".
    _SHORT_B64 = "bXkgbG92ZWx5IGJ5dGVzIQ=="

    def teardown_method(self):
        # Write the shipped default back after every test in this class.
        default_min_length = settings.BASE64_INFERENCE_MIN_LENGTH_CHARS
        set_base64_inference_min_length_chars(default_min_length)

    def test_short_valid_base64_is_not_inferred_by_default(self):
        sample = self._SHORT_B64
        assert len(sample) < settings.BASE64_INFERENCE_MIN_LENGTH_CHARS
        assert _looks_like_base64(sample) is False
        assert parse_json_type(sample) is JsonType.STRING

    def test_valid_base64_at_default_minimum_length_is_inferred(self):
        # 75 raw bytes encode to exactly 100 base64 characters.
        encoded = base64.b64encode(b"x" * 75).decode("ascii")
        assert len(encoded) == settings.BASE64_INFERENCE_MIN_LENGTH_CHARS
        assert _looks_like_base64(encoded) is True
        assert parse_json_type(encoded) is JsonType.BYTES

    def test_lowered_minimum_allows_shorter_valid_base64(self):
        sample = self._SHORT_B64
        set_base64_inference_min_length_chars(20)
        assert _looks_like_base64(sample) is True
        assert parse_json_type(sample) is JsonType.BYTES


class TestFormatPreviewDecodeAllowed:
"""Boundary tests for format_preview_decode_allowed."""

Expand Down
Loading
Loading