Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions hooks/hook_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,50 @@ def _record_sync_signal(event: str, data: dict) -> None:
pass


# Dedup markers are only meaningful for the 500 ms double-fire window, but each
# distinct (event, payload) writes its own file. Without pruning, the markers
# directory leaks one file per unique tool call indefinitely. Sweep stale markers
# on a coarse time-gate so the cost stays amortized-cheap.
_DEDUP_TTL_SEC = 5 # markers older than this are stale (>> 500 ms dedup window)
_DEDUP_SWEEP_INTERVAL_SEC = 60 # sweep at most once per minute
_DEDUP_SWEEP_STAMP = MARKERS_DIR / "dedup-sweep.stamp"


def _prune_stale_dedup_markers() -> None:
"""Remove leaked ``hook-dedup-*`` markers older than the dedup window.

Time-gated to run at most once per ``_DEDUP_SWEEP_INTERVAL_SEC`` so a busy
session does not pay an O(N) directory scan on every hook invocation.

Fail-open: any I/O error is swallowed.
"""
try:
now = time.time()
try:
if (
_DEDUP_SWEEP_STAMP.is_file()
and now - _DEDUP_SWEEP_STAMP.stat().st_mtime < _DEDUP_SWEEP_INTERVAL_SEC
):
return
except Exception:
pass
# Claim the sweep slot before scanning so concurrent hooks do not all sweep.
try:
MARKERS_DIR.mkdir(parents=True, exist_ok=True)
_DEDUP_SWEEP_STAMP.write_text(str(int(now)), encoding="utf-8")
except Exception:
pass
cutoff = now - _DEDUP_TTL_SEC
for p in MARKERS_DIR.glob("hook-dedup-*"):
try:
if p.is_file() and p.stat().st_mtime < cutoff:
p.unlink()
except Exception:
continue
except Exception:
pass


def _check_and_set_dedup(event: str, payload_hash: str = "") -> bool:
"""Return True if this event+payload should be skipped as a double-fire duplicate.

Expand All @@ -131,6 +175,7 @@ def _check_and_set_dedup(event: str, payload_hash: str = "") -> bool:
Fail-open: any I/O error → returns False (process normally).
"""
try:
_prune_stale_dedup_markers()
key = f"{event}-{payload_hash}" if payload_hash else event
# Sanitise key to a safe filename: keep only alphanumeric + dash + dot
safe_key = "".join(c if c.isalnum() or c in "-." else "_" for c in key)
Expand Down
13 changes: 12 additions & 1 deletion hooks/rules/error_kb.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,18 @@ class ErrorFixNudgeRule(Rule):

def evaluate(self, event, data):
tool_name = data.get("toolName", "")
tool_args = data.get("toolArgs", {}) or {}
# toolArgs may arrive as a dict, a JSON string, or be absent depending on
# the runtime. Normalise to a dict so the later .get() calls never crash
# with "'str' object has no attribute 'get'".
tool_args = data.get("toolArgs", {})
if isinstance(tool_args, str):
try:
parsed = json.loads(tool_args)
tool_args = parsed if isinstance(parsed, dict) else {}
except Exception:
tool_args = {}
elif not isinstance(tool_args, dict):
tool_args = {}
Comment on lines +135 to +146

# Clear marker when learn.py is detected in bash command
if tool_name == "bash":
Expand Down
13 changes: 10 additions & 3 deletions hooks/rules/tentacle.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,16 @@ def _read_edits(path):
return data
except (json.JSONDecodeError, ValueError):
pass
# Legacy: flat set of file paths → migrate with current timestamp
now = time.time()
legacy_entries = [{"p": fp, "t": now} for fp in raw_set if fp]
# Legacy: flat set of file paths → migrate with the marker's last-modified
# time (NOT the current time). Stamping with now() on every read would
# perpetually refresh the entries so they never satisfy the 24 h TTL,
# permanently poisoning the "legacy" bucket. Using the file mtime lets
# migrated entries expire ~24 h after the marker was last written.
Comment on lines +112 to +116
try:
base_ts = path.stat().st_mtime
except Exception:
base_ts = time.time()
legacy_entries = [{"p": fp, "t": base_ts} for fp in raw_set if fp]
return {"legacy": legacy_entries}


Expand Down
24 changes: 24 additions & 0 deletions tests/test_hook_rules_more.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,30 @@ def _capture_run(args, **kwargs):
test("ErrorKBRule name", rule.name == "error-kb")
test("ErrorKBRule events", "errorOccurred" in rule.events)

# 2j. ErrorFixNudgeRule must not crash when toolArgs is a string (regression).
# Previously `data.get("toolArgs", {}) or {}` kept a non-empty string and the
# subsequent `.get()` raised "'str' object has no attribute 'get'" (367 logged
# errors in the wild).
from rules.error_kb import ErrorFixNudgeRule

_nudge_rule = ErrorFixNudgeRule()
try:
_r2j = _nudge_rule.evaluate("postToolUse", {"toolName": "bash", "toolArgs": "not-a-dict"})
test("ErrorFixNudgeRule: string toolArgs does not crash", _r2j is None or isinstance(_r2j, dict))
except Exception as _e2j:
test("ErrorFixNudgeRule: string toolArgs does not crash", False, f"raised: {_e2j}")

# 2k. ErrorFixNudgeRule still parses a JSON-string toolArgs so the learn.py
# marker-clear path keeps working when the runtime passes args as a string.
try:
_r2k = _nudge_rule.evaluate(
"postToolUse",
{"toolName": "bash", "toolArgs": '{"command": "python3 learn.py --mistake x y"}'},
)
test("ErrorFixNudgeRule: JSON-string toolArgs parsed (learn.py clears nudge)", _r2k is None)
except Exception as _e2k:
test("ErrorFixNudgeRule: JSON-string toolArgs parsed (learn.py clears nudge)", False, f"raised: {_e2k}")


# ══════════════════════════════════════════════════════════════════════
# Section 3: SessionEndRule
Expand Down
36 changes: 36 additions & 0 deletions tests/test_hook_runner_entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,42 @@ def _run_dedup_event(event: str, payload: dict) -> subprocess.CompletedProcess:

shutil.rmtree(str(_dd_home), ignore_errors=True)

# 14b. Regression: stale hook-dedup-* markers are pruned so the directory does
# not leak one file per unique tool call (previously grew to 130k+ files).
_ddp_home = Path(tempfile.mkdtemp(prefix="test-ep-ddp-"))
_ddp_markers = _ddp_home / ".copilot" / "markers"
_ddp_markers.mkdir(parents=True, exist_ok=True)
_ddp_env = {**os.environ, "HOME": str(_ddp_home), "USERPROFILE": str(_ddp_home), "HOOK_DRY_RUN": "1"}

# Plant a stale dedup marker (mtime 10 s ago → older than the 5 s TTL).
_stale_marker = _ddp_markers / "hook-dedup-preToolUse-deadbeef"
_stale_marker.write_text("0", encoding="utf-8")
_stale_ts = time.time() - 10
os.utime(str(_stale_marker), (_stale_ts, _stale_ts))

# No sweep stamp yet → the first hook invocation must run the sweep.
_ddp_r = subprocess.run(
[sys.executable, str(RUNNER), "preToolUse"],
input=json.dumps({"toolName": "read", "toolArgs": {"path": "bar.py"}}),
capture_output=True,
text=True,
encoding="utf-8",
timeout=10,
env=_ddp_env,
cwd=str(REPO / "hooks"),
)
test(
"dedup-prune: stale hook-dedup marker removed by sweep",
not _stale_marker.exists(),
f"stale marker still present; markers={list(_ddp_markers.glob('hook-dedup-*'))}",
)
test(
"dedup-prune: fresh marker for current call survives sweep",
any(f.name.startswith("hook-dedup-preToolUse") for f in _ddp_markers.iterdir() if f.is_file()),
f"markers={list(_ddp_markers.glob('hook-dedup-*'))}",
)
Comment on lines +1073 to +1082
shutil.rmtree(str(_ddp_home), ignore_errors=True)

shutil.rmtree(_ISOLATED_HOME, ignore_errors=True)

# ══════════════════════════════════════════════════════════════════════
Expand Down
26 changes: 26 additions & 0 deletions tests/test_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3809,6 +3809,32 @@ def _make_event21(command, *, edited_files=None):
)
test("17b2: all 3 legacy paths present", len(_loaded17b.get("legacy", [])) == 3, f"Got: {_loaded17b!r}")

# 17b3. Regression: legacy entries are stamped with the marker's mtime, NOT
# now(), so an old marker's entries expire via _prune_ttl instead of being
# perpetually refreshed (which previously caused permanent false-positive
# TENTACLE REQUIRED blocks).
_td17b3 = Path(tempfile.mkdtemp(prefix="test-17b3-"))
_marker17b3 = _td17b3 / "tentacle-edits"
_marker17b3.write_text("placeholder", encoding="utf-8")
_old_ts17b3 = _now17 - (86400 * 2) # 2 days ago → outside the 24 h TTL
os.utime(str(_marker17b3), (_old_ts17b3, _old_ts17b3))
_orig_vlist17b3 = _rt17.verify_list_marker
_rt17.verify_list_marker = lambda p: {"src/old.py", "tests/old.py"}
_loaded17b3 = _read_edits(_marker17b3)
_rt17.verify_list_marker = _orig_vlist17b3
_legacy_ts17b3 = [e["t"] for e in _loaded17b3.get("legacy", [])]
test(
"17b3: legacy entries stamped with marker mtime (not now)",
bool(_legacy_ts17b3) and all(abs(t - _old_ts17b3) < 2 for t in _legacy_ts17b3),
f"Got timestamps: {_legacy_ts17b3!r} (expected ~{_old_ts17b3})",
)
test(
"17b3b: stale legacy entries expire via _prune_ttl",
_prune_ttl(_loaded17b3.get("legacy", []), _now17) == [],
f"Got: {_prune_ttl(_loaded17b3.get('legacy', []), _now17)!r}",
)
shutil.rmtree(str(_td17b3), ignore_errors=True)

# 17c. Cross-repo isolation: repo-A entries invisible from repo-B
_repo_a17c = "/fake/repo-a"
_repo_b17c = "/fake/repo-b"
Expand Down
Loading