From 46a59415016ddb3c1da64e7f6ae90c551bce679b Mon Sep 17 00:00:00 2001 From: Micah Alpern Date: Wed, 10 Jun 2026 22:53:03 -0700 Subject: [PATCH] VHID daemon: ProcessType=Interactive to prevent load-induced disconnects (MAL-57 Layer 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Under heavy CPU load the Karabiner VHID daemon can be starved long enough to miss the pqrs client's 3s heartbeat. Kanata then drops its output connection mid-keystroke, losing key Release events and leaving the virtual keyboard's key stuck down β€” macOS autorepeats it for ~3 seconds (multi-letter bursts like 'togggggg'). The shipped SMAppService kanata plist already sets ProcessType=Interactive; the KeyPath-installed VHID daemon plist set no ProcessType at all, so the process that must answer heartbeats and write key reports was the one launchd could deprioritize. Changes: - Add ProcessType=Interactive to the VHID daemon plist in both generator copies (PlistGenerator + privileged helper), the legacy kanata plist generators (for consistency with the shipped SMAppService plist), and the debug script template. - isVHIDDaemonConfiguredCorrectly() now also requires the key, so a pre-fix plist reports misconfigured and repair rewrites it. - Tests: plist generation asserts the key; health check covers the missing-ProcessType case. - Document the 2026-06-10 live incident evidence in the MAL-57 bug doc: full causal chain across kanata, the VHID daemon, and DriverKit, plus the remaining fix layers for future work. Co-Authored-By: Claude Fable 5 --- Sources/KeyPathHelper/HelperService.swift | 4 + .../Core/PlistGenerator.swift | 11 +++ .../Core/ServiceHealthChecker.swift | 14 ++- .../Services/PlistGeneratorTests.swift | 15 +++ .../Services/ServiceHealthCheckerTests.swift | 16 ++- dev-tools/debug/debug-plist-validation.sh | 2 + docs/bugs/MAL-57-duplicate-keypresses.md | 98 +++++++++++++++++++ 7 files changed, 154 insertions(+), 6 deletions(-) diff --git a/Sources/KeyPathHelper/HelperService.swift b/Sources/KeyPathHelper/HelperService.swift index c1c588cf0..15d179d8a 100644 --- a/Sources/KeyPathHelper/HelperService.swift +++ b/Sources/KeyPathHelper/HelperService.swift @@ -879,6 +879,8 @@ extension HelperService { KeepAlive + ProcessType + Interactive StandardOutPath /var/log/com.keypath.kanata.stdout.log StandardErrorPath @@ -914,6 +916,8 @@ extension HelperService { KeepAlive + ProcessType + Interactive StandardOutPath /var/log/karabiner-vhid-daemon.log StandardErrorPath diff --git a/Sources/KeyPathInstallationWizard/Core/PlistGenerator.swift b/Sources/KeyPathInstallationWizard/Core/PlistGenerator.swift index c92a37b4c..46e0dff9b 100644 --- a/Sources/KeyPathInstallationWizard/Core/PlistGenerator.swift +++ b/Sources/KeyPathInstallationWizard/Core/PlistGenerator.swift @@ -105,6 +105,8 @@ public enum PlistGenerator { KeepAlive + ProcessType + Interactive StandardOutPath /var/log/com.keypath.kanata.stdout.log StandardErrorPath @@ -138,9 +140,16 @@ public enum PlistGenerator { /// - Automatic restart (KeepAlive) /// - Logging to /var/log/karabiner-vhid-daemon.log /// - Throttle protection to prevent rapid restart loops + /// - Interactive process type so the daemon is not starved under CPU load /// /// This daemon is required for Kanata to access the virtual keyboard device. /// + /// ProcessType=Interactive matches the Kanata daemon plist. Without it, + /// heavy system load (e.g. parallel builds) can starve the daemon long + /// enough to miss the pqrs client's 3s heartbeat, dropping Kanata's output + /// connection mid-keystroke and leaving a key stuck down (autorepeat + /// bursts). See docs/bugs/MAL-57-duplicate-keypresses.md. + /// /// - Returns: Complete plist XML string ready to write to disk public static func generateVHIDDaemonPlist() -> String { """ @@ -158,6 +167,8 @@ public enum PlistGenerator { KeepAlive + ProcessType + Interactive StandardOutPath /var/log/karabiner-vhid-daemon.log StandardErrorPath diff --git a/Sources/KeyPathInstallationWizard/Core/ServiceHealthChecker.swift b/Sources/KeyPathInstallationWizard/Core/ServiceHealthChecker.swift index 035aa0e72..984c843a5 100644 --- a/Sources/KeyPathInstallationWizard/Core/ServiceHealthChecker.swift +++ b/Sources/KeyPathInstallationWizard/Core/ServiceHealthChecker.swift @@ -873,7 +873,12 @@ public final class ServiceHealthChecker: @unchecked Sendable { return Foundation.FileManager().fileExists(atPath: plistPath) } - /// Verifies that the installed VHID LaunchDaemon plist points to the DriverKit daemon path. + /// Verifies that the installed VHID LaunchDaemon plist points to the DriverKit daemon path + /// and carries the required ProcessType=Interactive key. + /// + /// ProcessType=Interactive keeps the daemon from being starved under CPU + /// load (MAL-57 stuck-key autorepeat). A plist without it predates the fix + /// and should be treated as misconfigured so repair rewrites it. /// /// - Returns: `true` if the plist is correctly configured public func isVHIDDaemonConfiguredCorrectly() -> Bool { @@ -889,11 +894,12 @@ public final class ServiceHealthChecker: @unchecked Sendable { "/Library/Application Support/org.pqrs/Karabiner-DriverKit-VirtualHIDDevice/Applications/Karabiner-VirtualHIDDevice-Daemon.app/Contents/MacOS/Karabiner-VirtualHIDDevice-Daemon" if let args = dict["ProgramArguments"] as? [String], let first = args.first { - let ok = first == expectedPath + let pathOK = first == expectedPath + let processTypeOK = (dict["ProcessType"] as? String) == "Interactive" AppLogger.shared.log( - "πŸ” [ServiceHealthChecker] VHID plist ProgramArguments[0]=\(first) | expected=\(expectedPath) | ok=\(ok)" + "πŸ” [ServiceHealthChecker] VHID plist ProgramArguments[0]=\(first) | pathOK=\(pathOK) | processTypeOK=\(processTypeOK)" ) - return ok + return pathOK && processTypeOK } AppLogger.shared.log( "πŸ” [ServiceHealthChecker] VHID plist ProgramArguments missing or malformed" diff --git a/Tests/KeyPathTests/Services/PlistGeneratorTests.swift b/Tests/KeyPathTests/Services/PlistGeneratorTests.swift index 8695e715a..48a382386 100644 --- a/Tests/KeyPathTests/Services/PlistGeneratorTests.swift +++ b/Tests/KeyPathTests/Services/PlistGeneratorTests.swift @@ -212,6 +212,21 @@ final class PlistGeneratorTests: XCTestCase { XCTAssertTrue(plist.contains(PlistGenerator.vhidDaemonPath)) } + /// ProcessType=Interactive prevents the VHID daemon from being starved + /// under CPU load, which drops Kanata's output connection mid-keystroke + /// and causes stuck-key autorepeat bursts (MAL-57). + func testGenerateVHIDDaemonPlistUsesInteractiveProcessType() { + let plist = PlistGenerator.generateVHIDDaemonPlist() + + guard let data = plist.data(using: .utf8), + let dict = try? PropertyListSerialization.propertyList(from: data, options: [], format: nil) as? [String: Any] + else { + XCTFail("Generated VHID daemon plist is not valid XML/plist format") + return + } + XCTAssertEqual(dict["ProcessType"] as? String, "Interactive") + } + func testGenerateVHIDDaemonPlistValidXML() { let plist = PlistGenerator.generateVHIDDaemonPlist() diff --git a/Tests/KeyPathTests/Services/ServiceHealthCheckerTests.swift b/Tests/KeyPathTests/Services/ServiceHealthCheckerTests.swift index 10b848eb5..abe4b6b46 100644 --- a/Tests/KeyPathTests/Services/ServiceHealthCheckerTests.swift +++ b/Tests/KeyPathTests/Services/ServiceHealthCheckerTests.swift @@ -77,10 +77,13 @@ final class ServiceHealthCheckerTests: XCTestCase { FileManager.default.createFile(atPath: url.path, contents: Data(), attributes: nil) } - private func writeVHIDPlist(programPath: String) throws { - let dict: [String: Any] = [ + private func writeVHIDPlist(programPath: String, processType: String? = "Interactive") throws { + var dict: [String: Any] = [ "ProgramArguments": [programPath] ] + if let processType { + dict["ProcessType"] = processType + } let url = tempLaunchDaemonsDir.appendingPathComponent("\(ServiceHealthChecker.vhidDaemonServiceID).plist") let data = try PropertyListSerialization.data(fromPropertyList: dict, format: .xml, options: 0) try data.write(to: url) @@ -269,4 +272,13 @@ final class ServiceHealthCheckerTests: XCTestCase { try writeVHIDPlist(programPath: "/wrong/path") XCTAssertFalse(checker.isVHIDDaemonConfiguredCorrectly()) } + + /// Plists from before the MAL-57 starvation fix lack ProcessType=Interactive + /// and must report misconfigured so repair rewrites them. + func testIsVHIDDaemonConfiguredCorrectlyReturnsFalseWithoutProcessType() throws { + let expectedPath = + "/Library/Application Support/org.pqrs/Karabiner-DriverKit-VirtualHIDDevice/Applications/Karabiner-VirtualHIDDevice-Daemon.app/Contents/MacOS/Karabiner-VirtualHIDDevice-Daemon" + try writeVHIDPlist(programPath: expectedPath, processType: nil) + XCTAssertFalse(checker.isVHIDDaemonConfiguredCorrectly()) + } } diff --git a/dev-tools/debug/debug-plist-validation.sh b/dev-tools/debug/debug-plist-validation.sh index c3bc97933..efeebb49c 100755 --- a/dev-tools/debug/debug-plist-validation.sh +++ b/dev-tools/debug/debug-plist-validation.sh @@ -61,6 +61,8 @@ cat > "$VHID_DAEMON_TEMP" << 'EOF' KeepAlive + ProcessType + Interactive StandardOutPath /var/log/karabiner-vhid-daemon.log StandardErrorPath diff --git a/docs/bugs/MAL-57-duplicate-keypresses.md b/docs/bugs/MAL-57-duplicate-keypresses.md index af369bff5..981bf703c 100644 --- a/docs/bugs/MAL-57-duplicate-keypresses.md +++ b/docs/bugs/MAL-57-duplicate-keypresses.md @@ -10,6 +10,104 @@ as previous conclusions, not final truth. Latest investigation summary: - [`docs/analysis/2026-03-07-duplicate-key-under-load-investigation.md`](/Users/malpern/local-code/KeyPath/.worktrees/duplicate-key-investigation/docs/analysis/2026-03-07-duplicate-key-under-load-investigation.md) +- 2026-06-10 live incident evidence below (strongest capture so far; mechanism confirmed end to end) + +## 2026-06-10 Incident Evidence (mechanism confirmed) + +Two live incidents were captured with full logs on all three layers (kanata, the +Karabiner VHID daemon, and KeyPath). Both produced multi-second autorepeat bursts +of a single letter while typing ("togggggg…", "hteeeeee…"). Machine context: several +agent worktrees running parallel Swift builds and `quick-deploy.sh` at the time of +both incidents. The KeyPath GUI (and therefore the stuck-key detector/recovery) +was **not** running. + +### Timeline (from `/var/log/com.keypath.kanata.stdout.log` and `/var/log/karabiner/virtual_hid_device_service.log`) + +Incident 1 β€” 21:42:17 (the "g" burst): + +``` +driver connected: false / driver connected: true (repeated flapping for minutes prior) +21:42:17.3224 [WARN] output backend unavailable during write β€” releasing input devices +21:42:17.3265 [WARN] output backend suspended: recovery-entry +21:42:18.4464 [INFO] output backend and console session ready β€” re-grabbing input devices +21:42:18.5345 [KeyInput] sent key=backspace ... <- user deleting the repeated g's +21:42:18.7421 managed repeat BSpace (x8) +``` + +Incident 2 β€” 22:00:08-09 (the "e" burst). Kanata logged the user typing +"check hte logs … and", then: + +``` +22:00:08.5099 [KeyInput] sent key=e action=Press +22:00:08.6263 [KeyInput] sent key=e action=Release <- written into a dying socket; never took effect +22:00:09.6736 [KeyInput] sent key=d action=Press +connected <- pqrs client reconnected +22:00:09.7820 [WARN] output backend unavailable during write β€” releasing input devices +22:00:09.7821 [WARN] dropping KEY_D Release: output backend unavailable (will recover) +driver connected: false +driver connected: true +22:00:14.2039 [INFO] output backend and console session ready β€” re-grabbing input devices +``` + +Daemon side (`virtual_hid_device_service.log`): new client socket connected at +22:00:10.150; the old client (and with it the virtual keyboard state holding the +stuck key) was not torn down until 22:00:11.378-11.744. From the lost `e` release +at 22:00:08.6 that is ~3 seconds of OS-level autorepeat. + +Both the dext (pid 736) and the Karabiner-VirtualHIDDevice-Daemon (pid 814) ran +continuously through both incidents β€” no crashes. The failure is purely the +socket between kanata and the daemon. + +### Confirmed causal chain + +1. Kanata talks to the VHID daemon via the pqrs client in the `karabiner-driverkit` + crate. The client pings the daemon every 3s and declares the connection dead on a + missed heartbeat deadline (`virtual_hid_device_service/client.hpp`: + `set_server_check_interval(3000ms)`, `next_heartbeat_deadline_exceeded`). +2. Under CPU load the heartbeat misses and the connection drops. Key releases + in flight are lost: either written into the dying socket (the `e`) or + explicitly dropped by `drop_if_sink_disconnected` in + `External/kanata/src/oskbd/macos.rs` (the `d` β€” `dropping KEY_D Release`). +3. The virtual HID keyboard keeps the key logically down until the daemon tears + down the old client (~1.2s after the new client connects). macOS autorepeats + the stuck key for the whole window. +4. The fork's recovery (`release_tracked_output_keys` in + `External/kanata/src/kanata/macos.rs`) runs at re-grab β€” *after* the damage + window has already closed on its own, so it cannot shorten the burst. + +### Why the daemon starves: launchd priority asymmetry + +The kanata daemon plist sets `ProcessType=Interactive`; the KeyPath-installed +`com.keypath.karabiner-vhiddaemon.plist` set **no** `ProcessType`, so under load +macOS may deprioritize exactly the process that must answer heartbeats and +process key reports. + +### Fix layers + +- **Layer 1 (done, this repo):** `ProcessType=Interactive` added to the VHID + daemon plist in `PlistGenerator.generateVHIDDaemonPlist()`, the helper's + copy in `HelperService.swift`, the legacy kanata generators (the shipped + SMAppService plist `Sources/KeyPathApp/com.keypath.kanata.plist` already had + it), and the debug script template. `isVHIDDaemonConfiguredCorrectly()` now + also requires the key, so a pre-fix plist reports misconfigured and repair + rewrites it (`installOrRepairVHIDServices` / `repairVHIDDaemonServices` + rewrite unconditionally). Note: that check currently feeds only the repair + postflight β€” wiring plist-content validation into `getServiceStatus()` so the + wizard proactively flags stale plists on old installs is a follow-up. + Expected effect: fewer heartbeat-miss disconnects, so far fewer incidents. +- **Layer 2 (open, kanata fork):** shrink the autorepeat window. On reconnect the + client's `connected` callback only calls `virtual_hid_keyboard_initialize`; + also issuing `virtual_hid_keyboard_reset` would clear stuck keys immediately + (~10.2s in the timeline above instead of 11.7s). Force-closing the old client + object as soon as `sink_ready` flips false would help further. +- **Layer 3 (open, kanata fork):** never silently drop a Release. + `drop_if_sink_disconnected` treats all events equally; dropped Releases are the + toxic case and should be remembered and replayed after reconnect + (`output_pressed_since` tracking already exists). + +Residual gap no fix can fully close: a release written into a socket that is +dying but not yet declared dead is unrecoverable in transit; Layer 2 bounds the +damage to the reconnect latency. ## Problem Statement