From 2737cb1fdaa6a4e3b75bcc8079a4fa2d26a93396 Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 15:54:42 -0500 Subject: [PATCH 1/9] Fixes zombie process leak in execFile - Defers cmd.Wait() immediately after cmd.Start() to ensure process reaping - Combines Wait error with any stdin operation errors using multierror - Moves timeout timer setup before stdin operations for proper cleanup Previously early returns from Seek or Copy failures would skip cmd.Wait(), leaving zombie processes. --- exec.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/exec.go b/exec.go index 15ae991..cabde77 100644 --- a/exec.go +++ b/exec.go @@ -186,7 +186,7 @@ func getErrorHandlerEnv(f string, err error) []string { return env } -func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, env ...string) error { +func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, env ...string) (err error) { cmd := exec.Command(f) if h.Stdout != nil { @@ -213,6 +213,14 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, if err != nil { return err } + defer func() { + err = multierror.Append(err, cmd.Wait()).ErrorOrNil() + }() + + timer := time.AfterFunc(timeout, func() { + cmd.Process.Kill() + }) + defer timer.Stop() _, err = data.Seek(0, 0) if err != nil { @@ -225,14 +233,7 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, } stdin.Close() - timer := time.AfterFunc(timeout, func() { - cmd.Process.Kill() - }) - - err = cmd.Wait() - timer.Stop() - - return err + return nil } // todo: base this on OS From 367dba42252737c0dd1fc77e20c78efdc3c6116b Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 16:01:30 -0500 Subject: [PATCH 2/9] Replaces timer-based timeout with context cancellation - Uses exec.CommandContext with context.WithTimeout for process lifecycle - Moves data.Seek before cmd.Start to avoid starting process on seek failure - Removes deferred stdin.Close in favor of explicit cleanup on error paths - Adds timeout exceeded detection with descriptive error message - Maintains multierror combination of stdin and wait errors Context-based cancellation is more idiomatic and ensures proper process cleanup on timeout. --- exec.go | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/exec.go b/exec.go index cabde77..1326e16 100644 --- a/exec.go +++ b/exec.go @@ -1,6 +1,7 @@ package hookah import ( + "context" "errors" "fmt" "io" @@ -187,7 +188,17 @@ func getErrorHandlerEnv(f string, err error) []string { } func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, env ...string) (err error) { - cmd := exec.Command(f) + ctx := context.Background() + + var cancel context.CancelFunc + if timeout > 0 { + ctx, cancel = context.WithTimeout(ctx, timeout) + } else { + cancel = func() {} + } + defer cancel() + + cmd := exec.CommandContext(ctx, f) if h.Stdout != nil { cmd.Stdout = h.Stdout @@ -207,31 +218,35 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, if err != nil { return err } - defer stdin.Close() - err = cmd.Start() - if err != nil { + if _, err := data.Seek(0, 0); err != nil { + _ = stdin.Close() + return err + } + + if err := cmd.Start(); err != nil { + _ = stdin.Close() return err } + defer func() { - err = multierror.Append(err, cmd.Wait()).ErrorOrNil() - }() + waitErr := cmd.Wait() - timer := time.AfterFunc(timeout, func() { - cmd.Process.Kill() - }) - defer timer.Stop() + if ctx.Err() == context.DeadlineExceeded { + waitErr = fmt.Errorf("hook timed out after %s: %w", timeout, waitErr) + } - _, err = data.Seek(0, 0) - if err != nil { + err = multierror.Append(err, waitErr).ErrorOrNil() + }() + + if _, err := io.Copy(stdin, data); err != nil { + _ = stdin.Close() return err } - _, err = io.Copy(stdin, data) - if err != nil { + if err := stdin.Close(); err != nil { return err } - stdin.Close() return nil } From 7c4a3063df5b87e5aaca5667b0056bf01613da5b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Apr 2026 21:14:17 +0000 Subject: [PATCH 3/9] Fix zombie process leak and add regression tests for execFile timeout/copy-error paths Agent-Logs-Url: https://github.com/donatj/hookah/sessions/9c696600-1506-4502-9912-17bc6b4bb88d Co-authored-by: donatj <133747+donatj@users.noreply.github.com> --- exec.go | 11 ++++++-- exec_test.go | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/exec.go b/exec.go index 1326e16..ad05881 100644 --- a/exec.go +++ b/exec.go @@ -232,11 +232,18 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, defer func() { waitErr := cmd.Wait() - if ctx.Err() == context.DeadlineExceeded { + if waitErr != nil && ctx.Err() == context.DeadlineExceeded { waitErr = fmt.Errorf("hook timed out after %s: %w", timeout, waitErr) } - err = multierror.Append(err, waitErr).ErrorOrNil() + switch { + case err == nil: + err = waitErr + case waitErr == nil: + // keep err unchanged; preserve original error type + default: + err = multierror.Append(err, waitErr).ErrorOrNil() + } }() if _, err := io.Copy(stdin, data); err != nil { diff --git a/exec_test.go b/exec_test.go index 9ba2416..6915cc0 100644 --- a/exec_test.go +++ b/exec_test.go @@ -2,12 +2,16 @@ package hookah import ( "bytes" + "errors" + "io" "log" + "os" "strings" "testing" "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestOnlyExecutableBinsFound(t *testing.T) { @@ -144,3 +148,70 @@ func TestEnvPopulatedCorrectly(t *testing.T) { } } + +// TestExecFileTimeout verifies that execFile respects the timeout and returns +// a timeout error without hanging for long-running scripts. +func TestExecFileTimeout(t *testing.T) { + f, err := os.CreateTemp("", "hookah-test-*.sh") + require.NoError(t, err) + defer os.Remove(f.Name()) + + _, _ = io.WriteString(f, "#!/bin/sh\nsleep 30\n") + require.NoError(t, f.Close()) + require.NoError(t, os.Chmod(f.Name(), 0700)) + + h := HookExec{ + Stdout: io.Discard, + Stderr: io.Discard, + } + data := strings.NewReader(`{}`) + + start := time.Now() + err = h.execFile(f.Name(), data, 200*time.Millisecond) + elapsed := time.Since(start) + + require.Error(t, err) + assert.Less(t, elapsed, 5*time.Second, "execFile should not hang after timeout") + assert.Contains(t, err.Error(), "timed out") +} + +// TestExecFileCopyError verifies that a Read error during stdin copy still allows +// the child process to be reaped without the call hanging (no zombie processes). +func TestExecFileCopyError(t *testing.T) { + f, err := os.CreateTemp("", "hookah-test-*.sh") + require.NoError(t, err) + defer os.Remove(f.Name()) + + _, _ = io.WriteString(f, "#!/bin/sh\ncat\n") + require.NoError(t, f.Close()) + require.NoError(t, os.Chmod(f.Name(), 0700)) + + h := HookExec{ + Stdout: io.Discard, + Stderr: io.Discard, + } + + readErr := errors.New("simulated read error") + data := &readErrSeeker{readErr: readErr} + + done := make(chan error, 1) + go func() { + done <- h.execFile(f.Name(), data, 5*time.Second) + }() + + select { + case err := <-done: + assert.ErrorContains(t, err, readErr.Error()) + case <-time.After(3 * time.Second): + t.Fatal("execFile hung waiting for process to be reaped") + } +} + +// readErrSeeker is a ReadSeeker whose Seek always succeeds but whose Read always +// returns the configured error, simulating an io.Copy failure mid-transfer. +type readErrSeeker struct { + readErr error +} + +func (r *readErrSeeker) Seek(_ int64, _ int) (int64, error) { return 0, nil } +func (r *readErrSeeker) Read(_ []byte) (int, error) { return 0, r.readErr } From 5301c02bda312de8ed8f887899a76ffab4c5a72d Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 16:28:26 -0500 Subject: [PATCH 4/9] Removes redundant switch in error handling multierror.Append already handles nil cases --- exec.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/exec.go b/exec.go index ad05881..ddab9ef 100644 --- a/exec.go +++ b/exec.go @@ -236,14 +236,7 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, waitErr = fmt.Errorf("hook timed out after %s: %w", timeout, waitErr) } - switch { - case err == nil: - err = waitErr - case waitErr == nil: - // keep err unchanged; preserve original error type - default: - err = multierror.Append(err, waitErr).ErrorOrNil() - } + err = multierror.Append(err, waitErr).ErrorOrNil() }() if _, err := io.Copy(stdin, data); err != nil { From 2ac41c2af52f58fabfc7bfe5a48233b48bdc6c13 Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 16:58:30 -0500 Subject: [PATCH 5/9] Fixes timeout handling to kill child processes - Sets Setpgid to create process group for spawned scripts - Monitors context deadline and explicitly kills process group on timeout - Uses negative PID with SIGKILL to terminate all children Shell scripts spawn child processes that survive parent termination. CommandContext kills only the parent shell, leaving children like sleep running. Process groups allow killing the entire tree. --- exec.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/exec.go b/exec.go index ddab9ef..81cdb4f 100644 --- a/exec.go +++ b/exec.go @@ -199,6 +199,7 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, defer cancel() cmd := exec.CommandContext(ctx, f) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} if h.Stdout != nil { cmd.Stdout = h.Stdout @@ -230,7 +231,21 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, } defer func() { - waitErr := cmd.Wait() + // Monitor context timeout and kill process group if needed + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + var waitErr error + select { + case waitErr = <-done: + // Process finished naturally + case <-ctx.Done(): + // Context timeout - kill the entire process group + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + waitErr = <-done + } if waitErr != nil && ctx.Err() == context.DeadlineExceeded { waitErr = fmt.Errorf("hook timed out after %s: %w", timeout, waitErr) From 295a636a90e9da3859c4368b5ad2af7870a5abc8 Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 17:00:57 -0500 Subject: [PATCH 6/9] Simplifies timeout by overriding cmd.Cancel - Overrides cmd.Cancel to kill process group on context timeout - Removes manual select/goroutine timeout monitoring - Returns to simple cmd.Wait in defer CommandContext automatically calls cmd.Cancel on timeout. Overriding it to kill the process group is cleaner than manually monitoring ctx.Done. --- exec.go | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/exec.go b/exec.go index 81cdb4f..6542f76 100644 --- a/exec.go +++ b/exec.go @@ -200,6 +200,10 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, cmd := exec.CommandContext(ctx, f) cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + // Kill the entire process group instead of just the parent + return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } if h.Stdout != nil { cmd.Stdout = h.Stdout @@ -231,21 +235,7 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, } defer func() { - // Monitor context timeout and kill process group if needed - done := make(chan error, 1) - go func() { - done <- cmd.Wait() - }() - - var waitErr error - select { - case waitErr = <-done: - // Process finished naturally - case <-ctx.Done(): - // Context timeout - kill the entire process group - _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) - waitErr = <-done - } + waitErr := cmd.Wait() if waitErr != nil && ctx.Err() == context.DeadlineExceeded { waitErr = fmt.Errorf("hook timed out after %s: %w", timeout, waitErr) From ef6c105c5d6b945415116ef70ff94113076e144f Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 17:04:36 -0500 Subject: [PATCH 7/9] Guards cmd.Cancel and ignores stdin close error - Guards cmd.Cancel against nil process - Ignores stdin.Close error after successful copy Child processes may exit early without reading stdin, causing close to fail with a broken pipe error. --- exec.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/exec.go b/exec.go index 6542f76..8978dcd 100644 --- a/exec.go +++ b/exec.go @@ -202,6 +202,9 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} cmd.Cancel = func() error { // Kill the entire process group instead of just the parent + if cmd.Process == nil { + return os.ErrProcessDone + } return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) } @@ -249,9 +252,8 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, return err } - if err := stdin.Close(); err != nil { - return err - } + // Ignore close error - child may exit early without reading all stdin + _ = stdin.Close() return nil } From 7025ad177a3590abf424517e67b07511b908fde8 Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Fri, 24 Apr 2026 17:11:52 -0500 Subject: [PATCH 8/9] Adds documentation to execFile --- exec.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/exec.go b/exec.go index 8978dcd..52f22f4 100644 --- a/exec.go +++ b/exec.go @@ -187,6 +187,10 @@ func getErrorHandlerEnv(f string, err error) []string { return env } +// execFile executes the hook script at path f with data piped to stdin and the given environment variables. +// If timeout is greater than zero, the process and its children are killed via process group termination after +// the timeout expires. If timeout is zero, the process runs without a timeout. The function always waits for +// the process to exit, preventing zombie processes. func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, env ...string) (err error) { ctx := context.Background() From 87104616b6da0f17d15a5d4a1709e971f3faac53 Mon Sep 17 00:00:00 2001 From: Jesse Donat Date: Wed, 20 May 2026 16:33:57 -0500 Subject: [PATCH 9/9] Replaces multierror with errors.Join in execFile - Switches from multierror.Append().ErrorOrNil() to native errors.Join - Completes migration to stdlib error handling --- exec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exec.go b/exec.go index 6fcfa12..1ca7739 100644 --- a/exec.go +++ b/exec.go @@ -246,7 +246,7 @@ func (h *HookExec) execFile(f string, data io.ReadSeeker, timeout time.Duration, waitErr = fmt.Errorf("hook timed out after %s: %w", timeout, waitErr) } - err = multierror.Append(err, waitErr).ErrorOrNil() + err = errors.Join(err, waitErr) }() if _, err := io.Copy(stdin, data); err != nil {