From 847140e11594270167ff4057d37aa1c91c52a449 Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 26 Dec 2024 23:09:46 -0500 Subject: [PATCH 01/11] kill --- pexec/managed_process.go | 30 ++++++++++++++++++++++++++++++ pexec/managed_process_test.go | 2 ++ pexec/managed_process_unix.go | 9 +++++++++ pexec/managed_process_windows.go | 8 +++++++- 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/pexec/managed_process.go b/pexec/managed_process.go index 14752998..25201028 100644 --- a/pexec/managed_process.go +++ b/pexec/managed_process.go @@ -32,6 +32,10 @@ type ManagedProcess interface { // there's any system level issue stopping the process. Stop() error + // Kill will attempt to kill the process group and not wait for completion. Only use this if + // comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired). + Kill() + // Status return nil when the process is both alive and owned. // If err is non-nil, process may be a) alive but not owned or b) dead. Status() error @@ -432,3 +436,29 @@ func (p *managedProcess) Stop() error { } return errors.Errorf("non-successful exit code: %d", p.cmd.ProcessState.ExitCode()) } + +func (p *managedProcess) Kill() { + // Minimally hold a lock here so that we can signal the + // management goroutine to stop. We will attempt to kill the + // process even if p.stopped is true. + p.mu.Lock() + if !p.stopped { + close(p.killCh) + p.stopped = true + } + + if p.cmd == nil { + p.mu.Unlock() + return + } + p.mu.Unlock() + + // Since p.cmd is mutex guarded and we just signaled the manage + // goroutine to stop, no new Start can happen and therefore + // p.cmd can no longer be modified rendering it safe to read + // without a lock held. + // We are intentionally not checking the error here, we are already + // in a bad state. + //nolint:errcheck,gosec + p.forceKill() +} diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 75e3e44f..b009b7ac 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -702,3 +702,5 @@ func (fp *fakeProcess) UnixPid() (int, error) { in reality tests should just depend on the methods they rely on. UnixPid is not one of those methods (for better or worse)`) } + +func (fp *fakeProcess) Kill() {} diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go index 498be9e8..f696fe16 100644 --- a/pexec/managed_process_unix.go +++ b/pexec/managed_process_unix.go @@ -4,6 +4,7 @@ package pexec import ( "os" + "os/exec" "os/user" "strconv" "syscall" @@ -126,6 +127,14 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } +// forceKill kills everything in the process group. This will not wait for completion and may result in a zombie process. +func (p *managedProcess) forceKill() error { + pidStr := strconv.Itoa(-p.cmd.Process.Pid) + p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid) + //nolint:gosec + return exec.Command("kill", "-9", pidStr).Start() +} + func isWaitErrUnknown(err string, forceKilled bool) bool { // This can easily happen if the process does not handle interrupts gracefully // and it won't provide us any exit code info. diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go index ad1034b2..088ef377 100644 --- a/pexec/managed_process_windows.go +++ b/pexec/managed_process_windows.go @@ -25,7 +25,6 @@ func parseSignal(sigStr, name string) (syscall.Signal, error) { return 0, errors.New("signals not supported on Windows") } - func (p *managedProcess) sysProcAttr() (*syscall.SysProcAttr, error) { ret := &syscall.SysProcAttr{ CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP, @@ -107,6 +106,13 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } +// forceKill kills everything in the process tree. This will not wait for completion and may result in a zombie process. +func (p *managedProcess) forceKill() error { + pidStr := strconv.Itoa(p.cmd.Process.Pid) + p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) + return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start() +} + func isWaitErrUnknown(err string, forceKilled bool) bool { if !forceKilled { return false From af8640d4484d21a3ed8e78776452aa022d706ea8 Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 00:03:30 -0500 Subject: [PATCH 02/11] pr feedback, add test --- pexec/managed_process.go | 6 +- pexec/managed_process_test.go | 112 +++++++++++++++++++++++++------ pexec/managed_process_unix.go | 9 +-- pexec/managed_process_windows.go | 2 +- 4 files changed, 99 insertions(+), 30 deletions(-) diff --git a/pexec/managed_process.go b/pexec/managed_process.go index 25201028..e3081006 100644 --- a/pexec/managed_process.go +++ b/pexec/managed_process.go @@ -34,7 +34,7 @@ type ManagedProcess interface { // Kill will attempt to kill the process group and not wait for completion. Only use this if // comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired). - Kill() + KillGroup() // Status return nil when the process is both alive and owned. // If err is non-nil, process may be a) alive but not owned or b) dead. @@ -437,7 +437,7 @@ func (p *managedProcess) Stop() error { return errors.Errorf("non-successful exit code: %d", p.cmd.ProcessState.ExitCode()) } -func (p *managedProcess) Kill() { +func (p *managedProcess) KillGroup() { // Minimally hold a lock here so that we can signal the // management goroutine to stop. We will attempt to kill the // process even if p.stopped is true. @@ -460,5 +460,5 @@ func (p *managedProcess) Kill() { // We are intentionally not checking the error here, we are already // in a bad state. //nolint:errcheck,gosec - p.forceKill() + p.forceKillGroup() } diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index b009b7ac..69f780d5 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -411,10 +411,10 @@ func TestManagedProcessStop(t *testing.T) { bashScriptBuilder.WriteString("\n") } bashScriptBuilder.WriteString(fmt.Sprintf(`echo hello >> '%s' -while true -do echo hey -sleep 1 -done`, tempFile.Name())) + while true + do echo hey + sleep 1 + done`, tempFile.Name())) bashScriptBuilder.WriteString("\n") bashScript := bashScriptBuilder.String() @@ -455,13 +455,13 @@ done`, tempFile.Name())) watcher1, tempFile := testutils.WatchedFile(t) bashScript1 := fmt.Sprintf(` - trap "echo SIGTERM >> '%s'" SIGTERM - echo hello >> '%s' - while true - do echo hey - sleep 1 - done - `, tempFile.Name(), tempFile.Name()) + trap "echo SIGTERM >> '%s'" SIGTERM + echo hello >> '%s' + while true + do echo hey + sleep 1 + done + `, tempFile.Name(), tempFile.Name()) proc := NewManagedProcess(ProcessConfig{ Name: "bash", @@ -496,20 +496,20 @@ done`, tempFile.Name())) watcher3, tempFile3 := testutils.WatchedFile(t) trapScript := ` - trap "echo SIGTERM >> '%s'" SIGTERM - echo hello >> '%s' - while true - do echo hey - sleep 1 - done - ` + trap "echo SIGTERM >> '%s'" SIGTERM + echo hello >> '%s' + while true + do echo hey + sleep 1 + done + ` bashScript1 := fmt.Sprintf(trapScript, tempFile1.Name(), tempFile1.Name()) bashScript2 := fmt.Sprintf(trapScript, tempFile2.Name(), tempFile2.Name()) bashScriptParent := fmt.Sprintf(` - bash -c '%s' & - bash -c '%s' & - `+trapScript, + bash -c '%s' & + bash -c '%s' & + `+trapScript, bashScript1, bashScript2, tempFile3.Name(), @@ -569,6 +569,74 @@ done`, tempFile.Name())) <-watcher1.Events test.That(t, proc.Stop(), test.ShouldBeNil) }) + t.Run("kill signaling with children", func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("cannot test this on windows") + } + logger := golog.NewTestLogger(t) + + watcher1, tempFile1 := testutils.WatchedFile(t) + watcher2, tempFile2 := testutils.WatchedFile(t) + watcher3, tempFile3 := testutils.WatchedFile(t) + + // this script writes a string to the specified file every 100ms + script := ` + while true + do echo hello >> '%s' + sleep 0.1 + done + ` + + bashScript1 := fmt.Sprintf(script, tempFile1.Name()) + bashScript2 := fmt.Sprintf(script, tempFile2.Name()) + bashScriptParent := fmt.Sprintf(` + bash -c '%s' & + bash -c '%s' & + `+script, + bashScript1, + bashScript2, + tempFile3.Name(), + tempFile3.Name(), + ) + + proc := NewManagedProcess(ProcessConfig{ + Name: "bash", + Args: []string{"-c", bashScriptParent}, + }, logger) + + // To confirm that the processes have died, confirm that the size of the file stopped increasing + getSize := func(file *os.File) int64 { + info, _ := file.Stat() + return info.Size() + } + + file1SizeBeforeStart := getSize(tempFile1) + file2SizeBeforeStart := getSize(tempFile2) + file3SizeBeforeStart := getSize(tempFile3) + + test.That(t, proc.Start(context.Background()), test.ShouldBeNil) + + <-watcher1.Events + <-watcher2.Events + <-watcher3.Events + + proc.KillGroup() + + file1SizeAfterKill := getSize(tempFile1) + file2SizeAfterKill := getSize(tempFile2) + file3SizeAfterKill := getSize(tempFile3) + + test.That(t, file1SizeAfterKill, test.ShouldBeGreaterThan, file1SizeBeforeStart) + test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart) + test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart) + + // wait a short amount of time and check again - this time the size should not have increased. + time.Sleep(300 * time.Millisecond) + + test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill) + test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill) + test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill) + }) } func TestManagedProcessEnvironmentVariables(t *testing.T) { @@ -703,4 +771,4 @@ in reality tests should just depend on the methods they rely on. UnixPid is not of those methods (for better or worse)`) } -func (fp *fakeProcess) Kill() {} +func (fp *fakeProcess) KillGroup() {} diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go index f696fe16..e2f81b54 100644 --- a/pexec/managed_process_unix.go +++ b/pexec/managed_process_unix.go @@ -127,12 +127,13 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } -// forceKill kills everything in the process group. This will not wait for completion and may result in a zombie process. -func (p *managedProcess) forceKill() error { - pidStr := strconv.Itoa(-p.cmd.Process.Pid) +// forceKill kills everything in the process group. This will not wait for completion and may result the +// kill becoming a zombie process. +func (p *managedProcess) forceKillGroup() error { + pgidStr := strconv.Itoa(-p.cmd.Process.Pid) p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid) //nolint:gosec - return exec.Command("kill", "-9", pidStr).Start() + return exec.Command("kill", "-9", pgidStr).Start() } func isWaitErrUnknown(err string, forceKilled bool) bool { diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go index 088ef377..63a18983 100644 --- a/pexec/managed_process_windows.go +++ b/pexec/managed_process_windows.go @@ -107,7 +107,7 @@ func (p *managedProcess) kill() (bool, error) { } // forceKill kills everything in the process tree. This will not wait for completion and may result in a zombie process. -func (p *managedProcess) forceKill() error { +func (p *managedProcess) forceKillGroup() error { pidStr := strconv.Itoa(p.cmd.Process.Pid) p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start() From 6beca95e101d3f362c0d54647ee2423f2ec8e75d Mon Sep 17 00:00:00 2001 From: Cheuk <90270663+cheukt@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:13:59 -0500 Subject: [PATCH 03/11] Apply suggestions from code review Co-authored-by: Benjamin Rewis <32186188+benjirewis@users.noreply.github.com> --- pexec/managed_process.go | 3 ++- pexec/managed_process_unix.go | 2 +- pexec/managed_process_windows.go | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pexec/managed_process.go b/pexec/managed_process.go index e3081006..7f03fecf 100644 --- a/pexec/managed_process.go +++ b/pexec/managed_process.go @@ -32,7 +32,7 @@ type ManagedProcess interface { // there's any system level issue stopping the process. Stop() error - // Kill will attempt to kill the process group and not wait for completion. Only use this if + // KillGroup will attempt to kill the process group and not wait for completion. Only use this if // comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired). KillGroup() @@ -437,6 +437,7 @@ func (p *managedProcess) Stop() error { return errors.Errorf("non-successful exit code: %d", p.cmd.ProcessState.ExitCode()) } +// KillGroup kills the process group. func (p *managedProcess) KillGroup() { // Minimally hold a lock here so that we can signal the // management goroutine to stop. We will attempt to kill the diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go index e2f81b54..a5be4317 100644 --- a/pexec/managed_process_unix.go +++ b/pexec/managed_process_unix.go @@ -127,7 +127,7 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } -// forceKill kills everything in the process group. This will not wait for completion and may result the +// forceKillGroup kills everything in the process group. This will not wait for completion and may result the // kill becoming a zombie process. func (p *managedProcess) forceKillGroup() error { pgidStr := strconv.Itoa(-p.cmd.Process.Pid) diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go index 63a18983..df758860 100644 --- a/pexec/managed_process_windows.go +++ b/pexec/managed_process_windows.go @@ -106,7 +106,7 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } -// forceKill kills everything in the process tree. This will not wait for completion and may result in a zombie process. +// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process. func (p *managedProcess) forceKillGroup() error { pidStr := strconv.Itoa(p.cmd.Process.Pid) p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) From 09c7703ce3cb43ad8cab4a9dcdc0233db0ea39b4 Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 14:58:12 -0500 Subject: [PATCH 04/11] fix? --- pexec/managed_process_test.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 69f780d5..442a7d55 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -630,12 +630,22 @@ func TestManagedProcessStop(t *testing.T) { test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart) test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart) - // wait a short amount of time and check again - this time the size should not have increased. - time.Sleep(300 * time.Millisecond) + // since KillGroup does not wait, we might have to check file size a few times as the kill + // might take a little to propagate. We want to make sure that the file size stops increasing. + testutils.WaitForAssertionWithSleep(t, 300*time.Millisecond, 50, func(tb testing.TB) { + tempSize1 := getSize(tempFile1) + tempSize2 := getSize(tempFile2) + tempSize3 := getSize(tempFile3) + + test.That(t, tempSize1, test.ShouldEqual, file1SizeAfterKill) + test.That(t, tempSize2, test.ShouldEqual, file2SizeAfterKill) + test.That(t, tempSize3, test.ShouldEqual, file3SizeAfterKill) + + file1SizeAfterKill = tempSize1 + file2SizeAfterKill = tempSize1 + file3SizeAfterKill = tempSize1 + }) - test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill) - test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill) - test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill) }) } From 9cf10a67dc387ec397a8476aace8f85d2092cfee Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 15:18:47 -0500 Subject: [PATCH 05/11] lint --- pexec/managed_process_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 442a7d55..9046aa84 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -645,7 +645,6 @@ func TestManagedProcessStop(t *testing.T) { file2SizeAfterKill = tempSize1 file3SizeAfterKill = tempSize1 }) - }) } From 8ae366c84ab7df715d82589663319bc9fd5264f1 Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 16:57:53 -0500 Subject: [PATCH 06/11] should hopefully work --- pexec/managed_process.go | 10 +++++----- pexec/managed_process_test.go | 24 +++++++----------------- pexec/managed_process_unix.go | 8 ++++++-- pexec/managed_process_windows.go | 21 +++++++++++++++------ 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/pexec/managed_process.go b/pexec/managed_process.go index 7f03fecf..a78477e1 100644 --- a/pexec/managed_process.go +++ b/pexec/managed_process.go @@ -34,7 +34,8 @@ type ManagedProcess interface { // KillGroup will attempt to kill the process group and not wait for completion. Only use this if // comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired). - KillGroup() + // Returns kill process that can be waited on if desired. + KillGroup() (*exec.Cmd, error) // Status return nil when the process is both alive and owned. // If err is non-nil, process may be a) alive but not owned or b) dead. @@ -438,7 +439,7 @@ func (p *managedProcess) Stop() error { } // KillGroup kills the process group. -func (p *managedProcess) KillGroup() { +func (p *managedProcess) KillGroup() (*exec.Cmd, error) { // Minimally hold a lock here so that we can signal the // management goroutine to stop. We will attempt to kill the // process even if p.stopped is true. @@ -450,7 +451,7 @@ func (p *managedProcess) KillGroup() { if p.cmd == nil { p.mu.Unlock() - return + return nil, errAlreadyStopped } p.mu.Unlock() @@ -460,6 +461,5 @@ func (p *managedProcess) KillGroup() { // without a lock held. // We are intentionally not checking the error here, we are already // in a bad state. - //nolint:errcheck,gosec - p.forceKillGroup() + return p.forceKillGroup() } diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 9046aa84..22e47fa3 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -620,7 +620,9 @@ func TestManagedProcessStop(t *testing.T) { <-watcher2.Events <-watcher3.Events - proc.KillGroup() + cmd, err := proc.KillGroup() + test.That(t, err, test.ShouldBeNil) + test.That(t, cmd.Wait(), test.ShouldBeNil) file1SizeAfterKill := getSize(tempFile1) file2SizeAfterKill := getSize(tempFile2) @@ -630,21 +632,9 @@ func TestManagedProcessStop(t *testing.T) { test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart) test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart) - // since KillGroup does not wait, we might have to check file size a few times as the kill - // might take a little to propagate. We want to make sure that the file size stops increasing. - testutils.WaitForAssertionWithSleep(t, 300*time.Millisecond, 50, func(tb testing.TB) { - tempSize1 := getSize(tempFile1) - tempSize2 := getSize(tempFile2) - tempSize3 := getSize(tempFile3) - - test.That(t, tempSize1, test.ShouldEqual, file1SizeAfterKill) - test.That(t, tempSize2, test.ShouldEqual, file2SizeAfterKill) - test.That(t, tempSize3, test.ShouldEqual, file3SizeAfterKill) - - file1SizeAfterKill = tempSize1 - file2SizeAfterKill = tempSize1 - file3SizeAfterKill = tempSize1 - }) + test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill) + test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill) + test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill) }) } @@ -780,4 +770,4 @@ in reality tests should just depend on the methods they rely on. UnixPid is not of those methods (for better or worse)`) } -func (fp *fakeProcess) KillGroup() {} +func (fp *fakeProcess) KillGroup() (*exec.Cmd, error) { return nil, errAlreadyStopped } diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go index a5be4317..b28c23dd 100644 --- a/pexec/managed_process_unix.go +++ b/pexec/managed_process_unix.go @@ -129,11 +129,15 @@ func (p *managedProcess) kill() (bool, error) { // forceKillGroup kills everything in the process group. This will not wait for completion and may result the // kill becoming a zombie process. -func (p *managedProcess) forceKillGroup() error { +func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) { pgidStr := strconv.Itoa(-p.cmd.Process.Pid) p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid) //nolint:gosec - return exec.Command("kill", "-9", pgidStr).Start() + cmd := exec.Command("kill", "-9", pgidStr) + if err := cmd.Start(); err != nil { + return nil, err + } + return cmd, nil } func isWaitErrUnknown(err string, forceKilled bool) bool { diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go index df758860..f8892e81 100644 --- a/pexec/managed_process_windows.go +++ b/pexec/managed_process_windows.go @@ -89,7 +89,7 @@ func (p *managedProcess) kill() (bool, error) { select { case <-timer2.C: p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) - if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil { + if err := forceKillCmd(pidStr).Run(); err != nil { return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid) } forceKilled = true @@ -97,7 +97,7 @@ func (p *managedProcess) kill() (bool, error) { timer2.Stop() } } else { - if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil { + if err := forceKillCmd(pidStr).Run(); err != nil { return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid) } forceKilled = true @@ -106,11 +106,20 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } -// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process. -func (p *managedProcess) forceKillGroup() error { - pidStr := strconv.Itoa(p.cmd.Process.Pid) +func forceKillCmd(pidStr string) *exec.Cmd { + return exec.Command("taskkill", "/t", "/f", "/pid", pidStr) +} + +// forceKillGroup kills everything in the process group. This will not wait for completion and may result the +// kill becoming a zombie process. +func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) { + pgidStr := strconv.Itoa(p.cmd.Process.Pid) p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) - return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start() + cmd := forceKillCmd(pgidStr) + if err := cmd.Start(); err != nil { + return nil, err + } + return cmd, nil } func isWaitErrUnknown(err string, forceKilled bool) bool { From 957ea627c1d85106747b46c509175cce54caec8b Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 17:11:00 -0500 Subject: [PATCH 07/11] Revert "should hopefully work" This reverts commit 8ae366c84ab7df715d82589663319bc9fd5264f1. --- pexec/managed_process.go | 10 +++++----- pexec/managed_process_test.go | 24 +++++++++++++++++------- pexec/managed_process_unix.go | 8 ++------ pexec/managed_process_windows.go | 21 ++++++--------------- 4 files changed, 30 insertions(+), 33 deletions(-) diff --git a/pexec/managed_process.go b/pexec/managed_process.go index a78477e1..7f03fecf 100644 --- a/pexec/managed_process.go +++ b/pexec/managed_process.go @@ -34,8 +34,7 @@ type ManagedProcess interface { // KillGroup will attempt to kill the process group and not wait for completion. Only use this if // comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired). - // Returns kill process that can be waited on if desired. - KillGroup() (*exec.Cmd, error) + KillGroup() // Status return nil when the process is both alive and owned. // If err is non-nil, process may be a) alive but not owned or b) dead. @@ -439,7 +438,7 @@ func (p *managedProcess) Stop() error { } // KillGroup kills the process group. -func (p *managedProcess) KillGroup() (*exec.Cmd, error) { +func (p *managedProcess) KillGroup() { // Minimally hold a lock here so that we can signal the // management goroutine to stop. We will attempt to kill the // process even if p.stopped is true. @@ -451,7 +450,7 @@ func (p *managedProcess) KillGroup() (*exec.Cmd, error) { if p.cmd == nil { p.mu.Unlock() - return nil, errAlreadyStopped + return } p.mu.Unlock() @@ -461,5 +460,6 @@ func (p *managedProcess) KillGroup() (*exec.Cmd, error) { // without a lock held. // We are intentionally not checking the error here, we are already // in a bad state. - return p.forceKillGroup() + //nolint:errcheck,gosec + p.forceKillGroup() } diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 22e47fa3..9046aa84 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -620,9 +620,7 @@ func TestManagedProcessStop(t *testing.T) { <-watcher2.Events <-watcher3.Events - cmd, err := proc.KillGroup() - test.That(t, err, test.ShouldBeNil) - test.That(t, cmd.Wait(), test.ShouldBeNil) + proc.KillGroup() file1SizeAfterKill := getSize(tempFile1) file2SizeAfterKill := getSize(tempFile2) @@ -632,9 +630,21 @@ func TestManagedProcessStop(t *testing.T) { test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart) test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart) - test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill) - test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill) - test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill) + // since KillGroup does not wait, we might have to check file size a few times as the kill + // might take a little to propagate. We want to make sure that the file size stops increasing. + testutils.WaitForAssertionWithSleep(t, 300*time.Millisecond, 50, func(tb testing.TB) { + tempSize1 := getSize(tempFile1) + tempSize2 := getSize(tempFile2) + tempSize3 := getSize(tempFile3) + + test.That(t, tempSize1, test.ShouldEqual, file1SizeAfterKill) + test.That(t, tempSize2, test.ShouldEqual, file2SizeAfterKill) + test.That(t, tempSize3, test.ShouldEqual, file3SizeAfterKill) + + file1SizeAfterKill = tempSize1 + file2SizeAfterKill = tempSize1 + file3SizeAfterKill = tempSize1 + }) }) } @@ -770,4 +780,4 @@ in reality tests should just depend on the methods they rely on. UnixPid is not of those methods (for better or worse)`) } -func (fp *fakeProcess) KillGroup() (*exec.Cmd, error) { return nil, errAlreadyStopped } +func (fp *fakeProcess) KillGroup() {} diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go index b28c23dd..a5be4317 100644 --- a/pexec/managed_process_unix.go +++ b/pexec/managed_process_unix.go @@ -129,15 +129,11 @@ func (p *managedProcess) kill() (bool, error) { // forceKillGroup kills everything in the process group. This will not wait for completion and may result the // kill becoming a zombie process. -func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) { +func (p *managedProcess) forceKillGroup() error { pgidStr := strconv.Itoa(-p.cmd.Process.Pid) p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid) //nolint:gosec - cmd := exec.Command("kill", "-9", pgidStr) - if err := cmd.Start(); err != nil { - return nil, err - } - return cmd, nil + return exec.Command("kill", "-9", pgidStr).Start() } func isWaitErrUnknown(err string, forceKilled bool) bool { diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go index f8892e81..df758860 100644 --- a/pexec/managed_process_windows.go +++ b/pexec/managed_process_windows.go @@ -89,7 +89,7 @@ func (p *managedProcess) kill() (bool, error) { select { case <-timer2.C: p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) - if err := forceKillCmd(pidStr).Run(); err != nil { + if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil { return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid) } forceKilled = true @@ -97,7 +97,7 @@ func (p *managedProcess) kill() (bool, error) { timer2.Stop() } } else { - if err := forceKillCmd(pidStr).Run(); err != nil { + if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil { return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid) } forceKilled = true @@ -106,20 +106,11 @@ func (p *managedProcess) kill() (bool, error) { return forceKilled, nil } -func forceKillCmd(pidStr string) *exec.Cmd { - return exec.Command("taskkill", "/t", "/f", "/pid", pidStr) -} - -// forceKillGroup kills everything in the process group. This will not wait for completion and may result the -// kill becoming a zombie process. -func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) { - pgidStr := strconv.Itoa(p.cmd.Process.Pid) +// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process. +func (p *managedProcess) forceKillGroup() error { + pidStr := strconv.Itoa(p.cmd.Process.Pid) p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid) - cmd := forceKillCmd(pgidStr) - if err := cmd.Start(); err != nil { - return nil, err - } - return cmd, nil + return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start() } func isWaitErrUnknown(err string, forceKilled bool) bool { From fc2c2fab558354d941764c943af11961849bdac8 Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 17:11:26 -0500 Subject: [PATCH 08/11] wait for close --- pexec/managed_process_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 9046aa84..8fbc40db 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -645,6 +645,9 @@ func TestManagedProcessStop(t *testing.T) { file2SizeAfterKill = tempSize1 file3SizeAfterKill = tempSize1 }) + + // wait on the managingCh to close + <-proc.(*managedProcess).managingCh }) } From 1cbee8ad23ac98f2991ac43dda62dfa9d6b0c133 Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 17:50:13 -0500 Subject: [PATCH 09/11] hopefully --- pexec/managed_process_test.go | 37 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 8fbc40db..2dc197af 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -455,13 +455,13 @@ func TestManagedProcessStop(t *testing.T) { watcher1, tempFile := testutils.WatchedFile(t) bashScript1 := fmt.Sprintf(` - trap "echo SIGTERM >> '%s'" SIGTERM - echo hello >> '%s' - while true - do echo hey - sleep 1 - done - `, tempFile.Name(), tempFile.Name()) + trap "echo SIGTERM >> '%s'" SIGTERM + echo hello >> '%s' + while true + do echo hey + sleep 1 + done + `, tempFile.Name(), tempFile.Name()) proc := NewManagedProcess(ProcessConfig{ Name: "bash", @@ -496,20 +496,20 @@ func TestManagedProcessStop(t *testing.T) { watcher3, tempFile3 := testutils.WatchedFile(t) trapScript := ` - trap "echo SIGTERM >> '%s'" SIGTERM - echo hello >> '%s' - while true - do echo hey - sleep 1 - done - ` + trap "echo SIGTERM >> '%s'" SIGTERM + echo hello >> '%s' + while true + do echo hey + sleep 1 + done + ` bashScript1 := fmt.Sprintf(trapScript, tempFile1.Name(), tempFile1.Name()) bashScript2 := fmt.Sprintf(trapScript, tempFile2.Name(), tempFile2.Name()) bashScriptParent := fmt.Sprintf(` - bash -c '%s' & - bash -c '%s' & - `+trapScript, + bash -c '%s' & + bash -c '%s' & + `+trapScript, bashScript1, bashScript2, tempFile3.Name(), @@ -602,6 +602,9 @@ func TestManagedProcessStop(t *testing.T) { proc := NewManagedProcess(ProcessConfig{ Name: "bash", Args: []string{"-c", bashScriptParent}, + // have to set Log to false so that open file descriptors do not cause the test to hang + // https://github.com/golang/go/issues/18874 + Log: false, }, logger) // To confirm that the processes have died, confirm that the size of the file stopped increasing From a9a013b2f06cb92bba095ac75a69e0e93dee4a9d Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 18:08:41 -0500 Subject: [PATCH 10/11] maybe? --- pexec/managed_process_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 2dc197af..9ddbf698 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -16,6 +16,7 @@ import ( "strings" "sync" "sync/atomic" + "syscall" "testing" "time" @@ -649,6 +650,12 @@ func TestManagedProcessStop(t *testing.T) { file3SizeAfterKill = tempSize1 }) + // on certain systems, we have to send another signal to make sure the cmd.Wait() in + // the manage goroutine actually returns + if err := proc.(*managedProcess).cmd.Process.Signal(syscall.SIGTERM); err != nil { + test.That(t, errors.Is(err, os.ErrProcessDone), test.ShouldBeFalse) + } + // wait on the managingCh to close <-proc.(*managedProcess).managingCh }) From ffa8db3774344cc20f86c8e81817191647379aaa Mon Sep 17 00:00:00 2001 From: Cheuk Tse Date: Thu, 9 Jan 2025 18:15:59 -0500 Subject: [PATCH 11/11] better doc --- pexec/managed_process_test.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go index 9ddbf698..059c5564 100644 --- a/pexec/managed_process_test.go +++ b/pexec/managed_process_test.go @@ -570,6 +570,9 @@ func TestManagedProcessStop(t *testing.T) { <-watcher1.Events test.That(t, proc.Stop(), test.ShouldBeNil) }) +} + +func TestManagedProcessKillGroup(t *testing.T) { t.Run("kill signaling with children", func(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("cannot test this on windows") @@ -582,18 +585,18 @@ func TestManagedProcessStop(t *testing.T) { // this script writes a string to the specified file every 100ms script := ` - while true - do echo hello >> '%s' - sleep 0.1 - done - ` + while true + do echo hello >> '%s' + sleep 0.1 + done + ` bashScript1 := fmt.Sprintf(script, tempFile1.Name()) bashScript2 := fmt.Sprintf(script, tempFile2.Name()) bashScriptParent := fmt.Sprintf(` - bash -c '%s' & - bash -c '%s' & - `+script, + bash -c '%s' & + bash -c '%s' & + `+script, bashScript1, bashScript2, tempFile3.Name(), @@ -603,9 +606,6 @@ func TestManagedProcessStop(t *testing.T) { proc := NewManagedProcess(ProcessConfig{ Name: "bash", Args: []string{"-c", bashScriptParent}, - // have to set Log to false so that open file descriptors do not cause the test to hang - // https://github.com/golang/go/issues/18874 - Log: false, }, logger) // To confirm that the processes have died, confirm that the size of the file stopped increasing @@ -650,8 +650,10 @@ func TestManagedProcessStop(t *testing.T) { file3SizeAfterKill = tempSize1 }) - // on certain systems, we have to send another signal to make sure the cmd.Wait() in - // the manage goroutine actually returns + // in CI, we have to send another signal to make sure the cmd.Wait() in + // the manage goroutine actually returns. + // We do not care about the error if it is expected. + // maybe related to https://github.com/golang/go/issues/18874 if err := proc.(*managedProcess).cmd.Process.Signal(syscall.SIGTERM); err != nil { test.That(t, errors.Is(err, os.ErrProcessDone), test.ShouldBeFalse) }