From 847140e11594270167ff4057d37aa1c91c52a449 Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 26 Dec 2024 23:09:46 -0500
Subject: [PATCH 01/11] kill

---
 pexec/managed_process.go         | 30 ++++++++++++++++++++++++++++++
 pexec/managed_process_test.go    |  2 ++
 pexec/managed_process_unix.go    |  9 +++++++++
 pexec/managed_process_windows.go |  8 +++++++-
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/pexec/managed_process.go b/pexec/managed_process.go
index 14752998..25201028 100644
--- a/pexec/managed_process.go
+++ b/pexec/managed_process.go
@@ -32,6 +32,10 @@ type ManagedProcess interface {
 	// there's any system level issue stopping the process.
 	Stop() error
 
+	// Kill will attempt to kill the process group and not wait for completion. Only use this if
+	// comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired).
+	Kill()
+
 	// Status return nil when the process is both alive and owned.
 	// If err is non-nil, process may be a) alive but not owned or b) dead.
 	Status() error
@@ -432,3 +436,29 @@ func (p *managedProcess) Stop() error {
 	}
 	return errors.Errorf("non-successful exit code: %d", p.cmd.ProcessState.ExitCode())
 }
+
+func (p *managedProcess) Kill() {
+	// Minimally hold a lock here so that we can signal the
+	// management goroutine to stop. We will attempt to kill the
+	// process even if p.stopped is true.
+	p.mu.Lock()
+	if !p.stopped {
+		close(p.killCh)
+		p.stopped = true
+	}
+
+	if p.cmd == nil {
+		p.mu.Unlock()
+		return
+	}
+	p.mu.Unlock()
+
+	// Since p.cmd is mutex guarded and we just signaled the manage
+	// goroutine to stop, no new Start can happen and therefore
+	// p.cmd can no longer be modified rendering it safe to read
+	// without a lock held.
+	// We are intentionally not checking the error here, we are already
+	// in a bad state.
+	//nolint:errcheck,gosec
+	p.forceKill()
+}
diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 75e3e44f..b009b7ac 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -702,3 +702,5 @@ func (fp *fakeProcess) UnixPid() (int, error) {
 in reality tests should just depend on the methods they rely on. UnixPid is not one
 of those methods (for better or worse)`)
 }
+
+func (fp *fakeProcess) Kill() {}
diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go
index 498be9e8..f696fe16 100644
--- a/pexec/managed_process_unix.go
+++ b/pexec/managed_process_unix.go
@@ -4,6 +4,7 @@ package pexec
 
 import (
 	"os"
+	"os/exec"
 	"os/user"
 	"strconv"
 	"syscall"
@@ -126,6 +127,14 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
+// forceKill kills everything in the process group. This will not wait for completion and may result in a zombie process.
+func (p *managedProcess) forceKill() error {
+	pidStr := strconv.Itoa(-p.cmd.Process.Pid)
+	p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid)
+	//nolint:gosec
+	return exec.Command("kill", "-9", pidStr).Start()
+}
+
 func isWaitErrUnknown(err string, forceKilled bool) bool {
 	// This can easily happen if the process does not handle interrupts gracefully
 	// and it won't provide us any exit code info.
diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go
index ad1034b2..088ef377 100644
--- a/pexec/managed_process_windows.go
+++ b/pexec/managed_process_windows.go
@@ -25,7 +25,6 @@ func parseSignal(sigStr, name string) (syscall.Signal, error) {
 	return 0, errors.New("signals not supported on Windows")
 }
 
-
 func (p *managedProcess) sysProcAttr() (*syscall.SysProcAttr, error) {
 	ret := &syscall.SysProcAttr{
 		CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP,
@@ -107,6 +106,13 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
+// forceKill kills everything in the process tree. This will not wait for completion and may result in a zombie process.
+func (p *managedProcess) forceKill() error {
+	pidStr := strconv.Itoa(p.cmd.Process.Pid)
+	p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)
+	return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start()
+}
+
 func isWaitErrUnknown(err string, forceKilled bool) bool {
 	if !forceKilled {
 		return false

From af8640d4484d21a3ed8e78776452aa022d706ea8 Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 00:03:30 -0500
Subject: [PATCH 02/11] pr feedback, add test

---
 pexec/managed_process.go         |   6 +-
 pexec/managed_process_test.go    | 112 +++++++++++++++++++++++++------
 pexec/managed_process_unix.go    |   9 +--
 pexec/managed_process_windows.go |   2 +-
 4 files changed, 99 insertions(+), 30 deletions(-)

diff --git a/pexec/managed_process.go b/pexec/managed_process.go
index 25201028..e3081006 100644
--- a/pexec/managed_process.go
+++ b/pexec/managed_process.go
@@ -34,7 +34,7 @@ type ManagedProcess interface {
 
 	// Kill will attempt to kill the process group and not wait for completion. Only use this if
 	// comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired).
-	Kill()
+	KillGroup()
 
 	// Status return nil when the process is both alive and owned.
 	// If err is non-nil, process may be a) alive but not owned or b) dead.
@@ -437,7 +437,7 @@ func (p *managedProcess) Stop() error {
 	return errors.Errorf("non-successful exit code: %d", p.cmd.ProcessState.ExitCode())
 }
 
-func (p *managedProcess) Kill() {
+func (p *managedProcess) KillGroup() {
 	// Minimally hold a lock here so that we can signal the
 	// management goroutine to stop. We will attempt to kill the
 	// process even if p.stopped is true.
@@ -460,5 +460,5 @@ func (p *managedProcess) Kill() {
 	// We are intentionally not checking the error here, we are already
 	// in a bad state.
 	//nolint:errcheck,gosec
-	p.forceKill()
+	p.forceKillGroup()
 }
diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index b009b7ac..69f780d5 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -411,10 +411,10 @@ func TestManagedProcessStop(t *testing.T) {
 			bashScriptBuilder.WriteString("\n")
 		}
 		bashScriptBuilder.WriteString(fmt.Sprintf(`echo hello >> '%s'
-while true
-do echo hey
-sleep 1
-done`, tempFile.Name()))
+	while true
+	do echo hey
+	sleep 1
+	done`, tempFile.Name()))
 		bashScriptBuilder.WriteString("\n")
 
 		bashScript := bashScriptBuilder.String()
@@ -455,13 +455,13 @@ done`, tempFile.Name()))
 		watcher1, tempFile := testutils.WatchedFile(t)
 
 		bashScript1 := fmt.Sprintf(`
-			trap "echo SIGTERM >> '%s'" SIGTERM
-			echo hello >> '%s'
-			while true
-			do echo hey
-			sleep 1
-			done
-		`, tempFile.Name(), tempFile.Name())
+				trap "echo SIGTERM >> '%s'" SIGTERM
+				echo hello >> '%s'
+				while true
+				do echo hey
+				sleep 1
+				done
+			`, tempFile.Name(), tempFile.Name())
 
 		proc := NewManagedProcess(ProcessConfig{
 			Name:        "bash",
@@ -496,20 +496,20 @@ done`, tempFile.Name()))
 		watcher3, tempFile3 := testutils.WatchedFile(t)
 
 		trapScript := `
-			trap "echo SIGTERM >> '%s'" SIGTERM
-			echo hello >> '%s'
-			while true
-			do echo hey
-			sleep 1
-			done
-		`
+				trap "echo SIGTERM >> '%s'" SIGTERM
+				echo hello >> '%s'
+				while true
+				do echo hey
+				sleep 1
+				done
+			`
 
 		bashScript1 := fmt.Sprintf(trapScript, tempFile1.Name(), tempFile1.Name())
 		bashScript2 := fmt.Sprintf(trapScript, tempFile2.Name(), tempFile2.Name())
 		bashScriptParent := fmt.Sprintf(`
-			bash -c '%s' &
-			bash -c '%s' &
-			`+trapScript,
+				bash -c '%s' &
+				bash -c '%s' &
+				`+trapScript,
 			bashScript1,
 			bashScript2,
 			tempFile3.Name(),
@@ -569,6 +569,74 @@ done`, tempFile.Name()))
 		<-watcher1.Events
 		test.That(t, proc.Stop(), test.ShouldBeNil)
 	})
+	t.Run("kill signaling with children", func(t *testing.T) {
+		if runtime.GOOS == "windows" {
+			t.Skip("cannot test this on windows")
+		}
+		logger := golog.NewTestLogger(t)
+
+		watcher1, tempFile1 := testutils.WatchedFile(t)
+		watcher2, tempFile2 := testutils.WatchedFile(t)
+		watcher3, tempFile3 := testutils.WatchedFile(t)
+
+		// this script writes a string to the specified file every 100ms
+		script := `
+			while true
+			do echo hello >> '%s'
+			sleep 0.1
+			done
+		`
+
+		bashScript1 := fmt.Sprintf(script, tempFile1.Name())
+		bashScript2 := fmt.Sprintf(script, tempFile2.Name())
+		bashScriptParent := fmt.Sprintf(`
+			bash -c '%s' &
+			bash -c '%s' &
+			`+script,
+			bashScript1,
+			bashScript2,
+			tempFile3.Name(),
+			tempFile3.Name(),
+		)
+
+		proc := NewManagedProcess(ProcessConfig{
+			Name: "bash",
+			Args: []string{"-c", bashScriptParent},
+		}, logger)
+
+		// To confirm that the processes have died, confirm that the size of the file stopped increasing
+		getSize := func(file *os.File) int64 {
+			info, _ := file.Stat()
+			return info.Size()
+		}
+
+		file1SizeBeforeStart := getSize(tempFile1)
+		file2SizeBeforeStart := getSize(tempFile2)
+		file3SizeBeforeStart := getSize(tempFile3)
+
+		test.That(t, proc.Start(context.Background()), test.ShouldBeNil)
+
+		<-watcher1.Events
+		<-watcher2.Events
+		<-watcher3.Events
+
+		proc.KillGroup()
+
+		file1SizeAfterKill := getSize(tempFile1)
+		file2SizeAfterKill := getSize(tempFile2)
+		file3SizeAfterKill := getSize(tempFile3)
+
+		test.That(t, file1SizeAfterKill, test.ShouldBeGreaterThan, file1SizeBeforeStart)
+		test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart)
+		test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart)
+
+		// wait a short amount of time and check again - this time the size should not have increased.
+		time.Sleep(300 * time.Millisecond)
+
+		test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill)
+		test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill)
+		test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill)
+	})
 }
 
 func TestManagedProcessEnvironmentVariables(t *testing.T) {
@@ -703,4 +771,4 @@ in reality tests should just depend on the methods they rely on. UnixPid is not
 of those methods (for better or worse)`)
 }
 
-func (fp *fakeProcess) Kill() {}
+func (fp *fakeProcess) KillGroup() {}
diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go
index f696fe16..e2f81b54 100644
--- a/pexec/managed_process_unix.go
+++ b/pexec/managed_process_unix.go
@@ -127,12 +127,13 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
-// forceKill kills everything in the process group. This will not wait for completion and may result in a zombie process.
-func (p *managedProcess) forceKill() error {
-	pidStr := strconv.Itoa(-p.cmd.Process.Pid)
+// forceKill kills everything in the process group. This will not wait for completion and may result the
+// kill becoming a zombie process.
+func (p *managedProcess) forceKillGroup() error {
+	pgidStr := strconv.Itoa(-p.cmd.Process.Pid)
 	p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid)
 	//nolint:gosec
-	return exec.Command("kill", "-9", pidStr).Start()
+	return exec.Command("kill", "-9", pgidStr).Start()
 }
 
 func isWaitErrUnknown(err string, forceKilled bool) bool {
diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go
index 088ef377..63a18983 100644
--- a/pexec/managed_process_windows.go
+++ b/pexec/managed_process_windows.go
@@ -107,7 +107,7 @@ func (p *managedProcess) kill() (bool, error) {
 }
 
 // forceKill kills everything in the process tree. This will not wait for completion and may result in a zombie process.
-func (p *managedProcess) forceKill() error {
+func (p *managedProcess) forceKillGroup() error {
 	pidStr := strconv.Itoa(p.cmd.Process.Pid)
 	p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)
 	return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start()

From 6beca95e101d3f362c0d54647ee2423f2ec8e75d Mon Sep 17 00:00:00 2001
From: Cheuk <90270663+cheukt@users.noreply.github.com>
Date: Thu, 9 Jan 2025 10:13:59 -0500
Subject: [PATCH 03/11] Apply suggestions from code review

Co-authored-by: Benjamin Rewis <32186188+benjirewis@users.noreply.github.com>
---
 pexec/managed_process.go         | 3 ++-
 pexec/managed_process_unix.go    | 2 +-
 pexec/managed_process_windows.go | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pexec/managed_process.go b/pexec/managed_process.go
index e3081006..7f03fecf 100644
--- a/pexec/managed_process.go
+++ b/pexec/managed_process.go
@@ -32,7 +32,7 @@ type ManagedProcess interface {
 	// there's any system level issue stopping the process.
 	Stop() error
 
-	// Kill will attempt to kill the process group and not wait for completion. Only use this if
+	// KillGroup will attempt to kill the process group and not wait for completion. Only use this if
 	// comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired).
 	KillGroup()
 
@@ -437,6 +437,7 @@ func (p *managedProcess) Stop() error {
 	return errors.Errorf("non-successful exit code: %d", p.cmd.ProcessState.ExitCode())
 }
 
+// KillGroup kills the process group.
 func (p *managedProcess) KillGroup() {
 	// Minimally hold a lock here so that we can signal the
 	// management goroutine to stop. We will attempt to kill the
diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go
index e2f81b54..a5be4317 100644
--- a/pexec/managed_process_unix.go
+++ b/pexec/managed_process_unix.go
@@ -127,7 +127,7 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
-// forceKill kills everything in the process group. This will not wait for completion and may result the
+// forceKillGroup kills everything in the process group. This will not wait for completion and may result the
 // kill becoming a zombie process.
 func (p *managedProcess) forceKillGroup() error {
 	pgidStr := strconv.Itoa(-p.cmd.Process.Pid)
diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go
index 63a18983..df758860 100644
--- a/pexec/managed_process_windows.go
+++ b/pexec/managed_process_windows.go
@@ -106,7 +106,7 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
-// forceKill kills everything in the process tree. This will not wait for completion and may result in a zombie process.
+// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process.
 func (p *managedProcess) forceKillGroup() error {
 	pidStr := strconv.Itoa(p.cmd.Process.Pid)
 	p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)

From 09c7703ce3cb43ad8cab4a9dcdc0233db0ea39b4 Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 14:58:12 -0500
Subject: [PATCH 04/11] fix?

---
 pexec/managed_process_test.go | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 69f780d5..442a7d55 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -630,12 +630,22 @@ func TestManagedProcessStop(t *testing.T) {
 		test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart)
 		test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart)
 
-		// wait a short amount of time and check again - this time the size should not have increased.
-		time.Sleep(300 * time.Millisecond)
+		// since KillGroup does not wait, we might have to check file size a few times as the kill
+		// might take a little to propagate. We want to make sure that the file size stops increasing.
+		testutils.WaitForAssertionWithSleep(t, 300*time.Millisecond, 50, func(tb testing.TB) {
+			tempSize1 := getSize(tempFile1)
+			tempSize2 := getSize(tempFile2)
+			tempSize3 := getSize(tempFile3)
+
+			test.That(t, tempSize1, test.ShouldEqual, file1SizeAfterKill)
+			test.That(t, tempSize2, test.ShouldEqual, file2SizeAfterKill)
+			test.That(t, tempSize3, test.ShouldEqual, file3SizeAfterKill)
+
+			file1SizeAfterKill = tempSize1
+			file2SizeAfterKill = tempSize1
+			file3SizeAfterKill = tempSize1
+		})
 
-		test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill)
-		test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill)
-		test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill)
 	})
 }
 

From 9cf10a67dc387ec397a8476aace8f85d2092cfee Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 15:18:47 -0500
Subject: [PATCH 05/11] lint

---
 pexec/managed_process_test.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 442a7d55..9046aa84 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -645,7 +645,6 @@ func TestManagedProcessStop(t *testing.T) {
 			file2SizeAfterKill = tempSize1
 			file3SizeAfterKill = tempSize1
 		})
-
 	})
 }
 

From 8ae366c84ab7df715d82589663319bc9fd5264f1 Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 16:57:53 -0500
Subject: [PATCH 06/11] should hopefully work

---
 pexec/managed_process.go         | 10 +++++-----
 pexec/managed_process_test.go    | 24 +++++++-----------------
 pexec/managed_process_unix.go    |  8 ++++++--
 pexec/managed_process_windows.go | 21 +++++++++++++++------
 4 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/pexec/managed_process.go b/pexec/managed_process.go
index 7f03fecf..a78477e1 100644
--- a/pexec/managed_process.go
+++ b/pexec/managed_process.go
@@ -34,7 +34,8 @@ type ManagedProcess interface {
 
 	// KillGroup will attempt to kill the process group and not wait for completion. Only use this if
 	// comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired).
-	KillGroup()
+	// Returns kill process that can be waited on if desired.
+	KillGroup() (*exec.Cmd, error)
 
 	// Status return nil when the process is both alive and owned.
 	// If err is non-nil, process may be a) alive but not owned or b) dead.
@@ -438,7 +439,7 @@ func (p *managedProcess) Stop() error {
 }
 
 // KillGroup kills the process group.
-func (p *managedProcess) KillGroup() {
+func (p *managedProcess) KillGroup() (*exec.Cmd, error) {
 	// Minimally hold a lock here so that we can signal the
 	// management goroutine to stop. We will attempt to kill the
 	// process even if p.stopped is true.
@@ -450,7 +451,7 @@ func (p *managedProcess) KillGroup() {
 
 	if p.cmd == nil {
 		p.mu.Unlock()
-		return
+		return nil, errAlreadyStopped
 	}
 	p.mu.Unlock()
 
@@ -460,6 +461,5 @@ func (p *managedProcess) KillGroup() {
 	// without a lock held.
 	// We are intentionally not checking the error here, we are already
 	// in a bad state.
-	//nolint:errcheck,gosec
-	p.forceKillGroup()
+	return p.forceKillGroup()
 }
diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 9046aa84..22e47fa3 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -620,7 +620,9 @@ func TestManagedProcessStop(t *testing.T) {
 		<-watcher2.Events
 		<-watcher3.Events
 
-		proc.KillGroup()
+		cmd, err := proc.KillGroup()
+		test.That(t, err, test.ShouldBeNil)
+		test.That(t, cmd.Wait(), test.ShouldBeNil)
 
 		file1SizeAfterKill := getSize(tempFile1)
 		file2SizeAfterKill := getSize(tempFile2)
@@ -630,21 +632,9 @@ func TestManagedProcessStop(t *testing.T) {
 		test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart)
 		test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart)
 
-		// since KillGroup does not wait, we might have to check file size a few times as the kill
-		// might take a little to propagate. We want to make sure that the file size stops increasing.
-		testutils.WaitForAssertionWithSleep(t, 300*time.Millisecond, 50, func(tb testing.TB) {
-			tempSize1 := getSize(tempFile1)
-			tempSize2 := getSize(tempFile2)
-			tempSize3 := getSize(tempFile3)
-
-			test.That(t, tempSize1, test.ShouldEqual, file1SizeAfterKill)
-			test.That(t, tempSize2, test.ShouldEqual, file2SizeAfterKill)
-			test.That(t, tempSize3, test.ShouldEqual, file3SizeAfterKill)
-
-			file1SizeAfterKill = tempSize1
-			file2SizeAfterKill = tempSize1
-			file3SizeAfterKill = tempSize1
-		})
+		test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill)
+		test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill)
+		test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill)
 	})
 }
 
@@ -780,4 +770,4 @@ in reality tests should just depend on the methods they rely on. UnixPid is not
 of those methods (for better or worse)`)
 }
 
-func (fp *fakeProcess) KillGroup() {}
+func (fp *fakeProcess) KillGroup() (*exec.Cmd, error) { return nil, errAlreadyStopped }
diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go
index a5be4317..b28c23dd 100644
--- a/pexec/managed_process_unix.go
+++ b/pexec/managed_process_unix.go
@@ -129,11 +129,15 @@ func (p *managedProcess) kill() (bool, error) {
 
 // forceKillGroup kills everything in the process group. This will not wait for completion and may result the
 // kill becoming a zombie process.
-func (p *managedProcess) forceKillGroup() error {
+func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) {
 	pgidStr := strconv.Itoa(-p.cmd.Process.Pid)
 	p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid)
 	//nolint:gosec
-	return exec.Command("kill", "-9", pgidStr).Start()
+	cmd := exec.Command("kill", "-9", pgidStr)
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
+	return cmd, nil
 }
 
 func isWaitErrUnknown(err string, forceKilled bool) bool {
diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go
index df758860..f8892e81 100644
--- a/pexec/managed_process_windows.go
+++ b/pexec/managed_process_windows.go
@@ -89,7 +89,7 @@ func (p *managedProcess) kill() (bool, error) {
 		select {
 		case <-timer2.C:
 			p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)
-			if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil {
+			if err := forceKillCmd(pidStr).Run(); err != nil {
 				return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid)
 			}
 			forceKilled = true
@@ -97,7 +97,7 @@ func (p *managedProcess) kill() (bool, error) {
 			timer2.Stop()
 		}
 	} else {
-		if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil {
+		if err := forceKillCmd(pidStr).Run(); err != nil {
 			return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid)
 		}
 		forceKilled = true
@@ -106,11 +106,20 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
-// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process.
-func (p *managedProcess) forceKillGroup() error {
-	pidStr := strconv.Itoa(p.cmd.Process.Pid)
+func forceKillCmd(pidStr string) *exec.Cmd {
+	return exec.Command("taskkill", "/t", "/f", "/pid", pidStr)
+}
+
+// forceKillGroup kills everything in the process group. This will not wait for completion and may result the
+// kill becoming a zombie process.
+func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) {
+	pgidStr := strconv.Itoa(p.cmd.Process.Pid)
 	p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)
-	return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start()
+	cmd := forceKillCmd(pgidStr)
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
+	return cmd, nil
 }
 
 func isWaitErrUnknown(err string, forceKilled bool) bool {

From 957ea627c1d85106747b46c509175cce54caec8b Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 17:11:00 -0500
Subject: [PATCH 07/11] Revert "should hopefully work"

This reverts commit 8ae366c84ab7df715d82589663319bc9fd5264f1.
---
 pexec/managed_process.go         | 10 +++++-----
 pexec/managed_process_test.go    | 24 +++++++++++++++++-------
 pexec/managed_process_unix.go    |  8 ++------
 pexec/managed_process_windows.go | 21 ++++++---------------
 4 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/pexec/managed_process.go b/pexec/managed_process.go
index a78477e1..7f03fecf 100644
--- a/pexec/managed_process.go
+++ b/pexec/managed_process.go
@@ -34,8 +34,7 @@ type ManagedProcess interface {
 
 	// KillGroup will attempt to kill the process group and not wait for completion. Only use this if
 	// comfortable with leaking resources (in cases where exiting the program as quickly as possible is desired).
-	// Returns kill process that can be waited on if desired.
-	KillGroup() (*exec.Cmd, error)
+	KillGroup()
 
 	// Status return nil when the process is both alive and owned.
 	// If err is non-nil, process may be a) alive but not owned or b) dead.
@@ -439,7 +438,7 @@ func (p *managedProcess) Stop() error {
 }
 
 // KillGroup kills the process group.
-func (p *managedProcess) KillGroup() (*exec.Cmd, error) {
+func (p *managedProcess) KillGroup() {
 	// Minimally hold a lock here so that we can signal the
 	// management goroutine to stop. We will attempt to kill the
 	// process even if p.stopped is true.
@@ -451,7 +450,7 @@ func (p *managedProcess) KillGroup() (*exec.Cmd, error) {
 
 	if p.cmd == nil {
 		p.mu.Unlock()
-		return nil, errAlreadyStopped
+		return
 	}
 	p.mu.Unlock()
 
@@ -461,5 +460,6 @@ func (p *managedProcess) KillGroup() (*exec.Cmd, error) {
 	// without a lock held.
 	// We are intentionally not checking the error here, we are already
 	// in a bad state.
-	return p.forceKillGroup()
+	//nolint:errcheck,gosec
+	p.forceKillGroup()
 }
diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 22e47fa3..9046aa84 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -620,9 +620,7 @@ func TestManagedProcessStop(t *testing.T) {
 		<-watcher2.Events
 		<-watcher3.Events
 
-		cmd, err := proc.KillGroup()
-		test.That(t, err, test.ShouldBeNil)
-		test.That(t, cmd.Wait(), test.ShouldBeNil)
+		proc.KillGroup()
 
 		file1SizeAfterKill := getSize(tempFile1)
 		file2SizeAfterKill := getSize(tempFile2)
@@ -632,9 +630,21 @@ func TestManagedProcessStop(t *testing.T) {
 		test.That(t, file2SizeAfterKill, test.ShouldBeGreaterThan, file2SizeBeforeStart)
 		test.That(t, file3SizeAfterKill, test.ShouldBeGreaterThan, file3SizeBeforeStart)
 
-		test.That(t, getSize(tempFile1), test.ShouldEqual, file1SizeAfterKill)
-		test.That(t, getSize(tempFile2), test.ShouldEqual, file2SizeAfterKill)
-		test.That(t, getSize(tempFile3), test.ShouldEqual, file3SizeAfterKill)
+		// since KillGroup does not wait, we might have to check file size a few times as the kill
+		// might take a little to propagate. We want to make sure that the file size stops increasing.
+		testutils.WaitForAssertionWithSleep(t, 300*time.Millisecond, 50, func(tb testing.TB) {
+			tempSize1 := getSize(tempFile1)
+			tempSize2 := getSize(tempFile2)
+			tempSize3 := getSize(tempFile3)
+
+			test.That(t, tempSize1, test.ShouldEqual, file1SizeAfterKill)
+			test.That(t, tempSize2, test.ShouldEqual, file2SizeAfterKill)
+			test.That(t, tempSize3, test.ShouldEqual, file3SizeAfterKill)
+
+			file1SizeAfterKill = tempSize1
+			file2SizeAfterKill = tempSize1
+			file3SizeAfterKill = tempSize1
+		})
 	})
 }
 
@@ -770,4 +780,4 @@ in reality tests should just depend on the methods they rely on. UnixPid is not
 of those methods (for better or worse)`)
 }
 
-func (fp *fakeProcess) KillGroup() (*exec.Cmd, error) { return nil, errAlreadyStopped }
+func (fp *fakeProcess) KillGroup() {}
diff --git a/pexec/managed_process_unix.go b/pexec/managed_process_unix.go
index b28c23dd..a5be4317 100644
--- a/pexec/managed_process_unix.go
+++ b/pexec/managed_process_unix.go
@@ -129,15 +129,11 @@ func (p *managedProcess) kill() (bool, error) {
 
 // forceKillGroup kills everything in the process group. This will not wait for completion and may result the
 // kill becoming a zombie process.
-func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) {
+func (p *managedProcess) forceKillGroup() error {
 	pgidStr := strconv.Itoa(-p.cmd.Process.Pid)
 	p.logger.Infof("killing entire process group %d", p.cmd.Process.Pid)
 	//nolint:gosec
-	cmd := exec.Command("kill", "-9", pgidStr)
-	if err := cmd.Start(); err != nil {
-		return nil, err
-	}
-	return cmd, nil
+	return exec.Command("kill", "-9", pgidStr).Start()
 }
 
 func isWaitErrUnknown(err string, forceKilled bool) bool {
diff --git a/pexec/managed_process_windows.go b/pexec/managed_process_windows.go
index f8892e81..df758860 100644
--- a/pexec/managed_process_windows.go
+++ b/pexec/managed_process_windows.go
@@ -89,7 +89,7 @@ func (p *managedProcess) kill() (bool, error) {
 		select {
 		case <-timer2.C:
 			p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)
-			if err := forceKillCmd(pidStr).Run(); err != nil {
+			if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil {
 				return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid)
 			}
 			forceKilled = true
@@ -97,7 +97,7 @@ func (p *managedProcess) kill() (bool, error) {
 			timer2.Stop()
 		}
 	} else {
-		if err := forceKillCmd(pidStr).Run(); err != nil {
+		if err := exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Run(); err != nil {
 			return false, errors.Wrapf(err, "error force killing process tree %d", p.cmd.Process.Pid)
 		}
 		forceKilled = true
@@ -106,20 +106,11 @@ func (p *managedProcess) kill() (bool, error) {
 	return forceKilled, nil
 }
 
-func forceKillCmd(pidStr string) *exec.Cmd {
-	return exec.Command("taskkill", "/t", "/f", "/pid", pidStr)
-}
-
-// forceKillGroup kills everything in the process group. This will not wait for completion and may result the
-// kill becoming a zombie process.
-func (p *managedProcess) forceKillGroup() (*exec.Cmd, error) {
-	pgidStr := strconv.Itoa(p.cmd.Process.Pid)
+// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process.
+func (p *managedProcess) forceKillGroup() error {
+	pidStr := strconv.Itoa(p.cmd.Process.Pid)
 	p.logger.Infof("force killing entire process tree %d", p.cmd.Process.Pid)
-	cmd := forceKillCmd(pgidStr)
-	if err := cmd.Start(); err != nil {
-		return nil, err
-	}
-	return cmd, nil
+	return exec.Command("taskkill", "/t", "/f", "/pid", pidStr).Start()
 }
 
 func isWaitErrUnknown(err string, forceKilled bool) bool {

From fc2c2fab558354d941764c943af11961849bdac8 Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 17:11:26 -0500
Subject: [PATCH 08/11] wait for close

---
 pexec/managed_process_test.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 9046aa84..8fbc40db 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -645,6 +645,9 @@ func TestManagedProcessStop(t *testing.T) {
 			file2SizeAfterKill = tempSize1
 			file3SizeAfterKill = tempSize1
 		})
+
+		// wait on the managingCh to close
+		<-proc.(*managedProcess).managingCh
 	})
 }
 

From 1cbee8ad23ac98f2991ac43dda62dfa9d6b0c133 Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 17:50:13 -0500
Subject: [PATCH 09/11] hopefully

---
 pexec/managed_process_test.go | 37 +++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 8fbc40db..2dc197af 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -455,13 +455,13 @@ func TestManagedProcessStop(t *testing.T) {
 		watcher1, tempFile := testutils.WatchedFile(t)
 
 		bashScript1 := fmt.Sprintf(`
-				trap "echo SIGTERM >> '%s'" SIGTERM
-				echo hello >> '%s'
-				while true
-				do echo hey
-				sleep 1
-				done
-			`, tempFile.Name(), tempFile.Name())
+			trap "echo SIGTERM >> '%s'" SIGTERM
+			echo hello >> '%s'
+			while true
+			do echo hey
+			sleep 1
+			done
+		`, tempFile.Name(), tempFile.Name())
 
 		proc := NewManagedProcess(ProcessConfig{
 			Name:        "bash",
@@ -496,20 +496,20 @@ func TestManagedProcessStop(t *testing.T) {
 		watcher3, tempFile3 := testutils.WatchedFile(t)
 
 		trapScript := `
-				trap "echo SIGTERM >> '%s'" SIGTERM
-				echo hello >> '%s'
-				while true
-				do echo hey
-				sleep 1
-				done
-			`
+			trap "echo SIGTERM >> '%s'" SIGTERM
+			echo hello >> '%s'
+			while true
+			do echo hey
+			sleep 1
+			done
+		`
 
 		bashScript1 := fmt.Sprintf(trapScript, tempFile1.Name(), tempFile1.Name())
 		bashScript2 := fmt.Sprintf(trapScript, tempFile2.Name(), tempFile2.Name())
 		bashScriptParent := fmt.Sprintf(`
-				bash -c '%s' &
-				bash -c '%s' &
-				`+trapScript,
+			bash -c '%s' &
+			bash -c '%s' &
+			`+trapScript,
 			bashScript1,
 			bashScript2,
 			tempFile3.Name(),
@@ -602,6 +602,9 @@ func TestManagedProcessStop(t *testing.T) {
 		proc := NewManagedProcess(ProcessConfig{
 			Name: "bash",
 			Args: []string{"-c", bashScriptParent},
+			// have to set Log to false so that open file descriptors do not cause the test to hang
+			// https://github.com/golang/go/issues/18874
+			Log: false,
 		}, logger)
 
 		// To confirm that the processes have died, confirm that the size of the file stopped increasing

From a9a013b2f06cb92bba095ac75a69e0e93dee4a9d Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 18:08:41 -0500
Subject: [PATCH 10/11] maybe?

---
 pexec/managed_process_test.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 2dc197af..9ddbf698 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -16,6 +16,7 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
+	"syscall"
 	"testing"
 	"time"
 
@@ -649,6 +650,12 @@ func TestManagedProcessStop(t *testing.T) {
 			file3SizeAfterKill = tempSize1
 		})
 
+		// on certain systems, we have to send another signal to make sure the cmd.Wait() in
+		// the manage goroutine actually returns
+		if err := proc.(*managedProcess).cmd.Process.Signal(syscall.SIGTERM); err != nil {
+			test.That(t, errors.Is(err, os.ErrProcessDone), test.ShouldBeFalse)
+		}
+
 		// wait on the managingCh to close
 		<-proc.(*managedProcess).managingCh
 	})

From ffa8db3774344cc20f86c8e81817191647379aaa Mon Sep 17 00:00:00 2001
From: Cheuk Tse <cheuk@viam.com>
Date: Thu, 9 Jan 2025 18:15:59 -0500
Subject: [PATCH 11/11] better doc

---
 pexec/managed_process_test.go | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/pexec/managed_process_test.go b/pexec/managed_process_test.go
index 9ddbf698..059c5564 100644
--- a/pexec/managed_process_test.go
+++ b/pexec/managed_process_test.go
@@ -570,6 +570,9 @@ func TestManagedProcessStop(t *testing.T) {
 		<-watcher1.Events
 		test.That(t, proc.Stop(), test.ShouldBeNil)
 	})
+}
+
+func TestManagedProcessKillGroup(t *testing.T) {
 	t.Run("kill signaling with children", func(t *testing.T) {
 		if runtime.GOOS == "windows" {
 			t.Skip("cannot test this on windows")
@@ -582,18 +585,18 @@ func TestManagedProcessStop(t *testing.T) {
 
 		// this script writes a string to the specified file every 100ms
 		script := `
-			while true
-			do echo hello >> '%s'
-			sleep 0.1
-			done
-		`
+		while true
+		do echo hello >> '%s'
+		sleep 0.1
+		done
+	`
 
 		bashScript1 := fmt.Sprintf(script, tempFile1.Name())
 		bashScript2 := fmt.Sprintf(script, tempFile2.Name())
 		bashScriptParent := fmt.Sprintf(`
-			bash -c '%s' &
-			bash -c '%s' &
-			`+script,
+		bash -c '%s' &
+		bash -c '%s' &
+		`+script,
 			bashScript1,
 			bashScript2,
 			tempFile3.Name(),
@@ -603,9 +606,6 @@ func TestManagedProcessStop(t *testing.T) {
 		proc := NewManagedProcess(ProcessConfig{
 			Name: "bash",
 			Args: []string{"-c", bashScriptParent},
-			// have to set Log to false so that open file descriptors do not cause the test to hang
-			// https://github.com/golang/go/issues/18874
-			Log: false,
 		}, logger)
 
 		// To confirm that the processes have died, confirm that the size of the file stopped increasing
@@ -650,8 +650,10 @@ func TestManagedProcessStop(t *testing.T) {
 			file3SizeAfterKill = tempSize1
 		})
 
-		// on certain systems, we have to send another signal to make sure the cmd.Wait() in
-		// the manage goroutine actually returns
+		// in CI, we have to send another signal to make sure the cmd.Wait() in
+		// the manage goroutine actually returns.
+		// We do not care about the error if it is expected.
+		// maybe related to https://github.com/golang/go/issues/18874
 		if err := proc.(*managedProcess).cmd.Process.Signal(syscall.SIGTERM); err != nil {
 			test.That(t, errors.Is(err, os.ErrProcessDone), test.ShouldBeFalse)
 		}