|
8 | 8 | "fmt" |
9 | 9 | "os" |
10 | 10 | "path/filepath" |
| 11 | + "sync" |
| 12 | + "syscall" |
11 | 13 | "testing" |
12 | 14 |
|
13 | 15 | v1 "github.com/google/go-containerregistry/pkg/v1" |
@@ -726,3 +728,191 @@ func TestWithBackend(t *testing.T) { |
726 | 728 | WithBackend(backend).apply(cfg) |
727 | 729 | assert.Equal(t, backend, cfg.backend) |
728 | 730 | } |
| 731 | + |
| 732 | +// --- terminateStaleRunner tests --- |
| 733 | + |
| 734 | +func TestTerminateStaleRunner_NoStateFile(t *testing.T) { |
| 735 | + t.Parallel() |
| 736 | + |
| 737 | + dataDir := t.TempDir() |
| 738 | + cfg := defaultConfig() |
| 739 | + cfg.dataDir = dataDir |
| 740 | + |
| 741 | + // Should not panic or error when no state file exists. |
| 742 | + terminateStaleRunner(cfg) |
| 743 | +} |
| 744 | + |
| 745 | +func TestTerminateStaleRunner_DeadProcess(t *testing.T) { |
| 746 | + t.Parallel() |
| 747 | + |
| 748 | + dataDir := t.TempDir() |
| 749 | + |
| 750 | + // Write state with a PID that doesn't exist. |
| 751 | + mgr := state.NewManager(dataDir) |
| 752 | + ls, err := mgr.LoadAndLock(context.Background()) |
| 753 | + require.NoError(t, err) |
| 754 | + ls.State.Active = true |
| 755 | + ls.State.PID = 2147483647 // max PID, almost certainly dead |
| 756 | + require.NoError(t, ls.Save()) |
| 757 | + ls.Release() |
| 758 | + |
| 759 | + cfg := defaultConfig() |
| 760 | + cfg.dataDir = dataDir |
| 761 | + |
| 762 | + var killCalled bool |
| 763 | + cfg.killProcess = func(_ int, _ syscall.Signal) error { |
| 764 | + killCalled = true |
| 765 | + return nil |
| 766 | + } |
| 767 | + cfg.processAlive = func(_ int) bool { return false } |
| 768 | + |
| 769 | + terminateStaleRunner(cfg) |
| 770 | + assert.False(t, killCalled, "should not attempt to kill a dead process") |
| 771 | +} |
| 772 | + |
| 773 | +func TestTerminateStaleRunner_AliveProcess_GracefulExit(t *testing.T) { |
| 774 | + t.Parallel() |
| 775 | + |
| 776 | + dataDir := t.TempDir() |
| 777 | + |
| 778 | + mgr := state.NewManager(dataDir) |
| 779 | + ls, err := mgr.LoadAndLock(context.Background()) |
| 780 | + require.NoError(t, err) |
| 781 | + ls.State.Active = true |
| 782 | + ls.State.PID = 99999 |
| 783 | + require.NoError(t, ls.Save()) |
| 784 | + ls.Release() |
| 785 | + |
| 786 | + cfg := defaultConfig() |
| 787 | + cfg.dataDir = dataDir |
| 788 | + |
| 789 | + var mu sync.Mutex |
| 790 | + var signals []syscall.Signal |
| 791 | + aliveCount := 0 |
| 792 | + |
| 793 | + cfg.killProcess = func(pid int, sig syscall.Signal) error { |
| 794 | + assert.Equal(t, 99999, pid) |
| 795 | + mu.Lock() |
| 796 | + signals = append(signals, sig) |
| 797 | + mu.Unlock() |
| 798 | + return nil |
| 799 | + } |
| 800 | + cfg.processAlive = func(_ int) bool { |
| 801 | + mu.Lock() |
| 802 | + defer mu.Unlock() |
| 803 | + aliveCount++ |
| 804 | + // Process is alive on first check (before SIGTERM), dead on second |
| 805 | + // (after SIGTERM + first poll). |
| 806 | + return aliveCount <= 1 |
| 807 | + } |
| 808 | + |
| 809 | + terminateStaleRunner(cfg) |
| 810 | + |
| 811 | + mu.Lock() |
| 812 | + defer mu.Unlock() |
| 813 | + require.Len(t, signals, 1, "should only send SIGTERM") |
| 814 | + assert.Equal(t, syscall.SIGTERM, signals[0]) |
| 815 | +} |
| 816 | + |
| 817 | +func TestTerminateStaleRunner_AliveProcess_RequiresKill(t *testing.T) { |
| 818 | + t.Parallel() |
| 819 | + |
| 820 | + dataDir := t.TempDir() |
| 821 | + |
| 822 | + mgr := state.NewManager(dataDir) |
| 823 | + ls, err := mgr.LoadAndLock(context.Background()) |
| 824 | + require.NoError(t, err) |
| 825 | + ls.State.Active = true |
| 826 | + ls.State.PID = 99999 |
| 827 | + require.NoError(t, ls.Save()) |
| 828 | + ls.Release() |
| 829 | + |
| 830 | + cfg := defaultConfig() |
| 831 | + cfg.dataDir = dataDir |
| 832 | + |
| 833 | + var mu sync.Mutex |
| 834 | + var signals []syscall.Signal |
| 835 | + |
| 836 | + cfg.killProcess = func(pid int, sig syscall.Signal) error { |
| 837 | + assert.Equal(t, 99999, pid) |
| 838 | + mu.Lock() |
| 839 | + signals = append(signals, sig) |
| 840 | + mu.Unlock() |
| 841 | + return nil |
| 842 | + } |
| 843 | + // Process never exits on its own. |
| 844 | + cfg.processAlive = func(_ int) bool { return true } |
| 845 | + |
| 846 | + terminateStaleRunner(cfg) |
| 847 | + |
| 848 | + mu.Lock() |
| 849 | + defer mu.Unlock() |
| 850 | + require.Len(t, signals, 2, "should send SIGTERM then SIGKILL") |
| 851 | + assert.Equal(t, syscall.SIGTERM, signals[0]) |
| 852 | + assert.Equal(t, syscall.SIGKILL, signals[1]) |
| 853 | +} |
| 854 | + |
| 855 | +func TestTerminateStaleRunner_ZeroPID(t *testing.T) { |
| 856 | + t.Parallel() |
| 857 | + |
| 858 | + dataDir := t.TempDir() |
| 859 | + |
| 860 | + // Write state with PID=0 (clean shutdown). |
| 861 | + mgr := state.NewManager(dataDir) |
| 862 | + ls, err := mgr.LoadAndLock(context.Background()) |
| 863 | + require.NoError(t, err) |
| 864 | + ls.State.Active = false |
| 865 | + ls.State.PID = 0 |
| 866 | + require.NoError(t, ls.Save()) |
| 867 | + ls.Release() |
| 868 | + |
| 869 | + cfg := defaultConfig() |
| 870 | + cfg.dataDir = dataDir |
| 871 | + |
| 872 | + var killCalled bool |
| 873 | + cfg.killProcess = func(_ int, _ syscall.Signal) error { |
| 874 | + killCalled = true |
| 875 | + return nil |
| 876 | + } |
| 877 | + |
| 878 | + terminateStaleRunner(cfg) |
| 879 | + assert.False(t, killCalled, "should not attempt to kill PID 0") |
| 880 | +} |
| 881 | + |
| 882 | +func TestRun_WithCleanDataDir_TerminatesStaleRunner(t *testing.T) { |
| 883 | + t.Parallel() |
| 884 | + |
| 885 | + dataDir := t.TempDir() |
| 886 | + |
| 887 | + // Pre-populate state as if a previous runner crashed. |
| 888 | + mgr := state.NewManager(dataDir) |
| 889 | + ls, err := mgr.LoadAndLock(context.Background()) |
| 890 | + require.NoError(t, err) |
| 891 | + ls.State.Active = true |
| 892 | + ls.State.PID = 2147483647 // dead PID |
| 893 | + require.NoError(t, ls.Save()) |
| 894 | + ls.Release() |
| 895 | + |
| 896 | + rootfsDir := filepath.Join(dataDir, "rootfs") |
| 897 | + require.NoError(t, os.MkdirAll(rootfsDir, 0o755)) |
| 898 | + |
| 899 | + handle := &mockVMHandle{id: "1234", alive: true} |
| 900 | + netProv := &mockNetProvider{sockPath: "/tmp/fake.sock"} |
| 901 | + |
| 902 | + vm, err := Run(context.Background(), "test:latest", |
| 903 | + WithDataDir(dataDir), |
| 904 | + WithCleanDataDir(), |
| 905 | + WithPreflightChecker(preflight.NewEmpty()), |
| 906 | + WithRootFSPath(rootfsDir), |
| 907 | + WithNetProvider(netProv), |
| 908 | + WithBackend(&mockBackend{startHandle: handle}), |
| 909 | + ) |
| 910 | + require.NoError(t, err) |
| 911 | + require.NotNil(t, vm) |
| 912 | + |
| 913 | + // The new state should reflect the new VM, not the stale one. |
| 914 | + loaded, loadErr := mgr.Load() |
| 915 | + require.NoError(t, loadErr) |
| 916 | + assert.True(t, loaded.Active) |
| 917 | + assert.Equal(t, 1234, loaded.PID) |
| 918 | +} |
0 commit comments