Skip to content

Commit a70c788

Browse files
authored
More Durable Shell Bug Fixes (#2822)
* Lots of work on ResyncController, issues with stale data, and connection switching * Add+Fix termination messages for shell controller * Prune done/detached jobs on a timer * Remove circular dependencies from wcore, use blockclose event in both jobcontroller and blockcontroller instead of direct calls * Fix concurrency bugs with job termination * Send object update events when modifying the block datastructure in jobcontroller
1 parent ff9923f commit a70c788

20 files changed

Lines changed: 444 additions & 161 deletions

File tree

cmd/server/main-server.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,7 @@ func main() {
568568
go startupActivityUpdate(firstLaunch) // must be after startConfigWatcher()
569569
blocklogger.InitBlockLogger()
570570
jobcontroller.InitJobController()
571+
blockcontroller.InitBlockController()
571572
wcore.InitTabIndicatorStore()
572573
go func() {
573574
defer func() {

cmd/wsh/cmd/wshcmd-jobdebug.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,12 @@ var jobDebugDetachJobCmd = &cobra.Command{
9595
RunE: jobDebugDetachJobRun,
9696
}
9797

98+
var jobDebugBlockAttachmentCmd = &cobra.Command{
99+
Use: "blockattachment",
100+
Short: "show the attached job for a block",
101+
RunE: jobDebugBlockAttachmentRun,
102+
}
103+
98104
var jobIdFlag string
99105
var jobDebugJsonFlag bool
100106
var jobConnFlag string
@@ -120,6 +126,7 @@ func init() {
120126
jobDebugCmd.AddCommand(jobDebugStartCmd)
121127
jobDebugCmd.AddCommand(jobDebugAttachJobCmd)
122128
jobDebugCmd.AddCommand(jobDebugDetachJobCmd)
129+
jobDebugCmd.AddCommand(jobDebugBlockAttachmentCmd)
123130

124131
jobDebugListCmd.Flags().BoolVar(&jobDebugJsonFlag, "json", false, "output as JSON")
125132

@@ -418,3 +425,23 @@ func jobDebugDetachJobRun(cmd *cobra.Command, args []string) error {
418425
fmt.Printf("Job %s detached successfully\n", detachJobIdFlag)
419426
return nil
420427
}
428+
429+
func jobDebugBlockAttachmentRun(cmd *cobra.Command, args []string) error {
430+
blockORef, err := resolveBlockArg()
431+
if err != nil {
432+
return err
433+
}
434+
435+
blockId := blockORef.OID
436+
jobStatus, err := wshclient.BlockJobStatusCommand(RpcClient, blockId, &wshrpc.RpcOpts{Timeout: 5000})
437+
if err != nil {
438+
return fmt.Errorf("getting block job status: %w", err)
439+
}
440+
441+
if jobStatus.JobId == "" {
442+
fmt.Printf("Block %s: no attached job\n", blockId)
443+
} else {
444+
fmt.Printf("Block %s: attached to job %s\n", blockId, jobStatus.JobId)
445+
}
446+
return nil
447+
}

frontend/app/block/blockframe-header.tsx

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,18 @@ import * as jotai from "jotai";
2323
import * as React from "react";
2424
import { BlockFrameProps } from "./blocktypes";
2525

26-
function getDurableIconProps(jobStatus: BlockJobStatusData, connStatus: ConnStatus) {
26+
function getDurableIconProps(jobStatus: BlockJobStatusData, connStatus: ConnStatus, isConfigedDurable?: boolean | null) {
2727
let color = "text-muted";
2828
let titleText = "Durable Session";
29+
let iconType: "fa-solid" | "fa-regular" = "fa-solid";
30+
31+
if (isConfigedDurable === false) {
32+
color = "text-muted";
33+
titleText = "Standard Session";
34+
iconType = "fa-regular";
35+
return { color, titleText, iconType };
36+
}
37+
2938
const status = jobStatus?.status;
3039
if (status === "connected") {
3140
color = "text-sky-500";
@@ -57,7 +66,7 @@ function getDurableIconProps(jobStatus: BlockJobStatusData, connStatus: ConnStat
5766
titleText = "No Session";
5867
}
5968
}
60-
return { color, titleText };
69+
return { color, titleText, iconType };
6170
}
6271

6372
function handleHeaderContextMenu(
@@ -209,6 +218,7 @@ const BlockFrame_Header = ({
209218
const preIconButton = util.useAtomValueSafe(viewModel?.preIconButton);
210219
const useTermHeader = util.useAtomValueSafe(viewModel?.useTermHeader);
211220
const termDurableStatus = util.useAtomValueSafe(viewModel?.termDurableStatus);
221+
const termConfigedDurable = util.useAtomValueSafe(viewModel?.termConfigedDurable);
212222
const hideViewName = util.useAtomValueSafe(viewModel?.hideViewName);
213223
const magnified = jotai.useAtomValue(nodeModel.isMagnified);
214224
const prevMagifiedState = React.useRef(magnified);
@@ -230,7 +240,11 @@ const BlockFrame_Header = ({
230240

231241
const viewIconElem = getViewIconElem(viewIconUnion, blockData);
232242

233-
const { color: durableIconColor, titleText: durableTitle } = getDurableIconProps(termDurableStatus, connStatus);
243+
const { color: durableIconColor, titleText: durableTitle, iconType: durableIconType } = getDurableIconProps(
244+
termDurableStatus,
245+
connStatus,
246+
termConfigedDurable
247+
);
234248

235249
return (
236250
<div
@@ -257,9 +271,9 @@ const BlockFrame_Header = ({
257271
isTerminalBlock={isTerminalBlock}
258272
/>
259273
)}
260-
{useTermHeader && termDurableStatus != null && (
274+
{useTermHeader && termConfigedDurable != null && (
261275
<div className="iconbutton disabled text-[13px] ml-[-4px]" key="durable-status">
262-
<i className={`fa-sharp fa-solid fa-shield ${durableIconColor}`} title={durableTitle} />
276+
<i className={`fa-sharp ${durableIconType} fa-shield ${durableIconColor}`} title={durableTitle} />
263277
</div>
264278
)}
265279
<HeaderTextElems viewModel={viewModel} blockData={blockData} preview={preview} error={error} />

frontend/app/store/global.ts

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -425,8 +425,10 @@ function getSingleOrefAtomCache(oref: string): Map<string, Atom<any>> {
425425
return orefCache;
426426
}
427427

428-
// this function must be kept up to date with IsBlockTermDurable in pkg/jobcontroller/jobcontroller.go
429-
function getBlockTermDurableAtom(blockId: string): Atom<boolean> {
428+
// this function should be kept up to date with IsBlockTermDurable in pkg/jobcontroller/jobcontroller.go
429+
// Note: null/false both map to false in the Go code, but this returns a special null value
430+
// to indicate when the block is not even eligible to be durable
431+
function getBlockTermDurableAtom(blockId: string): Atom<null | boolean> {
430432
const blockCache = getSingleBlockAtomCache(blockId);
431433
const durableAtomName = "#termdurable";
432434
let durableAtom = blockCache.get(durableAtomName);
@@ -438,23 +440,23 @@ function getBlockTermDurableAtom(blockId: string): Atom<boolean> {
438440
const block = get(blockAtom);
439441

440442
if (block == null) {
441-
return false;
443+
return null;
442444
}
443445

444446
// Check if view is "term", and controller is "shell"
445447
if (block.meta?.view != "term" || block.meta?.controller != "shell") {
446-
return false;
448+
return null;
447449
}
448450

449451
// 1. Check if block has a JobId
450452
if (block.jobid != null && block.jobid != "") {
451453
return true;
452454
}
453455

454-
// 2. Check if connection is local or WSL (not durable)
456+
// 2. Check if connection is local or WSL (not eligible for durability)
455457
const connName = block.meta?.connection ?? "";
456458
if (isLocalConnName(connName) || isWslConnName(connName)) {
457-
return false;
459+
return null;
458460
}
459461

460462
// 3. Check config hierarchy: blockmeta → connection → global (default true)

frontend/app/view/term/term-model.ts

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import {
3535
import * as services from "@/store/services";
3636
import * as keyutil from "@/util/keyutil";
3737
import { isMacOS, isWindows } from "@/util/platformutil";
38-
import { boundNumber, stringToBase64 } from "@/util/util";
38+
import { boundNumber, fireAndForget, stringToBase64 } from "@/util/util";
3939
import * as jotai from "jotai";
4040
import * as React from "react";
4141
import { getBlockingCommand } from "./shellblocking";
@@ -79,6 +79,7 @@ export class TermViewModel implements ViewModel {
7979
isCmdController: jotai.Atom<boolean>;
8080
isRestarting: jotai.PrimitiveAtom<boolean>;
8181
termDurableStatus: jotai.Atom<BlockJobStatusData | null>;
82+
termConfigedDurable: jotai.Atom<null | boolean>;
8283
searchAtoms?: SearchAtoms;
8384

8485
constructor(blockId: string, nodeModel: BlockNodeModel, tabModel: TabModel) {
@@ -312,7 +313,7 @@ export class TermViewModel implements ViewModel {
312313
const buttonDecl: IconButtonDecl = {
313314
elemtype: "iconbutton",
314315
icon: iconName,
315-
click: this.forceRestartController.bind(this),
316+
click: () => fireAndForget(() => this.forceRestartController()),
316317
title: title,
317318
};
318319
rtn.push(buttonDecl);
@@ -351,6 +352,7 @@ export class TermViewModel implements ViewModel {
351352
}
352353
return blockJobStatus;
353354
});
355+
this.termConfigedDurable = getBlockTermDurableAtom(this.blockId);
354356
this.blockJobStatusAtom = jotai.atom(null) as jotai.PrimitiveAtom<BlockJobStatusData>;
355357
this.blockJobStatusVersionTs = 0;
356358
const initialBlockJobStatus = RpcApi.BlockJobStatusCommand(TabRpcClient, blockId);
@@ -695,7 +697,7 @@ export class TermViewModel implements ViewModel {
695697
}
696698
const shellProcStatus = globalStore.get(this.shellProcStatus);
697699
if ((shellProcStatus == "done" || shellProcStatus == "init") && keyutil.checkKeyPressed(waveEvent, "Enter")) {
698-
this.forceRestartController();
700+
fireAndForget(() => this.forceRestartController());
699701
return false;
700702
}
701703
const appHandled = appHandleKeyDown(waveEvent);
@@ -714,28 +716,28 @@ export class TermViewModel implements ViewModel {
714716
});
715717
}
716718

717-
forceRestartController() {
719+
async forceRestartController() {
718720
if (globalStore.get(this.isRestarting)) {
719721
return;
720722
}
721723
this.triggerRestartAtom();
724+
await RpcApi.ControllerDestroyCommand(TabRpcClient, this.blockId);
722725
const termsize = {
723726
rows: this.termRef.current?.terminal?.rows,
724727
cols: this.termRef.current?.terminal?.cols,
725728
};
726-
const prtn = RpcApi.ControllerResyncCommand(TabRpcClient, {
729+
await RpcApi.ControllerResyncCommand(TabRpcClient, {
727730
tabid: globalStore.get(atoms.staticTabId),
728731
blockid: this.blockId,
729732
forcerestart: true,
730733
rtopts: { termsize: termsize },
731734
});
732-
prtn.catch((e) => console.log("error controller resync (force restart)", e));
733735
}
734736

735-
async restartSessionInStandardMode() {
737+
async restartSessionWithDurability(isDurable: boolean) {
736738
await RpcApi.SetMetaCommand(TabRpcClient, {
737739
oref: WOS.makeORef("block", this.blockId),
738-
meta: { "term:durable": false },
740+
meta: { "term:durable": isDurable },
739741
});
740742
await RpcApi.ControllerDestroyCommand(TabRpcClient, this.blockId);
741743
const termsize = {
@@ -1040,7 +1042,7 @@ export class TermViewModel implements ViewModel {
10401042
});
10411043
advancedSubmenu.push({
10421044
label: "Force Restart Controller",
1043-
click: this.forceRestartController.bind(this),
1045+
click: () => fireAndForget(() => this.forceRestartController()),
10441046
});
10451047
const isClearOnStart = blockData?.meta?.["cmd:clearonstart"];
10461048
advancedSubmenu.push({
@@ -1145,7 +1147,17 @@ export class TermViewModel implements ViewModel {
11451147
submenu: [
11461148
{
11471149
label: "Restart Session in Standard Mode",
1148-
click: () => this.restartSessionInStandardMode(),
1150+
click: () => this.restartSessionWithDurability(false),
1151+
},
1152+
],
1153+
});
1154+
} else if (isDurable === false) {
1155+
advancedSubmenu.push({
1156+
label: "Session Durability",
1157+
submenu: [
1158+
{
1159+
label: "Restart Session in Durable Mode",
1160+
click: () => this.restartSessionWithDurability(true),
11491161
},
11501162
],
11511163
});

frontend/types/custom.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ declare global {
312312
viewText?: jotai.Atom<string | HeaderElem[]>;
313313

314314
termDurableStatus?: jotai.Atom<BlockJobStatusData | null>;
315+
termConfigedDurable?: jotai.Atom<null | boolean>;
315316

316317
// Icon button displayed before the title in the header.
317318
preIconButton?: jotai.Atom<IconButtonDecl>;

pkg/blockcontroller/blockcontroller.go

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/wavetermdev/waveterm/pkg/wavebase"
2424
"github.com/wavetermdev/waveterm/pkg/waveobj"
2525
"github.com/wavetermdev/waveterm/pkg/wps"
26+
"github.com/wavetermdev/waveterm/pkg/wshrpc/wshclient"
2627
"github.com/wavetermdev/waveterm/pkg/wslconn"
2728
"github.com/wavetermdev/waveterm/pkg/wstore"
2829
)
@@ -118,6 +119,24 @@ func getAllControllers() map[string]Controller {
118119
return result
119120
}
120121

122+
func InitBlockController() {
123+
rpcClient := wshclient.GetBareRpcClient()
124+
rpcClient.EventListener.On(wps.Event_BlockClose, handleBlockCloseEvent)
125+
wshclient.EventSubCommand(rpcClient, wps.SubscriptionRequest{
126+
Event: wps.Event_BlockClose,
127+
AllScopes: true,
128+
}, nil)
129+
}
130+
131+
func handleBlockCloseEvent(event *wps.WaveEvent) {
132+
blockId, ok := event.Data.(string)
133+
if !ok {
134+
log.Printf("[blockclose] invalid event data type")
135+
return
136+
}
137+
go DestroyBlockController(blockId)
138+
}
139+
121140
// Public API Functions
122141

123142
func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts *waveobj.RuntimeOpts, force bool) error {
@@ -131,10 +150,22 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts
131150
}
132151

133152
controllerName := blockData.Meta.GetString(waveobj.MetaKey_Controller, "")
153+
connName := blockData.Meta.GetString(waveobj.MetaKey_Connection, "")
134154

135155
// Get existing controller
136156
existing := getController(blockId)
137157

158+
// Check for connection change FIRST - always destroy on conn change
159+
if existing != nil {
160+
existingStatus := existing.GetRuntimeStatus()
161+
if existingStatus.ShellProcConnName != connName {
162+
log.Printf("stopping blockcontroller %s due to conn change (from %q to %q)\n", blockId, existingStatus.ShellProcConnName, connName)
163+
DestroyBlockController(blockId)
164+
time.Sleep(100 * time.Millisecond)
165+
existing = nil
166+
}
167+
}
168+
138169
// If no controller needed, stop existing if present
139170
if controllerName == "" {
140171
if existing != nil {
@@ -144,12 +175,10 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts
144175
}
145176

146177
// Determine if we should use DurableShellController vs ShellController
147-
connName := blockData.Meta.GetString(waveobj.MetaKey_Connection, "")
148-
shouldUseDurableShellController := jobcontroller.IsBlockTermDurable(blockData) && controllerName == BlockController_Shell
178+
shouldUseDurableShellController := controllerName == BlockController_Shell && jobcontroller.IsBlockIdTermDurable(blockId)
149179

150180
// Check if we need to morph controller type
151181
if existing != nil {
152-
existingStatus := existing.GetRuntimeStatus()
153182
needsReplace := false
154183

155184
switch existing.(type) {
@@ -175,19 +204,6 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts
175204
time.Sleep(100 * time.Millisecond)
176205
existing = nil
177206
}
178-
179-
// For shell/cmd, check if connection changed (but not for job controller)
180-
if !needsReplace && (controllerName == BlockController_Shell || controllerName == BlockController_Cmd) {
181-
if _, isShellController := existing.(*ShellController); isShellController {
182-
// Check if connection changed, including between different local connections
183-
if existingStatus.ShellProcStatus == Status_Running && existingStatus.ShellProcConnName != connName {
184-
log.Printf("stopping blockcontroller %s due to conn change (from %q to %q)\n", blockId, existingStatus.ShellProcConnName, connName)
185-
DestroyBlockController(blockId)
186-
time.Sleep(100 * time.Millisecond)
187-
existing = nil
188-
}
189-
}
190-
}
191207
}
192208

193209
// Force restart if requested
@@ -237,7 +253,6 @@ func ResyncController(ctx context.Context, tabId string, blockId string, rtOpts
237253
if status.ShellProcStatus == Status_Init {
238254
// For shell/cmd, check connection status first (for non-local connections)
239255
if controllerName == BlockController_Shell || controllerName == BlockController_Cmd {
240-
connName := blockData.Meta.GetString(waveobj.MetaKey_Connection, "")
241256
if !conncontroller.IsLocalConnName(connName) {
242257
err = CheckConnStatus(blockId)
243258
if err != nil {

0 commit comments

Comments
 (0)