Skip to content

Commit 1a56737

Browse files
blinkagent[bot]blink-so[bot]ethanndickson
authored
fix: move defer out of retry loop in waitForJob (#308)
## Problem The `waitForJob` function had a `defer closer.Close()` inside a `for` loop. Go defers execute when the *function* returns, not when the loop iteration ends. This means: 1. Each retry iteration adds a new deferred close that only runs when the function returns, not at the end of each iteration. 2. On retries, multiple closers accumulate unnecessarily. Note: In v0.0.12, this was also a nil pointer dereference because `defer closer.Close()` was called *before* checking the error from `TemplateVersionLogsAfter`, causing a panic when `closer` was nil. That ordering was fixed in v0.0.13, but the defer-in-loop issue remained. ## Fix Extract the loop body into a separate `waitForJobOnce` function so the `defer` executes properly at the end of each attempt, closing the log stream before retrying. ## Context This was observed as a SIGSEGV panic in CI when using `terraform-provider-coderd` v0.0.12 in the `coder/coder` dogfood workflow: ``` panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x1000fcd] goroutine 41 [running]: github.com/coder/terraform-provider-coderd/internal/provider.waitForJob(...) github.com/coder/terraform-provider-coderd/internal/provider/template_resource.go:1086 +0x1ad ``` --------- Co-authored-by: blink-so[bot] <211532188+blink-so[bot]@users.noreply.github.com> Co-authored-by: Ethan Dickson <ethanndickson@gmail.com>
1 parent 7bd3d02 commit 1a56737

4 files changed

Lines changed: 315 additions & 40 deletions

File tree

go.mod

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ go 1.25.6
44

55
require (
66
cdr.dev/slog/v3 v3.0.0-rc1
7-
github.com/coder/coder/v2 v2.31.0
7+
github.com/coder/coder/v2 v2.30.1
8+
github.com/coder/retry v1.5.1
9+
github.com/coder/websocket v1.8.14
810
github.com/docker/docker v28.5.2+incompatible
911
github.com/docker/go-connections v0.6.0
1012
github.com/google/uuid v1.6.0
@@ -63,7 +65,6 @@ require (
6365
github.com/coder/pretty v0.0.0-20230908205945-e89ba86370e0 // indirect
6466
github.com/coder/serpent v0.13.0 // indirect
6567
github.com/coder/terraform-provider-coder/v2 v2.13.1 // indirect
66-
github.com/coder/websocket v1.8.14 // indirect
6768
github.com/containerd/errdefs v1.0.0 // indirect
6869
github.com/containerd/errdefs/pkg v0.3.0 // indirect
6970
github.com/coreos/go-oidc/v3 v3.17.0 // indirect

go.sum

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,12 @@ github.com/clipperhouse/uax29/v2 v2.3.0 h1:SNdx9DVUqMoBuBoW3iLOj4FQv3dN5mDtuqwuh
118118
github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
119119
github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8=
120120
github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4=
121-
github.com/coder/coder/v2 v2.31.0 h1:Qx6yLXyaaxZ5k9UOTmUQ9457iz7togEnHtc05xylIks=
122-
github.com/coder/coder/v2 v2.31.0/go.mod h1:2DE8CPEuDj07THeNcbClDlv0bxR9ThA3A4TOHxBHuZY=
121+
github.com/coder/coder/v2 v2.30.1 h1:5dxGKxWx80xb6lNd958y8Y4h3fBbQubDgIooHTTv/RQ=
122+
github.com/coder/coder/v2 v2.30.1/go.mod h1:w40ThqnpVr727SVnu3wwUrK2woxNx1MrV1zVxxABimk=
123123
github.com/coder/pretty v0.0.0-20230908205945-e89ba86370e0 h1:3A0ES21Ke+FxEM8CXx9n47SZOKOpgSE1bbJzlE4qPVs=
124124
github.com/coder/pretty v0.0.0-20230908205945-e89ba86370e0/go.mod h1:5UuS2Ts+nTToAMeOjNlnHFkPahrtDkmpydBen/3wgZc=
125+
github.com/coder/retry v1.5.1 h1:iWu8YnD8YqHs3XwqrqsjoBTAVqT9ml6z9ViJ2wlMiqc=
126+
github.com/coder/retry v1.5.1/go.mod h1:blHMk9vs6LkoRT9ZHyuZo360cufXEhrxqvEzeMtRGoY=
125127
github.com/coder/serpent v0.13.0 h1:6EoWjpEypkb8cS6i0eCF4qoAv9vrEVaX26RW+3FMMvo=
126128
github.com/coder/serpent v0.13.0/go.mod h1:7OIvFBYMd+OqarMy5einBl8AtRr8LliopVU7pyrwucY=
127129
github.com/coder/terraform-provider-coder/v2 v2.13.1 h1:dtPaJUvueFm+XwBPUMWQCc5Z1QUQBW4B4RNyzX4h4y8=

internal/provider/template_resource.go

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ import (
88
"io"
99
"slices"
1010
"strings"
11+
"time"
1112

1213
"cdr.dev/slog/v3"
1314
"github.com/coder/coder/v2/coderd/util/ptr"
1415
"github.com/coder/coder/v2/codersdk"
1516
"github.com/coder/coder/v2/provisionersdk"
17+
"github.com/coder/retry"
1618
"github.com/coder/terraform-provider-coderd/internal/codersdkvalidator"
1719
"github.com/google/uuid"
1820
"github.com/hashicorp/terraform-plugin-framework-validators/listvalidator"
@@ -1104,49 +1106,70 @@ func uploadDirectory(ctx context.Context, client *codersdk.Client, logger slog.L
11041106

11051107
func waitForJob(ctx context.Context, client *codersdk.Client, version *codersdk.TemplateVersion) ([]codersdk.ProvisionerJobLog, error) {
11061108
const maxRetries = 3
1107-
var jobLogs []codersdk.ProvisionerJobLog
1108-
for retries := 0; retries < maxRetries; retries++ {
1109-
logs, closer, err := client.TemplateVersionLogsAfter(ctx, version.ID, 0)
1109+
var allLogs []codersdk.ProvisionerJobLog
1110+
var lastLogID int64
1111+
1112+
for attempts, retrier := 0, retry.New(500*time.Millisecond, 5*time.Second); attempts < maxRetries && retrier.Wait(ctx); attempts++ {
1113+
logs, done, err := waitForJobOnce(ctx, client, version, lastLogID)
1114+
allLogs = append(allLogs, logs...)
1115+
if len(logs) > 0 {
1116+
lastLogID = logs[len(logs)-1].ID
1117+
}
11101118
if err != nil {
1111-
return jobLogs, fmt.Errorf("begin streaming logs: %w", err)
1119+
return allLogs, err
11121120
}
1113-
defer func() {
1114-
if err := closer.Close(); err != nil {
1115-
tflog.Warn(ctx, "error closing template version log stream", map[string]any{
1116-
"error": err,
1117-
})
1118-
}
1119-
}()
1120-
for {
1121-
logs, ok := <-logs
1122-
if !ok {
1123-
break
1124-
}
1125-
tflog.Info(ctx, logs.Output, map[string]interface{}{
1126-
"job_id": logs.ID,
1127-
"job_stage": logs.Stage,
1128-
"log_source": logs.Source,
1129-
"level": logs.Level,
1130-
"created_at": logs.CreatedAt,
1131-
})
1132-
if logs.Output != "" {
1133-
jobLogs = append(jobLogs, logs)
1134-
}
1121+
if done {
1122+
return allLogs, nil
11351123
}
1136-
latestResp, err := client.TemplateVersion(ctx, version.ID)
1137-
if err != nil {
1138-
return jobLogs, err
1124+
tflog.Warn(ctx, fmt.Sprintf("provisioner job still active, retrying (attempt %d/%d)", attempts+1, maxRetries))
1125+
}
1126+
1127+
if err := ctx.Err(); err != nil {
1128+
return allLogs, err
1129+
}
1130+
return allLogs, fmt.Errorf("provisioner job did not complete after %d retries", maxRetries)
1131+
}
1132+
1133+
func waitForJobOnce(ctx context.Context, client *codersdk.Client, version *codersdk.TemplateVersion, after int64) ([]codersdk.ProvisionerJobLog, bool, error) {
1134+
logCh, closer, err := client.TemplateVersionLogsAfter(ctx, version.ID, after)
1135+
if err != nil {
1136+
return nil, false, fmt.Errorf("begin streaming logs: %w", err)
1137+
}
1138+
defer func() {
1139+
if err := closer.Close(); err != nil {
1140+
tflog.Warn(ctx, "error closing template version log stream", map[string]any{
1141+
"error": err,
1142+
})
11391143
}
1140-
if latestResp.Job.Status.Active() {
1141-
tflog.Warn(ctx, fmt.Sprintf("provisioner job still active, continuing to wait...: %s", latestResp.Job.Status))
1142-
continue
1144+
}()
1145+
var jobLogs []codersdk.ProvisionerJobLog
1146+
for {
1147+
logMsg, ok := <-logCh
1148+
if !ok {
1149+
break
11431150
}
1144-
if latestResp.Job.Status != codersdk.ProvisionerJobSucceeded {
1145-
return jobLogs, fmt.Errorf("provisioner job did not succeed: %s (%s)", latestResp.Job.Status, latestResp.Job.Error)
1151+
tflog.Info(ctx, logMsg.Output, map[string]interface{}{
1152+
"job_id": logMsg.ID,
1153+
"job_stage": logMsg.Stage,
1154+
"log_source": logMsg.Source,
1155+
"level": logMsg.Level,
1156+
"created_at": logMsg.CreatedAt,
1157+
})
1158+
if logMsg.Output != "" {
1159+
jobLogs = append(jobLogs, logMsg)
11461160
}
1147-
return jobLogs, nil
11481161
}
1149-
return jobLogs, fmt.Errorf("provisioner job did not complete after %d retries", maxRetries)
1162+
latestResp, err := client.TemplateVersion(ctx, version.ID)
1163+
if err != nil {
1164+
return jobLogs, false, err
1165+
}
1166+
if latestResp.Job.Status.Active() {
1167+
return jobLogs, false, nil
1168+
}
1169+
if latestResp.Job.Status != codersdk.ProvisionerJobSucceeded {
1170+
return jobLogs, false, fmt.Errorf("provisioner job did not succeed: %s (%s)", latestResp.Job.Status, latestResp.Job.Error)
1171+
}
1172+
return jobLogs, true, nil
11501173
}
11511174

11521175
type newVersionRequest struct {

0 commit comments

Comments
 (0)