Skip to content

Commit fc98679

Browse files
authored
Merge branch 'master' into dependabot/go_modules/go_modules-dd7da38a6b
2 parents 03e0426 + 8bc63f0 commit fc98679

45 files changed

Lines changed: 2597 additions & 243 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/CONTRIBUTING.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,24 @@ Here are a few things you can do that will increase the likelihood of your pull
1919
- Keep your change as focused as possible. If there are multiple changes you would like to make that are not dependent upon each other, consider submitting them as separate pull requests.
2020
- Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
2121

22+
## Development Guidelines
23+
24+
### Channel Safety
25+
26+
When working with channels in goroutines, it's critical to prevent deadlocks that can occur when a channel receiver exits due to an error while senders are still trying to send values. Always use `base.SendWithContext` for channel sends to avoid deadlocks:
27+
28+
```go
29+
// ✅ CORRECT - Uses helper to prevent deadlock
30+
if err := base.SendWithContext(ctx, ch, value); err != nil {
31+
return err // context was cancelled
32+
}
33+
34+
// ❌ WRONG - Can deadlock if receiver exits
35+
ch <- value
36+
```
37+
38+
Even if the destination channel is buffered, deadlocks could still occur if the buffer fills up and the receiver exits, so it's important to use `SendWithContext` in those cases as well.
39+
2240
## Resources
2341

2442
- [Contributing to Open Source on GitHub](https://guides.github.com/activities/contributing-to-open-source/)

.github/workflows/replica-tests.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,20 @@ jobs:
2828
- name: Run tests
2929
run: script/docker-gh-ost-replica-tests run
3030

31+
- name: Set artifact name
32+
if: failure()
33+
run: |
34+
ARTIFACT_NAME=$(echo "${{ matrix.image }}" | tr '/:' '-')
35+
echo "ARTIFACT_NAME=test-logs-${ARTIFACT_NAME}" >> $GITHUB_ENV
36+
37+
- name: Upload test logs on failure
38+
if: failure()
39+
uses: actions/upload-artifact@v4
40+
with:
41+
name: ${{ env.ARTIFACT_NAME }}
42+
path: /tmp/gh-ost-test.*
43+
retention-days: 7
44+
3145
- name: Teardown environment
3246
if: always()
3347
run: script/docker-gh-ost-replica-tests down

doc/hooks.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ The full list of supported hooks is best found in code: [hooks.go](https://githu
4949
- `gh-ost-on-before-cut-over`
5050
- `gh-ost-on-success`
5151
- `gh-ost-on-failure`
52+
- `gh-ost-on-batch-copy-retry`
5253

5354
### Context
5455

@@ -82,6 +83,7 @@ The following variable are available on particular hooks:
8283

8384
- `GH_OST_COMMAND` is only available in `gh-ost-on-interactive-command`
8485
- `GH_OST_STATUS` is only available in `gh-ost-on-status`
86+
- `GH_OST_LAST_BATCH_COPY_ERROR` is only available in `gh-ost-on-batch-copy-retry`
8587

8688
### Examples
8789

go/base/context.go

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package base
77

88
import (
9+
"context"
910
"fmt"
1011
"math"
1112
"os"
@@ -225,6 +226,16 @@ type MigrationContext struct {
225226
InCutOverCriticalSectionFlag int64
226227
PanicAbort chan error
227228

229+
// Context for cancellation signaling across all goroutines
230+
// Stored in struct as it spans the entire migration lifecycle, not per-function.
231+
// context.Context is safe for concurrent use by multiple goroutines.
232+
ctx context.Context //nolint:containedctx
233+
cancelFunc context.CancelFunc
234+
235+
// Stores the fatal error that triggered abort
236+
AbortError error
237+
abortMutex *sync.Mutex
238+
228239
OriginalTableColumnsOnApplier *sql.ColumnList
229240
OriginalTableColumns *sql.ColumnList
230241
OriginalTableVirtualColumns *sql.ColumnList
@@ -293,6 +304,7 @@ type ContextConfig struct {
293304
}
294305

295306
func NewMigrationContext() *MigrationContext {
307+
ctx, cancelFunc := context.WithCancel(context.Background())
296308
return &MigrationContext{
297309
Uuid: uuid.NewString(),
298310
defaultNumRetries: 60,
@@ -313,6 +325,9 @@ func NewMigrationContext() *MigrationContext {
313325
lastHeartbeatOnChangelogMutex: &sync.Mutex{},
314326
ColumnRenameMap: make(map[string]string),
315327
PanicAbort: make(chan error),
328+
ctx: ctx,
329+
cancelFunc: cancelFunc,
330+
abortMutex: &sync.Mutex{},
316331
Log: NewDefaultLogger(),
317332
}
318333
}
@@ -611,6 +626,13 @@ func (this *MigrationContext) GetIteration() int64 {
611626
return atomic.LoadInt64(&this.Iteration)
612627
}
613628

629+
func (this *MigrationContext) SetNextIterationRangeMinValues() {
630+
this.MigrationIterationRangeMinValues = this.MigrationIterationRangeMaxValues
631+
if this.MigrationIterationRangeMinValues == nil {
632+
this.MigrationIterationRangeMinValues = this.MigrationRangeMinValues
633+
}
634+
}
635+
614636
func (this *MigrationContext) MarkPointOfInterest() int64 {
615637
this.pointOfInterestTimeMutex.Lock()
616638
defer this.pointOfInterestTimeMutex.Unlock()
@@ -970,9 +992,59 @@ func (this *MigrationContext) GetGhostTriggerName(triggerName string) string {
970992
return triggerName + this.TriggerSuffix
971993
}
972994

973-
// validateGhostTriggerLength check if the ghost trigger name length is not more than 64 characters
995+
// ValidateGhostTriggerLengthBelowMaxLength checks if the given trigger name (already transformed
996+
// by GetGhostTriggerName) does not exceed the maximum allowed length.
974997
func (this *MigrationContext) ValidateGhostTriggerLengthBelowMaxLength(triggerName string) bool {
975-
ghostTriggerName := this.GetGhostTriggerName(triggerName)
998+
return utf8.RuneCountInString(triggerName) <= mysql.MaxTableNameLength
999+
}
1000+
1001+
// GetContext returns the migration context for cancellation checking
1002+
func (this *MigrationContext) GetContext() context.Context {
1003+
return this.ctx
1004+
}
1005+
1006+
// SetAbortError stores the fatal error that triggered abort
1007+
// Only the first error is stored (subsequent errors are ignored)
1008+
func (this *MigrationContext) SetAbortError(err error) {
1009+
this.abortMutex.Lock()
1010+
defer this.abortMutex.Unlock()
1011+
if this.AbortError == nil {
1012+
this.AbortError = err
1013+
}
1014+
}
9761015

977-
return utf8.RuneCountInString(ghostTriggerName) <= mysql.MaxTableNameLength
1016+
// GetAbortError retrieves the stored abort error
1017+
func (this *MigrationContext) GetAbortError() error {
1018+
this.abortMutex.Lock()
1019+
defer this.abortMutex.Unlock()
1020+
return this.AbortError
1021+
}
1022+
1023+
// CancelContext cancels the migration context to signal all goroutines to stop
1024+
// The cancel function is safe to call multiple times and from multiple goroutines.
1025+
func (this *MigrationContext) CancelContext() {
1026+
if this.cancelFunc != nil {
1027+
this.cancelFunc()
1028+
}
1029+
}
1030+
1031+
// SendWithContext attempts to send a value to a channel, but returns early
1032+
// if the context is cancelled. This prevents goroutine deadlocks when the
1033+
// channel receiver has exited due to an error.
1034+
//
1035+
// Use this instead of bare channel sends (ch <- val) in goroutines to ensure
1036+
// proper cleanup when the migration is aborted.
1037+
//
1038+
// Example:
1039+
//
1040+
// if err := base.SendWithContext(ctx, ch, value); err != nil {
1041+
// return err // context was cancelled
1042+
// }
1043+
func SendWithContext[T any](ctx context.Context, ch chan<- T, val T) error {
1044+
select {
1045+
case ch <- val:
1046+
return nil
1047+
case <-ctx.Done():
1048+
return ctx.Err()
1049+
}
9781050
}

go/base/context_test.go

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
package base
77

88
import (
9+
"errors"
910
"os"
1011
"strings"
12+
"sync"
1113
"testing"
1214
"time"
1315

@@ -86,38 +88,69 @@ func TestGetTriggerNames(t *testing.T) {
8688
}
8789

8890
func TestValidateGhostTriggerLengthBelowMaxLength(t *testing.T) {
91+
// Tests simulate the real call pattern: GetGhostTriggerName first, then validate the result.
8992
{
93+
// Short trigger name with suffix appended: well under 64 chars
9094
context := NewMigrationContext()
9195
context.TriggerSuffix = "_gho"
92-
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength("my_trigger"))
96+
ghostName := context.GetGhostTriggerName("my_trigger") // "my_trigger_gho" = 14 chars
97+
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
9398
}
9499
{
100+
// 64-char original + "_ghost" suffix = 70 chars → exceeds limit
95101
context := NewMigrationContext()
96102
context.TriggerSuffix = "_ghost"
97-
require.False(t, context.ValidateGhostTriggerLengthBelowMaxLength(strings.Repeat("my_trigger_ghost", 4))) // 64 characters + "_ghost"
103+
ghostName := context.GetGhostTriggerName(strings.Repeat("my_trigger_ghost", 4)) // 64 + 6 = 70
104+
require.False(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
98105
}
99106
{
107+
// 48-char original + "_ghost" suffix = 54 chars → valid
100108
context := NewMigrationContext()
101109
context.TriggerSuffix = "_ghost"
102-
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(strings.Repeat("my_trigger_ghost", 3))) // 48 characters + "_ghost"
110+
ghostName := context.GetGhostTriggerName(strings.Repeat("my_trigger_ghost", 3)) // 48 + 6 = 54
111+
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
103112
}
104113
{
114+
// RemoveTriggerSuffix: 64-char name ending in "_ghost" → suffix removed → 58 chars → valid
105115
context := NewMigrationContext()
106116
context.TriggerSuffix = "_ghost"
107117
context.RemoveTriggerSuffix = true
108-
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(strings.Repeat("my_trigger_ghost", 4))) // 64 characters + "_ghost" removed
118+
ghostName := context.GetGhostTriggerName(strings.Repeat("my_trigger_ghost", 4)) // suffix removed → 58
119+
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
109120
}
110121
{
122+
// RemoveTriggerSuffix: name doesn't end in suffix → suffix appended → 65 + 6 = 71 chars → exceeds
111123
context := NewMigrationContext()
112124
context.TriggerSuffix = "_ghost"
113125
context.RemoveTriggerSuffix = true
114-
require.False(t, context.ValidateGhostTriggerLengthBelowMaxLength(strings.Repeat("my_trigger_ghost", 4)+"X")) // 65 characters + "_ghost" not removed
126+
ghostName := context.GetGhostTriggerName(strings.Repeat("my_trigger_ghost", 4) + "X") // no match, appended → 71
127+
require.False(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
115128
}
116129
{
130+
// RemoveTriggerSuffix: 70-char name ending in "_ghost" → suffix removed → 64 chars → exactly at limit → valid
117131
context := NewMigrationContext()
118132
context.TriggerSuffix = "_ghost"
119133
context.RemoveTriggerSuffix = true
120-
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(strings.Repeat("my_trigger_ghost", 4)+"_ghost")) // 70 characters + last "_ghost" removed
134+
ghostName := context.GetGhostTriggerName(strings.Repeat("my_trigger_ghost", 4) + "_ghost") // suffix removed → 64
135+
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
136+
}
137+
{
138+
// Edge case: exactly 64 chars after transformation → valid (boundary test)
139+
context := NewMigrationContext()
140+
context.TriggerSuffix = "_ght"
141+
originalName := strings.Repeat("x", 60) // 60 chars
142+
ghostName := context.GetGhostTriggerName(originalName) // 60 + 4 = 64
143+
require.Equal(t, 64, len(ghostName))
144+
require.True(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
145+
}
146+
{
147+
// Edge case: 65 chars after transformation → exceeds (boundary test)
148+
context := NewMigrationContext()
149+
context.TriggerSuffix = "_ght"
150+
originalName := strings.Repeat("x", 61) // 61 chars
151+
ghostName := context.GetGhostTriggerName(originalName) // 61 + 4 = 65
152+
require.Equal(t, 65, len(ghostName))
153+
require.False(t, context.ValidateGhostTriggerLengthBelowMaxLength(ghostName))
121154
}
122155
}
123156

@@ -182,3 +215,58 @@ func TestReadConfigFile(t *testing.T) {
182215
}
183216
}
184217
}
218+
219+
func TestSetAbortError_StoresFirstError(t *testing.T) {
220+
ctx := NewMigrationContext()
221+
222+
err1 := errors.New("first error")
223+
err2 := errors.New("second error")
224+
225+
ctx.SetAbortError(err1)
226+
ctx.SetAbortError(err2)
227+
228+
got := ctx.GetAbortError()
229+
if got != err1 { //nolint:errorlint // Testing pointer equality for sentinel error
230+
t.Errorf("Expected first error %v, got %v", err1, got)
231+
}
232+
}
233+
234+
func TestSetAbortError_ThreadSafe(t *testing.T) {
235+
ctx := NewMigrationContext()
236+
237+
var wg sync.WaitGroup
238+
errs := []error{
239+
errors.New("error 1"),
240+
errors.New("error 2"),
241+
errors.New("error 3"),
242+
}
243+
244+
// Launch 3 goroutines trying to set error concurrently
245+
for _, err := range errs {
246+
wg.Add(1)
247+
go func(e error) {
248+
defer wg.Done()
249+
ctx.SetAbortError(e)
250+
}(err)
251+
}
252+
253+
wg.Wait()
254+
255+
// Should store exactly one of the errors
256+
got := ctx.GetAbortError()
257+
if got == nil {
258+
t.Fatal("Expected error to be stored, got nil")
259+
}
260+
261+
// Verify it's one of the errors we sent
262+
found := false
263+
for _, err := range errs {
264+
if got == err { //nolint:errorlint // Testing pointer equality for sentinel error
265+
found = true
266+
break
267+
}
268+
}
269+
if !found {
270+
t.Errorf("Stored error %v not in list of sent errors", got)
271+
}
272+
}

go/cmd/gh-ost/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,9 @@ func main() {
316316
if migrationContext.CheckpointIntervalSeconds < 10 {
317317
migrationContext.Log.Fatalf("--checkpoint-seconds should be >=10")
318318
}
319+
if migrationContext.CountTableRows && migrationContext.PanicOnWarnings {
320+
migrationContext.Log.Warning("--exact-rowcount with --panic-on-warnings: row counts cannot be exact due to warning detection")
321+
}
319322

320323
switch *cutOver {
321324
case "atomic", "default", "":

0 commit comments

Comments
 (0)