From 31f81d2c0c612dddfff06cae3bd59818d6cbe14b Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 22 Apr 2026 15:40:08 -0700 Subject: [PATCH 1/3] Fix Warning 1300 for varbinary columns with bytes invalid as utf8mb4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When gh-ost replays a binlog DML event, the go-mysql library returns varbinary column values as a Go `string` (not `[]byte`). In convertArg, the existing code only converted string → []byte when the column had a non-empty Charset (e.g. utf8mb4 for varchar). varbinary columns have no character set, so Charset is always "", and the string fell through unconverted. The Go MySQL driver sends `string` args as MYSQL_TYPE_VAR_STRING with the connection's utf8mb4 charset metadata attached, causing MySQL to validate the bytes. If a varbinary value (e.g. a binary UUID) contains byte sequences that are invalid utf8mb4, MySQL emits Warning 1300. With gh-ost's panic-on-warnings enabled, this aborts the migration. Fix: add an else-if branch that detects binary storage types by MySQLType (binary, varbinary, *blob) and returns []byte, so the driver sends MYSQL_TYPE_BLOB (binary data) with no charset validation. MySQLType is used rather than Charset == "" alone because test Column objects built via NewColumnList leave MySQLType unset, which would have changed the return type for all no-charset columns in existing tests. In production, inspect.go always populates MySQLType from information_schema COLUMN_TYPE (which includes the length, e.g. "varbinary(16)"). 
Co-Authored-By: Claude Sonnet 4.6 --- go/sql/types.go | 6 ++++++ go/sql/types_test.go | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/go/sql/types.go b/go/sql/types.go index 9a50bc620..101f8b5d8 100644 --- a/go/sql/types.go +++ b/go/sql/types.go @@ -70,6 +70,12 @@ func (this *Column) convertArg(arg interface{}) interface{} { if arg2Bytes != nil { if this.Charset != "" && this.charsetConversion == nil { arg = arg2Bytes + } else if this.Charset == "" && (strings.Contains(this.MySQLType, "binary") || strings.HasSuffix(this.MySQLType, "blob")) { + // varbinary/binary/blob column: no charset means binary storage. Return []byte so + // the MySQL driver sends MYSQL_TYPE_BLOB (binary) rather than MYSQL_TYPE_VAR_STRING + // (text with utf8mb4 metadata), which would cause MySQL to validate the bytes and + // emit Warning 1300 for byte sequences that are invalid utf8mb4. + arg = arg2Bytes } else { if encoding, ok := charsetEncodingMap[this.Charset]; ok { decodedBytes, _ := encoding.NewDecoder().Bytes(arg2Bytes) diff --git a/go/sql/types_test.go b/go/sql/types_test.go index 83c74073a..ff96ee967 100644 --- a/go/sql/types_test.go +++ b/go/sql/types_test.go @@ -95,6 +95,47 @@ func TestConvertArgBinaryColumnPadding(t *testing.T) { require.Equal(t, []byte{0x00, 0x00}, resultBytes[18:]) } +func TestConvertArgVarbinaryStringWithInvalidUTF8Bytes(t *testing.T) { + // go-mysql returns varbinary binlog row values as Go `string` (not `[]uint8`). + // When convertArg receives a string for a column with no Charset (varbinary), + // it must return []byte — not the original string. The Go MySQL driver sends + // string args as MYSQL_TYPE_VAR_STRING with utf8mb4 charset metadata, which + // causes MySQL to validate the bytes and emit Warning 1300 for invalid sequences. + // gh-ost's panic-on-warnings then turns that warning into a fatal migration error. + // See: uuid varbinary(16) rows whose binary UUID bytes happen to be invalid utf8mb4. 
+ rawBytes := []byte{0x91, 0xC3, 0xCD, 0x00, 0x01, 0x02} + + col := Column{ + Name: "uuid", + Charset: "", // varbinary has no character set + MySQLType: "varbinary", // set by inspect.go from information_schema data_type + } + + result := col.convertArg(string(rawBytes)) + + require.IsType(t, []byte{}, result, + "varbinary value from binlog (Go string) must be returned as []byte, not string, "+ + "to prevent MySQL driver from sending it with utf8mb4 charset metadata") + require.Equal(t, rawBytes, result.([]byte)) +} + +func TestConvertArgVarbinaryBytesWithInvalidUTF8Bytes(t *testing.T) { + // When go-mysql returns varbinary values as []uint8 (rather than string), + // convertArg should also return []byte consistently. + rawBytes := []uint8{0x91, 0xC3, 0xCD, 0x00, 0x01, 0x02} + + col := Column{ + Name: "uuid", + Charset: "", + MySQLType: "varbinary", + } + + result := col.convertArg(rawBytes) + + require.IsType(t, []byte{}, result) + require.Equal(t, []byte(rawBytes), result.([]byte)) +} + func TestConvertArgBinaryColumnNoPaddingWhenFull(t *testing.T) { // When binary value is already at full length, no padding should occur fullValue := []uint8{ From 596e87dc0eeca5bf95ab7db96144ec640b64ebff Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 22 Apr 2026 15:54:33 -0700 Subject: [PATCH 2/3] Address PR review feedback on varbinary Warning 1300 fix - Use MySQLType "varbinary(16)" in tests instead of bare "varbinary", matching the real value produced by information_schema COLUMN_TYPE (which includes length). Guards against future refactors that might switch from substring matching to exact matching. - Correct test comment: MySQLType is populated from COLUMN_TYPE, not data_type. - Broaden types.go comment to say "the connection's charset/collation metadata (often utf8mb4)" since gh-ost's connection charset is configurable via --charset. 
Co-Authored-By: Claude Sonnet 4.6 --- go/sql/types.go | 5 +++-- go/sql/types_test.go | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/go/sql/types.go b/go/sql/types.go index 101f8b5d8..1a8f8a2e2 100644 --- a/go/sql/types.go +++ b/go/sql/types.go @@ -73,8 +73,9 @@ func (this *Column) convertArg(arg interface{}) interface{} { } else if this.Charset == "" && (strings.Contains(this.MySQLType, "binary") || strings.HasSuffix(this.MySQLType, "blob")) { // varbinary/binary/blob column: no charset means binary storage. Return []byte so // the MySQL driver sends MYSQL_TYPE_BLOB (binary) rather than MYSQL_TYPE_VAR_STRING - // (text with utf8mb4 metadata), which would cause MySQL to validate the bytes and - // emit Warning 1300 for byte sequences that are invalid utf8mb4. + // (text with the connection's charset/collation metadata, often utf8mb4), which would + // cause MySQL to validate the bytes and emit Warning 1300 for byte sequences that are + // invalid in that charset. 
arg = arg2Bytes } else { if encoding, ok := charsetEncodingMap[this.Charset]; ok { diff --git a/go/sql/types_test.go b/go/sql/types_test.go index ff96ee967..570b305d3 100644 --- a/go/sql/types_test.go +++ b/go/sql/types_test.go @@ -107,15 +107,15 @@ func TestConvertArgVarbinaryStringWithInvalidUTF8Bytes(t *testing.T) { col := Column{ Name: "uuid", - Charset: "", // varbinary has no character set - MySQLType: "varbinary", // set by inspect.go from information_schema data_type + Charset: "", // varbinary has no character set + MySQLType: "varbinary(16)", // set by inspect.go from information_schema COLUMN_TYPE } result := col.convertArg(string(rawBytes)) require.IsType(t, []byte{}, result, "varbinary value from binlog (Go string) must be returned as []byte, not string, "+ - "to prevent MySQL driver from sending it with utf8mb4 charset metadata") + "to prevent MySQL driver from sending it with the connection's charset metadata") require.Equal(t, rawBytes, result.([]byte)) } @@ -127,7 +127,7 @@ func TestConvertArgVarbinaryBytesWithInvalidUTF8Bytes(t *testing.T) { col := Column{ Name: "uuid", Charset: "", - MySQLType: "varbinary", + MySQLType: "varbinary(16)", // set by inspect.go from information_schema COLUMN_TYPE } result := col.convertArg(rawBytes) From b416d6dd12e9589a4ee54a65280881464f61d48f Mon Sep 17 00:00:00 2001 From: meiji163 Date: Wed, 22 Apr 2026 16:38:13 -0700 Subject: [PATCH 3/3] appease linter --- go/sql/types_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/sql/types_test.go b/go/sql/types_test.go index 570b305d3..9275bbb85 100644 --- a/go/sql/types_test.go +++ b/go/sql/types_test.go @@ -133,7 +133,7 @@ func TestConvertArgVarbinaryBytesWithInvalidUTF8Bytes(t *testing.T) { result := col.convertArg(rawBytes) require.IsType(t, []byte{}, result) - require.Equal(t, []byte(rawBytes), result.([]byte)) + require.Equal(t, rawBytes, result.([]byte)) } func TestConvertArgBinaryColumnNoPaddingWhenFull(t *testing.T) {