Skip to content

Commit 026c8c5

Browse files
authored
Merge pull request #1218 from skunkworker/pr/retry-correctness
Fix COMMIT/ROLLBACK/savepoint retry data-loss; harden MySQL connection-loss detection
2 parents d90f60e + d50ac96 commit 026c8c5

7 files changed

Lines changed: 414 additions & 10 deletions

File tree

lib/arjdbc/abstract/database_statements.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def internal_exec_query(sql, name = nil, binds = NO_BINDS, prepare: false, async
4444

4545
binds = convert_legacy_binds_to_attributes(binds) if binds.first.is_a?(Array)
4646

47-
with_raw_connection do |conn|
47+
with_raw_connection(allow_retry: allow_retry, materialize_transactions: materialize_transactions) do |conn|
4848
if without_prepared_statement?(binds)
4949
log(sql, name, async: async) { conn.execute_query(sql) }
5050
else

lib/arjdbc/abstract/transaction_support.rb

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,19 @@ def begin_isolated_db_transaction(isolation)
4343
end
4444
end
4545

46+
# COMMIT and ROLLBACK must NOT be retried on a connection failure, matching
47+
# ActiveRecord's native adapters (which use `allow_retry: false`). Retrying
48+
# a COMMIT after a dropped backend is unsafe on a networked database:
49+
# `with_raw_connection` would reconnect, replay an *empty* transaction (the
50+
# original writes died with the old backend), COMMIT it successfully, and
51+
# report success - silently losing the transaction's writes. (BEGIN above
52+
# stays retryable because it is idempotent.) See the save-point note below.
53+
4654
# Commits the current database transaction.
4755
# @override
4856
def commit_db_transaction
4957
log('COMMIT', 'TRANSACTION') do
50-
with_raw_connection(allow_retry: true, materialize_transactions: false) do |conn|
58+
with_raw_connection(allow_retry: false, materialize_transactions: true) do |conn|
5159
conn.commit
5260
end
5361
end
@@ -58,14 +66,24 @@ def commit_db_transaction
5866
# @override
5967
def exec_rollback_db_transaction
6068
log('ROLLBACK', 'TRANSACTION') do
61-
with_raw_connection(allow_retry: true, materialize_transactions: false) do |conn|
69+
with_raw_connection(allow_retry: false, materialize_transactions: true) do |conn|
6270
conn.rollback
6371
end
6472
end
6573
end
6674

6775
########################## Savepoint Interface ############################
6876

77+
# Save-point operations must NOT be retried on a connection failure. They
78+
# only ever run inside an already-open transaction, so a dropped backend
79+
# means the transaction's prior writes are gone. Retrying via
80+
# `with_raw_connection(allow_retry: true)` would reconnect, replay an
81+
# *empty* transaction (the original writes died with the backend), run the
82+
# save-point statement against it, and report success - silently losing
83+
# data. ActiveRecord's native adapters route these through
84+
# `internal_execute` (allow_retry: false, materialize_transactions: true);
85+
# we mirror that here, as do the COMMIT/ROLLBACK methods above.
86+
6987
# Creates a (transactional) save-point one can rollback to.
7088
# Unlike 'plain' `ActiveRecord` it is allowed to pass a save-point name.
7189
# @param name the save-point name
@@ -74,7 +92,7 @@ def exec_rollback_db_transaction
7492
# @extension added optional name parameter
7593
def create_savepoint(name = current_savepoint_name)
7694
log("SAVEPOINT #{name}", 'TRANSACTION') do
77-
with_raw_connection(allow_retry: true, materialize_transactions: false) do |conn|
95+
with_raw_connection(allow_retry: false, materialize_transactions: true) do |conn|
7896
conn.create_savepoint(name)
7997
end
8098
end
@@ -87,7 +105,7 @@ def create_savepoint(name = current_savepoint_name)
87105
# @extension added optional name parameter
88106
def exec_rollback_to_savepoint(name = current_savepoint_name)
89107
log("ROLLBACK TO SAVEPOINT #{name}", 'TRANSACTION') do
90-
with_raw_connection(allow_retry: true, materialize_transactions: false) do |conn|
108+
with_raw_connection(allow_retry: false, materialize_transactions: true) do |conn|
91109
conn.rollback_savepoint(name)
92110
end
93111
end
@@ -100,7 +118,7 @@ def exec_rollback_to_savepoint(name = current_savepoint_name)
100118
# @extension added optional name parameter
101119
def release_savepoint(name = current_savepoint_name)
102120
log("RELEASE SAVEPOINT #{name}", 'TRANSACTION') do
103-
with_raw_connection(allow_retry: true, materialize_transactions: false) do |conn|
121+
with_raw_connection(allow_retry: false, materialize_transactions: true) do |conn|
104122
conn.release_savepoint(name)
105123
end
106124
end

lib/arjdbc/mysql/adapter.rb

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,42 @@ def jdbc_column_class
252252
::ActiveRecord::ConnectionAdapters::MySQL::Column
253253
end
254254

255+
# MySQL / MariaDB surface a dropped server connection as a JDBC error in
256+
# SQLState class 08 (connection exception) - most commonly 08S01
257+
# "Communications link failure" - or with one of the "server gone" vendor
258+
# error codes. The driver may also wrap it in a recoverable / non-transient
259+
# connection exception. None of these are caught by the message- and
260+
# error-code-based cases below (which fall through to a plain JDBCError /
261+
# StatementInvalid), so AR's with_raw_connection reconnect/retry machinery
262+
# never kicks in. See https://dev.mysql.com/doc/connector-j/en/connector-j-reference-error-sqlstates.html
263+
CONNECTION_FAILURE_SQL_STATES = %w[
264+
08000
265+
08001
266+
08003
267+
08004
268+
08006
269+
08007
270+
08S01
271+
].freeze
272+
# CR_SERVER_GONE_ERROR (2006), CR_SERVER_LOST (2013),
273+
# ER_SERVER_SHUTDOWN (1053), ER_CONNECTION_KILLED (1927),
274+
# ER_CLIENT_INTERACTION_TIMEOUT (4031).
275+
CONNECTION_FAILURE_ERROR_CODES = [2006, 2013, 1053, 1927, 4031].freeze
276+
CONNECTION_FAILURE_MESSAGES = /
277+
Communications?\ link\ failure |
278+
No\ operations\ allowed\ after\ connection\ closed |
279+
Connection\.*\ refused |
280+
Could\ not\ connect\ to |
281+
Server\ shutdown\ in\ progress |
282+
Connection\ is\ closed
283+
/x.freeze
284+
private_constant :CONNECTION_FAILURE_SQL_STATES, :CONNECTION_FAILURE_ERROR_CODES, :CONNECTION_FAILURE_MESSAGES
285+
255286
def translate_exception(exception, message:, sql:, binds:)
287+
if exception.is_a?(::ActiveRecord::JDBCError) && connection_lost?(exception)
288+
return ::ActiveRecord::ConnectionFailed.new(message, sql: sql, binds: binds, connection_pool: @pool)
289+
end
290+
256291
case message
257292
when /Table .* doesn't exist/i
258293
StatementInvalid.new(message, sql: sql, binds: binds, connection_pool: @pool)
@@ -263,6 +298,25 @@ def translate_exception(exception, message:, sql:, binds:)
263298
end
264299
end
265300

301+
# Detects a lost server connection from a JDBC error so it can be
302+
# translated to ActiveRecord::ConnectionFailed (retryable). Mirrors the
303+
# PostgreSQL adapter's handling of backend disconnects (e.g. a proxy such
304+
# as ProxySQL dropping an idle connection).
305+
def connection_lost?(exception)
306+
state = exception.sql_state if exception.respond_to?(:sql_state)
307+
return true if state && CONNECTION_FAILURE_SQL_STATES.include?(state)
308+
309+
code = exception.error_code if exception.respond_to?(:error_code)
310+
return true if code && CONNECTION_FAILURE_ERROR_CODES.include?(code)
311+
312+
message = exception.message
313+
return true if message && CONNECTION_FAILURE_MESSAGES.match?(message)
314+
315+
cause = exception.cause if exception.respond_to?(:cause)
316+
cause.is_a?(Java::JavaSql::SQLRecoverableException) ||
317+
cause.is_a?(Java::JavaSql::SQLNonTransientConnectionException)
318+
end
319+
266320
# defined in MySQL::DatabaseStatements which is not included
267321
def default_insert_value(column)
268322
super unless column.auto_increment?
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
require 'db/mysql'
2+
3+
# Regression tests for the COMMIT-retry data-loss footgun (#1) on MySQL.
4+
#
5+
# commit_db_transaction / exec_rollback_db_transaction must go through
6+
# with_raw_connection(allow_retry: false), matching ActiveRecord's native
7+
# MySQL adapter. With allow_retry: true, a connection drop at COMMIT time can
8+
# make AR reconnect, replay an *empty* transaction (the original writes died
9+
# with the dropped backend), COMMIT it successfully, and report success -
10+
# silently losing the transaction's writes.
11+
#
12+
# Note on MySQL vs PostgreSQL: PostgreSQL translates backend drops (e.g.
13+
# pgbouncer reaping a connection) into a retryable ActiveRecord::ConnectionFailed,
14+
# which directly exposes this footgun. MySQL currently translates connection
15+
# loss to a plain ActiveRecord::JDBCError, which is NOT a retryable connection
16+
# error, so AR will not retry a failed COMMIT today regardless of the flag.
17+
# These tests therefore pin the safe contract directly (allow_retry: false) so
18+
# the footgun cannot be silently reintroduced if MySQL later gains
19+
# ConnectionFailed translation.
20+
class MySQLCommitNoRetryTest < Test::Unit::TestCase
21+
22+
def setup
23+
@adapter = ActiveRecord::Base.connection
24+
end
25+
26+
def test_commit_db_transaction_does_not_allow_retry
27+
@adapter.expects(:with_raw_connection)
28+
.with(allow_retry: false, materialize_transactions: true)
29+
.returns(nil)
30+
31+
@adapter.commit_db_transaction
32+
end
33+
34+
def test_exec_rollback_db_transaction_does_not_allow_retry
35+
@adapter.expects(:with_raw_connection)
36+
.with(allow_retry: false, materialize_transactions: true)
37+
.returns(nil)
38+
39+
@adapter.exec_rollback_db_transaction
40+
end
41+
42+
end
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
require 'db/mysql'
2+
3+
# Regression tests for the MySQL/MariaDB connection-lost translation, the
4+
# MySQL analog of the PostgreSQL backend-disconnect patch.
5+
#
6+
# A JDBCError whose SQLState (class 08), vendor error code, message, or wrapped
7+
# Java exception indicates the server connection is gone must translate to
8+
# ActiveRecord::ConnectionFailed so AR's with_raw_connection(allow_retry:)
9+
# machinery will reconnect and retry. Without this, a proxy (e.g. ProxySQL) or
10+
# the server dropping an idle connection surfaces as a raw JDBCError /
11+
# StatementInvalid and the safe retry never triggers.
12+
class MySQLConnectionLostTest < Test::Unit::TestCase
13+
14+
def setup
15+
@adapter = ActiveRecord::Base.connection
16+
end
17+
18+
# https://dev.mysql.com/doc/connector-j/en/connector-j-reference-error-sqlstates.html
19+
# Class 08 - Connection Exception (08S01 = "Communications link failure").
20+
CONNECTION_FAILURE_SQL_STATES = %w[
21+
08000
22+
08001
23+
08003
24+
08004
25+
08006
26+
08007
27+
08S01
28+
]
29+
30+
# CR_SERVER_GONE_ERROR, CR_SERVER_LOST, ER_SERVER_SHUTDOWN,
31+
# ER_CONNECTION_KILLED, ER_CLIENT_INTERACTION_TIMEOUT.
32+
CONNECTION_FAILURE_ERROR_CODES = [2006, 2013, 1053, 1927, 4031]
33+
34+
CONNECTION_FAILURE_MESSAGES = [
35+
'Communications link failure',
36+
'No operations allowed after connection closed',
37+
'Connection refused',
38+
'Could not connect to address=(host=localhost)(port=3306)',
39+
'Server shutdown in progress',
40+
'Connection is closed',
41+
]
42+
43+
CONNECTION_FAILURE_SQL_STATES.each do |state|
44+
define_method("test_translates_sqlstate_#{state}_to_connection_failed") do
45+
err = jdbc_error('boom', sql_state: state)
46+
result = translate(err)
47+
assert_kind_of ActiveRecord::ConnectionFailed, result,
48+
"expected SQLState #{state} to translate to ConnectionFailed, got #{result.class}"
49+
end
50+
end
51+
52+
CONNECTION_FAILURE_ERROR_CODES.each do |code|
53+
define_method("test_translates_error_code_#{code}_to_connection_failed") do
54+
err = jdbc_error('boom', error_code: code)
55+
result = translate(err)
56+
assert_kind_of ActiveRecord::ConnectionFailed, result,
57+
"expected error code #{code} to translate to ConnectionFailed, got #{result.class}"
58+
end
59+
end
60+
61+
CONNECTION_FAILURE_MESSAGES.each_with_index do |msg, i|
62+
define_method("test_translates_message_#{i}_to_connection_failed") do
63+
err = jdbc_error(msg)
64+
result = translate(err)
65+
assert_kind_of ActiveRecord::ConnectionFailed, result,
66+
"expected message #{msg.inspect} to translate to ConnectionFailed, got #{result.class}"
67+
end
68+
end
69+
70+
def test_recoverable_jdbc_exception_translates_to_connection_failed
71+
cause = Java::JavaSql::SQLRecoverableException.new('socket gone')
72+
err = ActiveRecord::JDBCError.new('socket gone', cause)
73+
assert_kind_of ActiveRecord::ConnectionFailed, translate(err)
74+
end
75+
76+
def test_non_transient_connection_exception_translates_to_connection_failed
77+
cause = Java::JavaSql::SQLNonTransientConnectionException.new('link down')
78+
err = ActiveRecord::JDBCError.new('link down', cause)
79+
assert_kind_of ActiveRecord::ConnectionFailed, translate(err)
80+
end
81+
82+
def test_does_not_translate_duplicate_entry_to_connection_failed
83+
# ER_DUP_ENTRY (1062) is a data error, not a connection failure.
84+
err = jdbc_error("Duplicate entry 'x' for key 'PRIMARY'", sql_state: '23000', error_code: 1062)
85+
result = translate(err)
86+
assert_kind_of ActiveRecord::RecordNotUnique, result
87+
assert !result.is_a?(ActiveRecord::ConnectionFailed)
88+
end
89+
90+
def test_does_not_translate_syntax_error_to_connection_failed
91+
# ER_PARSE_ERROR (1064) must not be mistaken for a connection failure.
92+
err = jdbc_error('You have an error in your SQL syntax', sql_state: '42000', error_code: 1064)
93+
result = translate(err)
94+
assert !result.is_a?(ActiveRecord::ConnectionFailed),
95+
"syntax error should not translate to ConnectionFailed, got #{result.class}"
96+
end
97+
98+
private
99+
100+
def translate(jdbc_error)
101+
@adapter.send(:translate_exception_class, jdbc_error, 'SELECT 1', [])
102+
end
103+
104+
def jdbc_error(message, sql_state: nil, error_code: 0)
105+
cause = Java::JavaSql::SQLException.new(message, sql_state, error_code)
106+
ActiveRecord::JDBCError.new(message, cause)
107+
end
108+
end

0 commit comments

Comments
 (0)