From 99f6173cc50c8109f3e142d61455c5d61061faf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Apr 2026 09:49:21 +0300 Subject: [PATCH] MDEV-39263 innodb_snapshot_isolation fails to prevent lost updates under contention lock_clust_rec_read_check_and_lock(): Refine the check whether the transaction that last modified the record is still active. The only thing that should matter is whether we are allowed to see the record. If the implicit lock holder transaction was active and we succeeded in acquiring the lock, this means that the transaction had been committed. We must return DB_RECORD_CHANGED (ER_CHECKREAD) in that case. We want to avoid returning it before the lock wait, in case the other transaction will be rolled back. Thanks to Vadim Tkachenko of Percona for reporting this bug, as well as Kyle Kingsbury for the broader testing that led to this finding. Vadim's test case was simplified by me and the root cause analyzed with https://rr-project.org and an additional patch that added std::this_thread::yield() at the start of trx_t::commit_persist(). An even better spot should have been right after the call to trx_t::commit_state(). The addition to the test innodb.lock_isolation is based on the work by Teemu Ollakka, which was essential for refining this fix. --- .../suite/innodb/r/lock_isolation.result | 22 ++++++- .../innodb/r/snapshot_isolation_race.result | 37 ++++++++++++ mysql-test/suite/innodb/t/lock_isolation.test | 32 +++++++++- .../innodb/t/snapshot_isolation_race.test | 58 +++++++++++++++++++ storage/innobase/lock/lock0lock.cc | 37 ++++++++++-- storage/innobase/trx/trx0trx.cc | 1 + 6 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 mysql-test/suite/innodb/r/snapshot_isolation_race.result create mode 100644 mysql-test/suite/innodb/t/snapshot_isolation_race.test diff --git a/mysql-test/suite/innodb/r/lock_isolation.result b/mysql-test/suite/innodb/r/lock_isolation.result index 2044f001ad84a..ee91e5e779067 100644 --- a/mysql-test/suite/innodb/r/lock_isolation.result +++ b/mysql-test/suite/innodb/r/lock_isolation.result @@ -280,7 +280,6 @@ connection consistent; SELECT * FROM t1; ERROR HY000: Record has changed since last read in table 't1' COMMIT; -disconnect consistent; connection default; DROP TABLE t1,t2; # @@ -292,4 +291,25 @@ SELECT * FROM t1 FOR UPDATE; a DROP TABLE t1; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +# +# MDEV-39263 Lost update despite innodb_snapshot_isolation +# +CREATE TABLE t1 (f1 INT PRIMARY KEY, val TEXT) ENGINE=InnoDB; +connection consistent; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +SELECT * FROM t1 WHERE f1 = 1; +f1 val +connection default; +SET DEBUG_SYNC="trx_after_commit_state SIGNAL committed WAIT_FOR cleanup"; +SET STATEMENT innodb_lock_wait_timeout=3 FOR INSERT INTO t1 VALUES(1,'6'); +connection consistent; +SET DEBUG_SYNC="now WAIT_FOR committed"; +SET STATEMENT innodb_lock_wait_timeout=3 FOR +UPDATE t1 SET val = CONCAT(val, ',', '3') WHERE f1 = 1; +ERROR HY000: Record has changed since last read in table 't1' +SET DEBUG_SYNC="now SIGNAL cleanup"; +disconnect consistent; +connection default; +SET DEBUG_SYNC="RESET"; +DROP TABLE t1; # End of 10.6 tests diff --git a/mysql-test/suite/innodb/r/snapshot_isolation_race.result b/mysql-test/suite/innodb/r/snapshot_isolation_race.result new file mode 100644 index 0000000000000..9d6974d0b5e25 --- /dev/null +++ b/mysql-test/suite/innodb/r/snapshot_isolation_race.result @@ -0,0 +1,37 @@ +# +# MDEV-39263 Lost update despite innodb_snapshot_isolation +# +CREATE TABLE si_log(seq SERIAL, val INT UNSIGNED) +ENGINE=InnoDB STATS_PERSISTENT=0; +CREATE TABLE si_counter(val INT UNSIGNED) ENGINE=InnoDB STATS_PERSISTENT=0; +INSERT INTO si_counter VALUES(0); +CREATE PROCEDURE step() +BEGIN NOT ATOMIC +DECLARE EXIT HANDLER FOR 1020, 1213, 1205 BEGIN ROLLBACK; END; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +SELECT val INTO @val FROM si_counter; +UPDATE si_counter SET val=@val+1; +INSERT INTO si_log (val) VALUES (@val); +DO SLEEP(0.0001); +COMMIT; +END| +CREATE PROCEDURE test(c INT UNSIGNED) +BEGIN +WHILE c DO +SET c=c-1; +call step(); +START TRANSACTION; +SELECT COUNT(*) INTO @count FROM si_log; +SELECT SUM(val) INTO @sum FROM si_counter; +COMMIT; +IF (@count != @sum) THEN shutdown; END IF; +END WHILE; +END| +connect stop_purge,localhost,root; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +SET innodb_snapshot_isolation=ON; +disconnect stop_purge; +DROP TABLE si_log, si_counter; +DROP PROCEDURE test; +DROP PROCEDURE step; diff --git a/mysql-test/suite/innodb/t/lock_isolation.test b/mysql-test/suite/innodb/t/lock_isolation.test index 7506754cf8ac7..55305fab27e5f 100644 --- a/mysql-test/suite/innodb/t/lock_isolation.test +++ b/mysql-test/suite/innodb/t/lock_isolation.test @@ -291,7 +291,6 @@ COMMIT; SELECT * FROM t1; --enable_ps2_protocol COMMIT; ---disconnect consistent --connection default DROP TABLE t1,t2; @@ -305,5 +304,36 @@ SELECT * FROM t1 FOR UPDATE; DROP TABLE t1; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; +--echo # +--echo # MDEV-39263 Lost update despite innodb_snapshot_isolation +--echo # + +CREATE TABLE t1 (f1 INT PRIMARY KEY, val TEXT) ENGINE=InnoDB; + +--connection consistent +START TRANSACTION WITH CONSISTENT SNAPSHOT; +SELECT * FROM t1 WHERE f1 = 1; + +# Stop autocommit INSERT execution in commit_in_memory() between commit_state() +# and trx_sys.deregister_rw(), before locks are released +--connection default +SET DEBUG_SYNC="trx_after_commit_state SIGNAL committed WAIT_FOR cleanup"; +send SET STATEMENT innodb_lock_wait_timeout=3 FOR INSERT INTO t1 VALUES(1,'6'); + +--connection consistent +SET DEBUG_SYNC="now WAIT_FOR committed"; +--error ER_CHECKREAD +SET STATEMENT innodb_lock_wait_timeout=3 FOR +UPDATE t1 SET val = CONCAT(val, ',', '3') WHERE f1 = 1; + +SET DEBUG_SYNC="now SIGNAL cleanup"; +--disconnect consistent + +--connection default + +--reap +SET DEBUG_SYNC="RESET"; +DROP TABLE t1; + --source include/wait_until_count_sessions.inc --echo # End of 10.6 tests diff --git a/mysql-test/suite/innodb/t/snapshot_isolation_race.test b/mysql-test/suite/innodb/t/snapshot_isolation_race.test new file mode 100644 index 0000000000000..c8e9bc0a9b215 --- /dev/null +++ b/mysql-test/suite/innodb/t/snapshot_isolation_race.test @@ -0,0 +1,58 @@ +--source include/have_innodb.inc + +--echo # +--echo # MDEV-39263 Lost update despite innodb_snapshot_isolation +--echo # + +CREATE TABLE si_log(seq SERIAL, val INT UNSIGNED) +ENGINE=InnoDB STATS_PERSISTENT=0; + +CREATE TABLE si_counter(val INT UNSIGNED) ENGINE=InnoDB STATS_PERSISTENT=0; +INSERT INTO si_counter VALUES(0); + +delimiter |; +CREATE PROCEDURE step() +BEGIN NOT ATOMIC + DECLARE EXIT HANDLER FOR 1020, 1213, 1205 BEGIN ROLLBACK; END; + START TRANSACTION WITH CONSISTENT SNAPSHOT; + SELECT val INTO @val FROM si_counter; + UPDATE si_counter SET val=@val+1; + INSERT INTO si_log (val) VALUES (@val); + DO SLEEP(0.0001); + COMMIT; +END| + +CREATE PROCEDURE test(c INT UNSIGNED) +BEGIN + WHILE c DO + SET c=c-1; + call step(); + START TRANSACTION; + SELECT COUNT(*) INTO @count FROM si_log; + SELECT SUM(val) INTO @sum FROM si_counter; + COMMIT; + IF (@count != @sum) THEN shutdown; END IF; + END WHILE; +END| +delimiter ;| + +--connect stop_purge,localhost,root +START TRANSACTION WITH CONSISTENT SNAPSHOT; +--connection default +SET innodb_snapshot_isolation=ON; + +--disable_query_log +send call test(800); +--connect con1,localhost,root +SET innodb_snapshot_isolation=ON; +call test(800); +--disconnect con1 +--connection default +--reap + +--enable_query_log +--disconnect stop_purge + +DROP TABLE si_log, si_counter; +DROP PROCEDURE test; +DROP PROCEDURE step; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index c5b83b5be6c33..7cd1a1849e505 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -6321,16 +6321,33 @@ lock_clust_rec_read_check_and_lock( return DB_SUCCESS; } + trx_id_t trx_id = 0; + if (heap_no > PAGE_HEAP_NO_SUPREMUM && gap_mode != LOCK_GAP - && trx->snapshot_isolation + && trx->snapshot_isolation && trx->read_view.is_open()) { - trx_id_t trx_id= trx_read_trx_id(rec + - row_trx_id_offset(rec, index)); - if (!trx_sys.is_registered(trx, trx_id) - && !trx->read_view.changes_visible(trx_id) + trx_id = trx_read_trx_id(rec + row_trx_id_offset(rec, index)); + if (!trx->read_view.changes_visible(trx_id) && IF_WSREP(!(trx->is_wsrep() && wsrep_thd_skip_locking(trx->mysql_thd)), true)) { - return DB_RECORD_CHANGED; + /* Our record was last modified by a transaction that + we should not see. If that transaction has been + committed, we can return an error immediately, + without waiting for a record lock. */ + if (!trx_sys.is_registered(trx, trx_id)) { + return DB_RECORD_CHANGED; + } + /* If lock_rec_lock() below returns DB_LOCK_WAIT, + there is a chance that the implicit lock holder will + be rolled back while we are waiting for a lock + timeout. In that case, this function would be invoked + again after the lock wait has been resolved. + + If the lock_rec_lock() succeeds, we will have to + return this error. */ + } else { + /* We are allowed to see this record. */ + trx_id = 0; } } @@ -6342,6 +6359,14 @@ lock_clust_rec_read_check_and_lock( DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock"); + if (UNIV_UNLIKELY(trx_id != 0) && err <= DB_SUCCESS_LOCKED_REC) { + /* The last modifier of rec had just been committed. + (It cannot be rolled back, because our caller is holding + block->page.lock, which protects rec.) + We already determined that rec is too new for us. */ + err = DB_RECORD_CHANGED; + } + return(err); } /*********************************************************************//** diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index e2adc085c5397..c6c332661a236 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1403,6 +1403,7 @@ TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr) ut_ad(!l); #endif /* UNIV_DEBUG */ commit_state(); + DEBUG_SYNC_C("trx_after_commit_state"); if (id) {