diff --git a/mysql-test/suite/gcol/r/innodb_virtual_basic.result b/mysql-test/suite/gcol/r/innodb_virtual_basic.result index 35534d68e632a..2e3de6045bf27 100644 --- a/mysql-test/suite/gcol/r/innodb_virtual_basic.result +++ b/mysql-test/suite/gcol/r/innodb_virtual_basic.result @@ -48,9 +48,9 @@ INSERT INTO t VALUES (1290, 212, DEFAULT, "xmx"); ROLLBACK; SELECT c FROM t; c -NULL 13 29 +NULL SELECT * FROM t; a b c h 10 3 13 mm @@ -303,23 +303,23 @@ END| CALL UPDATE_t(); SELECT c FROM t; c -NULL 19 -29 2103 +29 +NULL CALL DELETE_insert_t(); SELECT c FROM t; c -NULL 19 -29 2103 +29 +NULL DROP INDEX idx ON t; CALL UPDATE_t(); SELECT c FROM t; c -2103 19 +2103 29 NULL DROP PROCEDURE DELETE_insert_t; @@ -523,10 +523,10 @@ UPDATE t SET h = "e" WHERE h="a"; ROLLBACK; SELECT a, c, h FROM t; a c h -NULL NULL d 11 14 a 18 19 b 28 29 c +NULL NULL d DROP TABLE t; CREATE TABLE `t1` ( `col1` int(11) NOT NULL, diff --git a/mysql-test/suite/gcol/t/innodb_virtual_basic.test b/mysql-test/suite/gcol/t/innodb_virtual_basic.test index dd0409a373c27..280b0100bb37c 100644 --- a/mysql-test/suite/gcol/t/innodb_virtual_basic.test +++ b/mysql-test/suite/gcol/t/innodb_virtual_basic.test @@ -42,6 +42,7 @@ INSERT INTO t VALUES (128, 22, DEFAULT, "xx"); INSERT INTO t VALUES (1290, 212, DEFAULT, "xmx"); ROLLBACK; +--sorted_result SELECT c FROM t; SELECT * FROM t; @@ -356,13 +357,16 @@ END| delimiter ;| CALL UPDATE_t(); +--sorted_result SELECT c FROM t; CALL DELETE_insert_t(); +--sorted_result SELECT c FROM t; DROP INDEX idx ON t; CALL UPDATE_t(); +--sorted_result SELECT c FROM t; DROP PROCEDURE DELETE_insert_t; @@ -537,6 +541,7 @@ START TRANSACTION; UPDATE t SET m =10 WHERE m = 1; UPDATE t SET h = "e" WHERE h="a"; ROLLBACK; +--sorted_result SELECT a, c, h FROM t; DROP TABLE t; diff --git a/mysql-test/suite/vcol/r/races.result b/mysql-test/suite/vcol/r/races.result index c46ed5ba2ef53..c93c8b01e2eeb 100644 --- a/mysql-test/suite/vcol/r/races.result +++ b/mysql-test/suite/vcol/r/races.result @@ -14,3 
+14,17 @@ disconnect con1; connection default; drop table t1; set debug_sync='reset'; +# +# MDEV-39261 MariaDB crash on startup in presence of +# indexed virtual columns +# +# Create 33 tables with virtual index +InnoDB 0 transactions not purged +connect purge_control,localhost,root; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +# Do update on all 33 tables +# restart: --innodb_purge_threads=1 --debug_dbug=d,ib_purge_virtual_index_callback +InnoDB 0 transactions not purged +# Drop all 33 tables +# restart diff --git a/mysql-test/suite/vcol/t/races.test b/mysql-test/suite/vcol/t/races.test index 1bf4e43dec919..b6b42b1771da9 100644 --- a/mysql-test/suite/vcol/t/races.test +++ b/mysql-test/suite/vcol/t/races.test @@ -20,3 +20,59 @@ disconnect con1; connection default; drop table t1; set debug_sync='reset'; + +--echo # +--echo # MDEV-39261 MariaDB crash on startup in presence of +--echo # indexed virtual columns +--echo # +# To make the purge thread work on multiple tables in the same batch, +# we need 33 tables because 32 purge_node objects already exist. 
+ +--echo # Create 33 tables with virtual index +--disable_query_log +let $i = 33; +while ($i) +{ + eval CREATE TABLE t$i( + a INT PRIMARY KEY, + b INT DEFAULT 1, INDEX(b), + c INT GENERATED ALWAYS AS (a + b) VIRTUAL, + INDEX(c) + ) ENGINE=InnoDB; + eval INSERT INTO t$i(a) VALUES(1); + dec $i; +} +--enable_query_log +--source ../../innodb/include/wait_all_purged.inc +--connect purge_control,localhost,root +START TRANSACTION WITH CONSISTENT SNAPSHOT; + +--connection default +--echo # Do update on all 33 tables +--disable_query_log +let $i = 33; +while ($i) +{ + eval UPDATE t$i SET b = 11 WHERE a = 1; + dec $i; +} +--enable_query_log + +let $shutdown_timeout=0; +let $restart_parameters=--innodb_purge_threads=1 --debug_dbug=d,ib_purge_virtual_index_callback; +--source include/restart_mysqld.inc +--source ../../innodb/include/wait_all_purged.inc + +--echo # Drop all 33 tables +--disable_query_log +let $i = 33; +while ($i) +{ + eval DROP TABLE t$i; + dec $i; +} +--enable_query_log + +let $restart_parameters=; +let $shutdown_timeout=; +--source include/restart_mysqld.inc diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 79d7d7d32324a..94507b57a0458 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -820,7 +820,7 @@ static inline bool check_field_pointers(const TABLE *table) leave prelocked mode if needed. 
*/ -int close_thread_tables(THD *thd) +int close_thread_tables(THD *thd) noexcept { TABLE *table; int error= 0; diff --git a/sql/sql_base.h b/sql/sql_base.h index 8dd9bca5b1d93..4381dd0dd5b96 100644 --- a/sql/sql_base.h +++ b/sql/sql_base.h @@ -163,7 +163,7 @@ TABLE_LIST *find_table_in_list(TABLE_LIST *table, TABLE_LIST *TABLE_LIST::*link, const LEX_CSTRING *db_name, const LEX_CSTRING *table_name); -int close_thread_tables(THD *thd); +int close_thread_tables(THD *thd) noexcept; void switch_to_nullable_trigger_fields(List &items, TABLE *); void switch_defaults_to_nullable_trigger_fields(TABLE *table); bool fill_record_n_invoke_before_triggers(THD *thd, TABLE *table, diff --git a/sql/sql_class.cc b/sql/sql_class.cc index e9de780adbcf3..fb4e8e6bf59eb 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5024,10 +5024,10 @@ extern "C" const char *thd_priv_user(MYSQL_THD thd, size_t *length) have only one table open at any given time. */ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, - const char *tb, size_t tblen) + const char *tb, size_t tblen, + MDL_ticket *mdl_ticket) noexcept { DBUG_ENTER("open_purge_table"); - DBUG_ASSERT(thd->open_tables == NULL); DBUG_ASSERT(thd->locked_tables_mode < LTM_PRELOCKED); /* Purge already hold the MDL for the table */ @@ -5038,6 +5038,7 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, tl->init_one_table(&db_name, &table_name, 0, TL_READ); tl->i_s_requested_object= OPEN_TABLE_ONLY; + tl->mdl_request.ticket= mdl_ticket; bool error= open_table(thd, tl, &ot_ctx); @@ -5050,11 +5051,9 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, DBUG_RETURN(error ? 
NULL : tl->table); } -TABLE *get_purge_table(THD *thd) +MDL_ticket *get_mdl_ticket(TABLE *table) noexcept { - /* see above, at most one table can be opened */ - DBUG_ASSERT(thd->open_tables == NULL || thd->open_tables->next == NULL); - return thd->open_tables; + return table->mdl_ticket; } /** Find an open table in the list of prelocked tabled @@ -5217,10 +5216,13 @@ void destroy_background_thd(MYSQL_THD thd) void reset_thd(MYSQL_THD thd) { + const char *proc_info= thd->proc_info; + thd->proc_info="reset"; close_thread_tables(thd); thd->release_transactional_locks(); thd->free_items(); free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC)); + thd->proc_info= proc_info; } /** diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index bced539a0eb7c..58279219c61f7 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -626,13 +626,13 @@ bool dict_table_t::parse_name(char (&db_name)[NAME_LEN + 1], dict_sys.unfreeze(); *db_name_len= filename_to_tablename(db_buf, db_name, - MAX_DATABASE_NAME_LEN + 1, true); + NAME_LEN + 1, true); if (is_temp) return false; *tbl_name_len= filename_to_tablename(tbl_buf, tbl_name, - MAX_TABLE_NAME_LEN + 1, true); + NAME_LEN + 1, true); return true; } diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index b0c34dc6d3040..17c27148b3516 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -71,6 +71,7 @@ static bool stats_initialised; static THD *dict_stats_thd; +void reset_thd(MYSQL_THD thd); /*****************************************************************//** Free the resources occupied by the recalc pool, called once during thread de-initialization. 
*/ @@ -393,7 +394,7 @@ static void dict_stats_func(void*) while (dict_stats_process_entry_from_recalc_pool(dict_stats_thd)) {} dict_defrag_process_entries_from_defrag_pool(dict_stats_thd); - innobase_reset_background_thd(dict_stats_thd); + reset_thd(dict_stats_thd); set_current_thd(nullptr); if (!is_recalc_pool_empty()) dict_stats_schedule(MIN_RECALC_INTERVAL * 1000); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f01ff8f3dcb4c..fa398d43685b9 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -124,10 +124,11 @@ TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len, const char *table, size_t table_len); MYSQL_THD create_background_thd(); void reset_thd(MYSQL_THD thd); -TABLE *get_purge_table(THD *thd); TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, - const char *tb, size_t tblen); -void close_thread_tables(THD* thd); + const char *tb, size_t tblen, + MDL_ticket *mdl_ticket) noexcept; +int close_thread_tables(THD* thd) noexcept; +MDL_ticket *get_mdl_ticket(TABLE *table) noexcept; #ifdef MYSQL_DYNAMIC_PLUGIN #define tc_size 400 @@ -1772,26 +1773,6 @@ MYSQL_THD innobase_create_background_thd(const char* name) return thd; } - -/** Close opened tables, free memory, delete items for a MYSQL_THD. -@param[in] thd MYSQL_THD to reset */ -void -innobase_reset_background_thd(MYSQL_THD thd) -{ - if (!thd) { - thd = current_thd; - } - - ut_ad(thd); - ut_ad(THDVAR(thd, background_thread)); - - /* background purge thread */ - const char *proc_info= thd_proc_info(thd, "reset"); - reset_thd(thd); - thd_proc_info(thd, proc_info); -} - - /******************************************************************//** Check if the transaction is an auto-commit transaction. TRUE also implies that it is a SELECT (read-only) transaction. 
@@ -8536,7 +8517,7 @@ ATTRIBUTE_COLD bool wsrep_append_table_key(MYSQL_THD thd, const dict_table_t &ta { char db_buf[NAME_LEN + 1]; char tbl_buf[NAME_LEN + 1]; - ulint db_buf_len, tbl_buf_len; + size_t db_buf_len, tbl_buf_len; if (!table.parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) { @@ -20071,37 +20052,28 @@ ha_innobase::multi_range_read_explain_info( for purge thread */ static TABLE* innodb_find_table_for_vc(THD* thd, dict_table_t* table) { - TABLE *mysql_table; - const bool bg_thread = THDVAR(thd, background_thread); - - if (bg_thread) { - if ((mysql_table = get_purge_table(thd))) { - return mysql_table; - } - } else { - if (table->vc_templ->mysql_table_query_id - == thd_get_query_id(thd)) { - return table->vc_templ->mysql_table; - } + table->lock_mutex_lock(); + TABLE *maria_table = table->vc_templ->mysql_table; + const uint64_t cached_id = table->vc_templ->mysql_table_query_id; + table->lock_mutex_unlock(); + if (cached_id == thd_get_query_id(thd)) { + return maria_table; } + TABLE *mysql_table; char db_buf[NAME_LEN + 1]; char tbl_buf[NAME_LEN + 1]; - ulint db_buf_len, tbl_buf_len; + size_t db_buf_len, tbl_buf_len; if (!table->parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) { return NULL; } - - if (bg_thread) { - return open_purge_table(thd, db_buf, db_buf_len, - tbl_buf, tbl_buf_len); - } - mysql_table = find_fk_open_table(thd, db_buf, db_buf_len, tbl_buf, tbl_buf_len); + table->lock_mutex_lock(); table->vc_templ->mysql_table = mysql_table; table->vc_templ->mysql_table_query_id = thd_get_query_id(thd); + table->lock_mutex_unlock(); return mysql_table; } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 0ab4e4d1fdae6..9b731d50edca1 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2551,6 +2551,17 @@ struct dict_table_t { static dict_table_t *create(const span &name, fil_space_t *space, ulint n_cols, ulint n_v_cols, ulint flags, ulint flags2); + + /** 
@return whether the table has any indexed virtual column */ + bool has_virtual_index() const noexcept + { + if (UNIV_UNLIKELY(n_v_cols != 0)) + for (dict_index_t *index = indexes.start; + index; index = UT_LIST_GET_NEXT(indexes, index)) + if (index->has_virtual()) + return true; + return false; + } }; inline void dict_index_t::set_modified(mtr_t& mtr) const diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 098986febdf71..8ffc8c0627429 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -420,11 +420,6 @@ MYSQL_THD innobase_create_background_thd(const char* name); @param[in] thd MYSQL_THD to destroy */ void destroy_background_thd(MYSQL_THD thd); -/** Close opened tables, free memory, delete items for a MYSQL_THD. -@param[in] thd MYSQL_THD to reset */ -void -innobase_reset_background_thd(MYSQL_THD); - /** Open a table based on a database and table name. @param db schema name @param name table name within the schema diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h index baa7777e6c81e..70214977d535b 100644 --- a/storage/innobase/include/row0purge.h +++ b/storage/innobase/include/row0purge.h @@ -67,6 +67,48 @@ row_purge_step( que_thr_t* thr) /*!< in: query thread */ MY_ATTRIBUTE((nonnull, warn_unused_result)); +/** Table context for purge operations. 
Uses pointer to store +either TABLE* or MDL_ticket* in a single union +For tables WITH indexed virtual columns: +- Opens TABLE* via open_purge_table() +- Stores TABLE* with LSB=1 flag in mariadb_table +- MDL_ticket* is accessed via TABLE->mdl_ticket +- get_ticket() returns TABLE->mdl_ticket + +For tables WITHOUT indexed virtual columns: +- Only acquires MDL_ticket* (no TABLE* needed) +- Stores MDL_ticket* with LSB=0 in the union +- get_ticket() returns the stored ticket directly */ +class purge_table +{ + union + { + /** TABLE* with the least significant bit set */ + uintptr_t mariadb_table; + /** metadata lock when !get_maria_table() */ + MDL_ticket *ticket; + }; +public: + dict_table_t *table; + + purge_table() : ticket(nullptr), table(nullptr) {} + + inline TABLE *get_maria_table() const noexcept + { + return UNIV_UNLIKELY(mariadb_table & 1) + ? reinterpret_cast(mariadb_table & ~uintptr_t{1}) + : nullptr; + } + + /** @return whether we must wait for MDL on the table */ + bool must_wait() const noexcept + { return table == reinterpret_cast(-1); } + + inline MDL_ticket *get_ticket() const noexcept; + inline void set_mariadb_table(TABLE *t) noexcept; + inline void set_ticket(MDL_ticket *t) noexcept; +}; + /** Purge worker context */ struct purge_node_t { @@ -93,8 +135,8 @@ struct purge_node_t /** whether the operation is in progress */ bool in_progress= false; #endif - /** table where purge is done */ - dict_table_t *table= nullptr; + /** purge table handle */ + purge_table pt; /** update vector for a clustered index record */ upd_t *update; /** row reference to the next row to handle, or nullptr */ @@ -111,8 +153,9 @@ struct purge_node_t /** Undo recs to purge */ std::queue undo_recs; - /** map of table identifiers to table handles and meta-data locks */ - std::unordered_map> tables; + /** map of table identifiers to table handles and TABLE* object, + which is set by purge co-ordinator thread */ + std::unordered_map tables; /** Constructor */ explicit 
purge_node_t(que_thr_t *parent) : diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h index 2ddffa41af195..e4284bbd10026 100644 --- a/storage/innobase/include/row0vers.h +++ b/storage/innobase/include/row0vers.h @@ -65,13 +65,15 @@ bool dtuple_vcol_data_missing(const dtuple_t &tuple, @param[in,out] row the cluster index row in dtuple form @param[in] clust_index clustered index @param[in] index the secondary index -@param[in] heap heap used to build virtual dtuple. */ +@param[in] heap heap used to build virtual dtuple. +@param[in] maria_table MariaDB table object */ bool row_vers_build_clust_v_col( dtuple_t* row, dict_index_t* clust_index, dict_index_t* index, - mem_heap_t* heap); + mem_heap_t* heap, + TABLE* maria_table= nullptr); /** Build a dtuple contains virtual column data for current cluster index @param[in] rec cluster index rec @param[in] clust_index cluster index @@ -83,6 +85,7 @@ row_vers_build_clust_v_col( @param[in,out] heap heap memory @param[in,out] v_heap heap memory to keep virtual column tuple @param[in,out] mtr mini-transaction +@param[in] maria_table MariaDB table object @return dtuple contains virtual column data */ dtuple_t* row_vers_build_cur_vrow( @@ -94,7 +97,8 @@ row_vers_build_cur_vrow( roll_ptr_t roll_ptr, mem_heap_t* heap, mem_heap_t* v_heap, - mtr_t* mtr); + mtr_t* mtr, + TABLE* maria_table= nullptr); /*****************************************************************//** Constructs the version of a clustered index record which a consistent diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 0222139018972..f731af7028174 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -29,6 +29,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0sys.h" #include "que0types.h" #include "srw_lock.h" +#include "row0purge.h" #include #include @@ -230,6 +231,15 @@ class purge_sys_t trx_rseg_t* rseg; /*!< Rollback segment for the next undo 
record to purge */ private: + /** Coordinator thread's THD during batch processing. + The coordinator thread sets this at the start of trx_purge() + and clears it at the end. This is being used in + purge_node_t::end() to determine whether InnoDB should + call reset_thd(). The coordinator skips this call + because it manages table cleanup centrally in + trx_purge() after all workers complete. */ + THD* coordinator_thd= nullptr; + uint32_t page_no; /*!< Page number for the next undo record to purge, page number of the log header, if dummy record */ @@ -316,8 +326,12 @@ class purge_sys_t /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */ void resume(); - /** Close and reopen all tables in case of a MDL conflict with DDL */ - dict_table_t *close_and_reopen(table_id_t id, THD *thd, MDL_ticket **mdl); + /** Close and reopen all tables in case of a MDL conflict with DDL + @param id table identifier that triggered reopen + @param thd coordinator thread + @return purge_table for the reopened table, or empty on error */ + purge_table close_and_reopen(table_id_t id, THD *thd) noexcept; + private: /** Suspend purge during a DDL operation on FULLTEXT INDEX tables */ void wait_FTS(bool also_sys); @@ -417,7 +431,7 @@ class purge_sys_t /** A wrapper around trx_sys_t::clone_oldest_view(). */ template - void clone_oldest_view() + void clone_oldest_view(THD *thd) { if (!also_end_view) wait_FTS(true); @@ -427,6 +441,7 @@ class purge_sys_t (end_view= view). clamp_low_limit_id(head.trx_no ? head.trx_no : tail.trx_no); latch.wr_unlock(); + coordinator_thd= thd; } /** Wake up the purge threads if there is work to do. */ @@ -476,6 +491,12 @@ class purge_sys_t marked for truncate. 
@param space undo tablespace being truncated */ void cleanse_purge_queue(const fil_space_t &space); + + /** Reset the state of a purge_worker_task at the end of a batch */ + inline void reset_worker_thd(THD *thd) const noexcept; + + /** Set coordinator thread */ + inline void set_coordinator_thd(THD *thd) noexcept; }; /** The global data structure coordinating a purge */ diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 44e79d17e97b1..8537763bd239f 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -50,6 +50,7 @@ Created 3/14/1997 Heikki Tuuri #include "debug_sync.h" #include +void reset_thd(MYSQL_THD thd); /************************************************************************* IMPORTANT NOTE: Any operation that generates redo MUST check that there is enough space in the redo log before for that operation. This is @@ -81,7 +82,7 @@ row_purge_reposition_pcur( } else { node->found_clust = row_search_on_row_ref( - &node->pcur, mode, node->table, node->ref, mtr); + &node->pcur, mode, node->pt.table, node->ref, mtr); if (node->found_clust) { btr_pcur_store_position(&node->pcur, mtr); @@ -107,7 +108,7 @@ row_purge_remove_clust_if_poss_low( purge_node_t* node, /*!< in/out: row purge node */ btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */ { - dict_index_t* index = dict_table_get_first_index(node->table); + dict_index_t* index = dict_table_get_first_index(node->pt.table); table_id_t table_id = 0; index_id_t index_id = 0; dict_table_t *table = nullptr; @@ -146,7 +147,7 @@ row_purge_remove_clust_if_poss_low( return success; } - if (node->table->id == DICT_INDEXES_ID) { + if (node->pt.table->id == DICT_INDEXES_ID) { /* If this is a record of the SYS_INDEXES table, then we have to free the file segments of the index tree associated with the index */ @@ -489,15 +490,12 @@ static bool row_purge_is_unsafe(const purge_node_t &node, dtuple_t* cur_vrow = NULL; ut_ad(index->table == 
clust_index->table); + ut_ad(node.pt.table == index->table); heap = mem_heap_create(1024); clust_offsets = rec_get_offsets(rec, clust_index, NULL, clust_index->n_core_fields, ULINT_UNDEFINED, &heap); - if (dict_index_has_virtual(index)) { - v_heap = mem_heap_create(100); - } - if (!rec_get_deleted_flag(rec, rec_offs_comp(clust_offsets))) { row_ext_t* ext; @@ -534,7 +532,8 @@ static bool row_purge_is_unsafe(const purge_node_t &node, || dbug_v_purge) { if (!row_vers_build_clust_v_col( - row, clust_index, index, heap)) { + row, clust_index, index, heap, + node.pt.get_maria_table())) { goto unsafe_to_purge; } @@ -610,10 +609,12 @@ static bool row_purge_is_unsafe(const purge_node_t &node, deleted, but the previous version of it might not. We will need to get the virtual column data from undo record associated with current cluster index */ + v_heap = mem_heap_create(100); cur_vrow = row_vers_build_cur_vrow( rec, clust_index, &clust_offsets, - index, trx_id, roll_ptr, heap, v_heap, mtr); + index, trx_id, roll_ptr, heap, v_heap, mtr, + node.pt.get_maria_table()); } version = rec; @@ -655,7 +656,11 @@ static bool row_purge_is_unsafe(const purge_node_t &node, } /* Keep the virtual row info for the next version, unless it is changed */ - mem_heap_empty(v_heap); + if (v_heap) { + mem_heap_empty(v_heap); + } else { + v_heap = mem_heap_create(100); + } cur_vrow = dtuple_copy(vrow, v_heap); dtuple_dup_v_fld(cur_vrow, v_heap); } @@ -901,7 +906,7 @@ static trx_id_t row_purge_remove_sec_if_poss_leaf(purge_node_t *node, trx_id_t page_max_trx_id = 0; log_free_check(); - ut_ad(index->table == node->table); + ut_ad(index->table == node->pt.table); ut_ad(!index->table->is_temporary()); mtr.start(); index->set_modified(mtr); @@ -1076,7 +1081,7 @@ static void row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr) if (row_purge_reposition_pcur(BTR_MODIFY_LEAF, node, mtr)) { dict_index_t* index = dict_table_get_first_index( - node->table); + node->pt.table); ulint trx_id_pos = index->n_uniq 
? index->n_uniq : 1; rec_t* rec = btr_pcur_get_rec(&node->pcur); mem_heap_t* heap = NULL; @@ -1152,7 +1157,7 @@ row_purge_upd_exist_or_extern_func( { mem_heap_t* heap; - ut_ad(!node->table->skip_alter_undo); + ut_ad(!node->pt.table->skip_alter_undo); if (node->rec_type == TRX_UNDO_UPD_DEL_REC || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) @@ -1180,7 +1185,7 @@ row_purge_upd_exist_or_extern_func( heap, ROW_BUILD_FOR_PURGE); row_purge_remove_sec_if_poss(node, node->index, entry); - ut_ad(node->table); + ut_ad(node->pt.table); mem_heap_empty(heap); } @@ -1190,7 +1195,7 @@ row_purge_upd_exist_or_extern_func( skip_secondaries: mtr_t mtr; - dict_index_t* index = dict_table_get_first_index(node->table); + dict_index_t* index = dict_table_get_first_index(node->pt.table); /* Free possible externally stored fields */ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) { @@ -1461,27 +1466,20 @@ row_purge_parse_undo_rec( break; } - auto &tables_entry= node->tables[table_id]; - node->table = tables_entry.first; - if (!node->table) { + node->pt = node->tables[table_id]; + if (!node->pt.table) { return false; } -#ifndef DBUG_OFF - if (MDL_ticket* mdl = tables_entry.second) { - static_cast(thd_mdl_context(current_thd)) - ->lock_warrant = mdl->get_ctx(); - } -#endif - ut_ad(!node->table->is_temporary()); + ut_ad(!node->pt.table->is_temporary()); - clust_index = dict_table_get_first_index(node->table); + clust_index = dict_table_get_first_index(node->pt.table); if (clust_index->is_corrupted()) { /* The table was corrupt in the data dictionary. dict_set_corrupted() works on an index, and we do not have an index to call it with. 
*/ - DBUG_ASSERT(table_id == node->table->id); + DBUG_ASSERT(table_id == node->pt.table->id); return false; } @@ -1535,11 +1533,11 @@ row_purge_record_func( bool updated_extern) { ut_ad(!node->found_clust); - ut_ad(!node->table->skip_alter_undo); + ut_ad(!node->pt.table->skip_alter_undo); ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); node->index = dict_table_get_next_index( - dict_table_get_first_index(node->table)); + dict_table_get_first_index(node->pt.table)); bool purged = true; @@ -1549,10 +1547,10 @@ row_purge_record_func( case TRX_UNDO_DEL_MARK_REC: purged = row_purge_del_mark(node); if (purged) { - if (node->table->stat_initialized + if (node->pt.table->stat_initialized && srv_stats_include_delete_marked) { dict_stats_update_if_needed( - node->table, *thr->graph->trx); + node->pt.table, *thr->graph->trx); } MONITOR_INC(MONITOR_N_DEL_ROW_PURGE); } @@ -1636,6 +1634,22 @@ inline void purge_node_t::start() cmpl_info= 0; } +inline void purge_sys_t::reset_worker_thd(THD *thd) const noexcept +{ + /* Only reset THD for worker threads, not the coordinator. + The coordinator thread opens TABLE* objects in + trx_purge_attach_undo_recs() and stores them in + purge_node_t->tables. These TABLE* objects must remain open until + the entire purge batch completes. Coordinator thread could + close the tables prematurely if it calls reset_thd() + The coordinator handles cleanup centrally in trx_purge() after all + purge_node_t entries are processed. Worker threads have their own + THD lifecycle and must call reset_thd() to clean up their + thread-local resources. 
*/ + if (thd != coordinator_thd) + reset_thd(thd); +} + /** Reset the state at end @return the query graph parent */ inline que_node_t *purge_node_t::end(THD *thd) @@ -1643,7 +1657,7 @@ inline que_node_t *purge_node_t::end(THD *thd) DBUG_ASSERT(common.type == QUE_NODE_PURGE); ut_ad(undo_recs.empty()); ut_d(in_progress= false); - innobase_reset_background_thd(thd); + purge_sys.reset_worker_thd(thd); #ifndef DBUG_OFF static_cast(thd_mdl_context(thd))->lock_warrant= nullptr; #endif diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index 617aab8a967e4..c967dd1510186 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -459,10 +459,10 @@ row_vers_build_clust_v_col( dtuple_t* row, dict_index_t* clust_index, dict_index_t* index, - mem_heap_t* heap) + mem_heap_t* heap, + TABLE* maria_table) { THD* thd= current_thd; - TABLE* maria_table= 0; ut_ad(dict_index_has_virtual(index)); ut_ad(index->table == clust_index->table); @@ -632,7 +632,8 @@ row_vers_build_cur_vrow( roll_ptr_t roll_ptr, mem_heap_t* heap, mem_heap_t* v_heap, - mtr_t* mtr) + mtr_t* mtr, + TABLE* maria_table) { dtuple_t* cur_vrow = NULL; @@ -653,7 +654,7 @@ row_vers_build_cur_vrow( NULL, NULL, NULL, NULL, heap); if (!row_vers_build_clust_v_col(row, clust_index, index, - heap)) { + heap, maria_table)) { return nullptr; } diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 8ac3f93cd1557..e779cfc75de7b 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -43,6 +43,16 @@ Created 3/26/1996 Heikki Tuuri #include #include "log.h" +TABLE *open_purge_table(THD *thd, const char *db, size_t dblen, + const char *tb, size_t tblen, + MDL_ticket *mdl_ticket) noexcept; +int close_thread_tables(THD* thd) noexcept; +MDL_ticket *get_mdl_ticket(TABLE *table) noexcept; + +#ifdef UNIV_DEBUG +unsigned long long thd_get_query_id(const MYSQL_THD thd); +#endif /* UNIV_DEBUG */ + /** Maximum allowable 
purge history length. <=0 means 'infinite'. */ ulong srv_max_purge_lag = 0; @@ -1047,29 +1057,65 @@ inline trx_purge_rec_t purge_sys_t::fetch_next_rec() return get_next_rec(roll_ptr); } +inline MDL_ticket *purge_table::get_ticket() const noexcept +{ + if (TABLE* t= get_maria_table()) + return get_mdl_ticket(t); + return ticket; +} + +inline void purge_table::set_mariadb_table(TABLE *t) noexcept +{ + mariadb_table= reinterpret_cast(t) | 1; +} + +inline void purge_table::set_ticket(MDL_ticket *t) noexcept +{ + ticket= t; +} + /** Close all tables that were opened in a purge batch for a worker. -@param node purge task context -@param thd purge coordinator thread handle */ -static void trx_purge_close_tables(purge_node_t *node, THD *thd) noexcept +@param thd purge coordinator thread handle +@param batch_cleanup if true, clears the list of opened tables + in the purge node. */ +static void trx_purge_close_tables(THD *thd, bool batch_cleanup=false) noexcept { - for (auto &t : node->tables) + MDL_context *mdl_context = static_cast(thd_mdl_context(thd)); + std::vector mdl_tickets; + for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; + thr= UT_LIST_GET_NEXT(thrs, thr)) { - dict_table_t *table= t.second.first; - if (table != nullptr && table != reinterpret_cast(-1)) - dict_table_close(table); + purge_node_t* node = static_cast(thr->child); + for (auto &t : node->tables) + { + purge_table& pt = t.second; + if (pt.table && !pt.must_wait()) + { + if (pt.get_maria_table() && pt.table->vc_templ) + { + pt.table->lock_mutex_lock(); + pt.table->vc_templ->mysql_table= nullptr; + pt.table->vc_templ->mysql_table_query_id= 0; + pt.table->lock_mutex_unlock(); + } + + dict_table_close(pt.table); + pt.table= nullptr; + MDL_ticket *ticket= t.second.get_ticket(); + if (ticket) + mdl_tickets.push_back(ticket); + } + } + if (batch_cleanup) + node->tables.clear(); } - MDL_context *mdl_context= static_cast(thd_mdl_context(thd)); + close_thread_tables(thd); - for (auto &t : node->tables) + for (auto 
mdl : mdl_tickets) { - dict_table_t *table= t.second.first; - if (table != nullptr && table != reinterpret_cast(-1)) - { - t.second.first= reinterpret_cast(-1); - if (mdl_context != nullptr && t.second.second != nullptr) - mdl_context->release_lock(t.second.second); - } + if (mdl && mdl_context) + mdl_context->release_lock(mdl); } } @@ -1136,81 +1182,105 @@ static dict_table_t *trx_purge_table_acquire(dict_table_t *table, /** Open a table handle for the purge of committed transaction history @param table_id InnoDB table identifier @param mdl_context metadata lock acquisition context -@param mdl metadata lock -@return table handle -@retval nullptr if the table is not found or accessible -@retval -1 if the purge of history must be suspended due to DDL */ -static dict_table_t *trx_purge_table_open(table_id_t table_id, - MDL_context *mdl_context, - MDL_ticket **mdl) noexcept +@return purge_table with dict_table_t* and TABLE* (or)MDL_ticket, +possibly with must_wait() || table == nullptr */ +static purge_table trx_purge_table_open(table_id_t table_id, + MDL_context *mdl_context) noexcept { - dict_table_t *table; + purge_table result; + MDL_ticket *mdl= nullptr; for (;;) { dict_sys.freeze(SRW_LOCK_CALL); - table= dict_sys.find_table(table_id); - if (table) + result.table= dict_sys.find_table(table_id); + if (result.table) break; dict_sys.unfreeze(); dict_sys.lock(SRW_LOCK_CALL); - table= dict_load_table_on_id(table_id, DICT_ERR_IGNORE_FK_NOKEY); + result.table= dict_load_table_on_id(table_id, DICT_ERR_IGNORE_FK_NOKEY); dict_sys.unlock(); - if (!table) - return nullptr; + if (!result.table) + return result; /* At this point, the freshly loaded table may already have been evicted. We must look it up again while holding a shared dict_sys.latch. We keep trying this until the table is found in the cache or it cannot be found in the dictionary (because the table has been dropped or rebuilt). 
*/ } - table= trx_purge_table_acquire(table, mdl_context, mdl); + result.table= trx_purge_table_acquire(result.table, mdl_context, &mdl); dict_sys.unfreeze(); - return table; + + if (mdl && result.table->has_virtual_index()) + { + char db_buf[NAME_LEN + 1]; + char tbl_buf[NAME_LEN + 1]; + size_t db_len, tbl_len; + + if (result.table->parse_name(db_buf, tbl_buf, &db_len, &tbl_len)) + { + THD *thd= current_thd; + TABLE *maria_table= open_purge_table(thd, db_buf, db_len, + tbl_buf, tbl_len, mdl); + if (maria_table) + { + if (result.table->vc_templ) + { + ut_ad(thd_get_query_id(thd) == 0); + result.table->lock_mutex_lock(); + result.table->vc_templ->mysql_table= maria_table; + result.table->vc_templ->mysql_table_query_id= 0; + result.table->lock_mutex_unlock(); + } + result.set_mariadb_table(maria_table); + return result; + } + } + } + result.set_ticket(mdl); + return result; } ATTRIBUTE_COLD -dict_table_t *purge_sys_t::close_and_reopen(table_id_t id, THD *thd, - MDL_ticket **mdl) +purge_table purge_sys_t::close_and_reopen(table_id_t id, THD *thd) noexcept { MDL_context *mdl_context= static_cast(thd_mdl_context(thd)); ut_ad(mdl_context); - retry: +retry: ut_ad(m_active); - - for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; - thr= UT_LIST_GET_NEXT(thrs, thr)) - trx_purge_close_tables(static_cast(thr->child), thd); + trx_purge_close_tables(thd); m_active= false; wait_FTS(false); m_active= true; - dict_table_t *table= trx_purge_table_open(id, mdl_context, mdl); - if (table == reinterpret_cast(-1)) + purge_table pt= trx_purge_table_open(id, mdl_context); + if (pt.must_wait()) goto retry; - for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; + /* Reopen all other tables from all nodes */ + for (que_thr_t *thr= UT_LIST_GET_FIRST(query->thrs); thr; thr= UT_LIST_GET_NEXT(thrs, thr)) { purge_node_t *node= static_cast(thr->child); - for (auto &t : node->tables) + for (auto it = node->tables.begin(); it != node->tables.end(); ) { - if 
(t.second.first) + it->second= trx_purge_table_open(it->first, mdl_context); + if (it->second.must_wait()) { - t.second.first= trx_purge_table_open(t.first, mdl_context, - &t.second.second); - if (t.second.first == reinterpret_cast(-1)) - { - if (table) - dict_table_close(table, false, thd, *mdl); - goto retry; - } + if (pt.table) + dict_table_close(pt.table, false, thd, pt.get_ticket()); + goto retry; } +#ifndef DBUG_OFF + if (MDL_ticket *mdl= it->second.get_ticket()) + static_cast(thd_mdl_context(thd))->lock_warrant= + mdl->get_ctx(); +#endif + ++it; } } - - return table; + return pt; } /** Run a purge batch. @@ -1259,29 +1329,33 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, } table_id_t table_id= trx_undo_rec_get_table_id(purge_rec.undo_rec); - purge_node_t *&table_node= table_id_map[table_id]; if (table_node) ut_ad(!table_node->in_progress); if (!table_node) { - std::pair p; - p.first= trx_purge_table_open(table_id, mdl_context, &p.second); - if (p.first == reinterpret_cast(-1)) - p.first= purge_sys.close_and_reopen(table_id, thd, &p.second); + purge_table pt= trx_purge_table_open(table_id, mdl_context); + if (pt.must_wait()) + pt= purge_sys.close_and_reopen(table_id, thd); if (!thr || !(thr= UT_LIST_GET_NEXT(thrs, thr))) thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); ++*n_work_items; table_node= static_cast(thr->child); - ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE); - ut_d(auto pair=) table_node->tables.emplace(table_id, p); - ut_ad(pair.second); - if (p.first) + + table_node->tables.emplace(table_id, pt); + if (pt.table) + { +#ifndef DBUG_OFF + if (MDL_ticket *mdl= pt.get_ticket()) + static_cast(thd_mdl_context(thd))->lock_warrant= + mdl->get_ctx(); +#endif goto enqueue; + } } - else if (table_node->tables[table_id].first) + else if (table_node->tables[table_id].table) { enqueue: table_node->undo_recs.push(purge_rec); @@ -1365,6 +1439,13 @@ void purge_sys_t::batch_cleanup(const purge_sys_t::iterator &head) #ifdef 
SUX_LOCK_GENERIC end_latch.wr_unlock(); #endif + coordinator_thd= nullptr; +} + + +inline void purge_sys_t::set_coordinator_thd(THD *thd) noexcept +{ + coordinator_thd= thd; } /** @@ -1376,11 +1457,11 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) { ut_ad(n_tasks > 0); - purge_sys.clone_oldest_view(); + THD *const thd= current_thd; - ut_d(if (srv_purge_view_update_only_debug) return 0); + purge_sys.clone_oldest_view(thd); - THD *const thd= current_thd; + ut_d(if (srv_purge_view_update_only_debug) return 0); /* Fetch the UNDO recs that need to be purged. */ ulint n_work= 0; @@ -1410,12 +1491,12 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) for (auto i= n_work; i--; ) { if (!thr) - thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); + thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); else - thr= UT_LIST_GET_NEXT(thrs, thr); + thr= UT_LIST_GET_NEXT(thrs, thr); if (!thr) - break; + break; ut_ad(thr->state == QUE_THR_COMPLETED); thr->state= QUE_THR_RUNNING; @@ -1444,13 +1525,7 @@ TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size) if (workers) trx_purge_wait_for_workers_to_complete(); - for (thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr && n_work--; - thr= UT_LIST_GET_NEXT(thrs, thr)) - { - purge_node_t *node= static_cast(thr->child); - trx_purge_close_tables(node, thd); - node->tables.clear(); - } + trx_purge_close_tables(thd, true); } purge_sys.batch_cleanup(head); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index e2adc085c5397..109d5c8b27751 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -779,7 +779,7 @@ dberr_t trx_lists_init_at_db_start() ib::info() << "Trx id counter is " << trx_sys.get_max_trx_id(); } - purge_sys.clone_oldest_view(); + purge_sys.clone_oldest_view(nullptr); return DB_SUCCESS; }