diff --git a/mysql-test/enableCases.list b/mysql-test/enableCases.list new file mode 100644 index 0000000000000000000000000000000000000000..f084231e125f74ca333bd340ebd2a7ae0d008bb2 --- /dev/null +++ b/mysql-test/enableCases.list @@ -0,0 +1 @@ +recreate_test_db.test \ No newline at end of file diff --git a/mysql-test/mysql-source-code-meta.patch b/mysql-test/mysql-source-code-meta.patch index bca87731e65301ba860e3a4e2906d8fa45aa08a6..5daca0ed6d492efc9d3c53e8690bd0bda375a282 100644 --- a/mysql-test/mysql-source-code-meta.patch +++ b/mysql-test/mysql-source-code-meta.patch @@ -109,6 +109,30 @@ index dbd6c18..4666753 100644 /** Remove table statistics entries from mysql.table_stats and +diff --git a/sql/dd/dd_resource_group.cc b/sql/dd/dd_resource_group.cc +index ab99cee..933b86d 100644 +--- a/sql/dd/dd_resource_group.cc ++++ b/sql/dd/dd_resource_group.cc +@@ -32,6 +32,7 @@ + #include "sql/sql_class.h" // THD + #include "sql/thd_raii.h" + #include "sql/transaction.h" // trans_commit ++#include "sql/mysqld.h" + + namespace dd { + +@@ -112,6 +113,11 @@ bool create_resource_group(THD *thd, + + bool update_resource_group(THD *thd, const String_type &resource_grp_name, + const resourcegroups::Resource_group &res_grp_ref) { ++ int cluster_role = tse_hton->get_cluster_role(); ++ if (cluster_role == 1) { ++ // Do not update resrouce_group table when starting on slave cluster. ++ return false; ++ } + DBUG_TRACE; + + dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client()); diff --git a/sql/dd/dd_table.cc b/sql/dd/dd_table.cc index 7bf60f4..7be1743 100644 --- a/sql/dd/dd_table.cc @@ -156,10 +180,18 @@ index 7f9b9df..1d5575d 100644 LogErr(WARNING_LEVEL, ER_SKIP_UPDATING_METADATA_IN_SE_RO_MODE, schema_name_abbrev); diff --git a/sql/dd/impl/bootstrap/bootstrapper.cc b/sql/dd/impl/bootstrap/bootstrapper.cc -index d08718a..6440b0b 100644 +index d08718a..6f0d3ca 100644 --- a/sql/dd/impl/bootstrap/bootstrapper.cc +++ b/sql/dd/impl/bootstrap/bootstrapper.cc -@@ -81,7 +81,7 @@ namespace { +@@ -71,6 +71,7 @@ + #include "sql/mysqld.h" + #include "sql/sd_notify.h" // sysd::notify + #include "sql/thd_raii.h" ++#include "sql/mysqld.h" + + using namespace dd; + +@@ -81,7 +82,7 @@ namespace { // Initialize recovery in the DDSE. bool DDSE_dict_recover(THD *thd, dict_recovery_mode_t dict_recovery_mode, uint version) { @@ -168,7 +200,7 @@ index d08718a..6440b0b 100644 if (ddse->dict_recover == nullptr) return true; bool error = ddse->dict_recover(dict_recovery_mode, version); -@@ -266,6 +266,9 @@ bool acquire_exclusive_mdl(THD *thd) { +@@ -266,6 +267,9 @@ bool acquire_exclusive_mdl(THD *thd) { reset ID, store persistently, and update the storage adapter. */ bool flush_meta_data(THD *thd) { @@ -178,7 +210,7 @@ index d08718a..6440b0b 100644 // Acquire exclusive meta data locks for the relevant DD objects. if (acquire_exclusive_mdl(thd)) return true; -@@ -573,6 +576,9 @@ bool flush_meta_data(THD *thd) { +@@ -573,6 +577,9 @@ bool flush_meta_data(THD *thd) { if (persisted_dd_table != nullptr) delete persisted_dd_table; } @@ -188,7 +220,7 @@ index d08718a..6440b0b 100644 bootstrap::DD_bootstrap_ctx::instance().set_stage(bootstrap::Stage::SYNCED); return dd::end_transaction(thd, false); -@@ -580,6 +586,8 @@ bool flush_meta_data(THD *thd) { +@@ -580,6 +587,8 @@ bool flush_meta_data(THD *thd) { // Insert additional data into the DD tables. bool populate_tables(THD *thd) { @@ -197,7 +229,7 @@ index d08718a..6440b0b 100644 // Iterate over DD tables, populate tables. for (System_tables::Const_iterator it = System_tables::instance()->begin(); it != System_tables::instance()->end(); ++it) { -@@ -610,6 +618,9 @@ bool populate_tables(THD *thd) { +@@ -610,6 +619,9 @@ bool populate_tables(THD *thd) { bootstrap::DD_bootstrap_ctx::instance().set_stage( bootstrap::Stage::POPULATED); @@ -207,16 +239,21 @@ index d08718a..6440b0b 100644 return false; } -@@ -621,7 +632,7 @@ bool repopulate_charsets_and_collations(THD *thd) { +@@ -621,7 +633,12 @@ bool repopulate_charsets_and_collations(THD *thd) { to retrieve the handlerton for the DDSE should be replaced by a more generic mechanism. */ - handlerton *ddse = ha_resolve_by_legacy_type(thd, DB_TYPE_INNODB); ++ int cluster_role = tse_hton->get_cluster_role(); ++ if (cluster_role == 1) { ++ // Do not update the charset table in slave cluster. ++ return false; ++ } + handlerton *ddse = ha_resolve_by_legacy_type(thd, DB_TYPE_CTC); if (ddse->is_dict_readonly && ddse->is_dict_readonly()) { LogErr(WARNING_LEVEL, ER_DD_NO_WRITES_NO_REPOPULATION, "InnoDB", " "); return false; -@@ -724,7 +735,7 @@ namespace bootstrap { +@@ -724,7 +741,7 @@ namespace bootstrap { predefined tables and tablespaces. */ bool DDSE_dict_init(THD *thd, dict_init_mode_t dict_init_mode, uint version) { @@ -225,7 +262,7 @@ index d08718a..6440b0b 100644 /* The lists with element wrappers are mem root allocated. The wrapped -@@ -908,6 +919,10 @@ bool initialize(THD *thd) { +@@ -908,6 +925,10 @@ bool initialize(THD *thd) { // Normal server restart. bool restart(THD *thd) { @@ -236,7 +273,7 @@ index d08718a..6440b0b 100644 bootstrap::DD_bootstrap_ctx::instance().set_stage(bootstrap::Stage::STARTED); /* -@@ -946,7 +961,9 @@ bool restart(THD *thd) { +@@ -946,7 +967,9 @@ bool restart(THD *thd) { dd::execute_query(thd, "DROP SCHEMA schema_read_only") || dd::execute_query(thd, "CREATE TABLE IF NOT EXISTS S.restart(i INT)")) assert(false);); @@ -247,7 +284,7 @@ index d08718a..6440b0b 100644 bootstrap::DD_bootstrap_ctx::instance().set_stage(bootstrap::Stage::FINISHED); LogErr(INFORMATION_LEVEL, ER_DD_VERSION_FOUND, d->get_actual_dd_version(thd)); -@@ -1502,7 +1519,7 @@ bool sync_meta_data(THD *thd) { +@@ -1502,7 +1525,7 @@ bool sync_meta_data(THD *thd) { return true; // Reset the DDSE local dictionary cache. @@ -256,7 +293,7 @@ index d08718a..6440b0b 100644 if (ddse->dict_cache_reset == nullptr) return true; for (System_tables::Const_iterator it = System_tables::instance()->begin(); -@@ -1797,7 +1814,7 @@ bool update_versions(THD *thd, bool is_dd_upgrade_57) { +@@ -1797,7 +1820,7 @@ bool update_versions(THD *thd, bool is_dd_upgrade_57) { back in case of an abort, so this better be the last step we do before committing. */ @@ -973,7 +1010,7 @@ index 217a0c1..fe900eb 100644 if (unlikely(error)) return error; diff --git a/sql/handler.h b/sql/handler.h -index 0530ca1..214027d 100644 +index 0530ca1..bab9b08 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -662,7 +662,8 @@ enum legacy_db_type { @@ -1004,7 +1041,7 @@ index 0530ca1..214027d 100644 enum_binlog_command binlog_command, const char *query, uint query_length, const char *db, const char *table_name); -@@ -2133,6 +2134,13 @@ typedef bool (*check_fk_column_compat_t)( +@@ -2133,6 +2134,14 @@ typedef bool (*check_fk_column_compat_t)( typedef bool (*is_reserved_db_name_t)(handlerton *hton, const char *name); @@ -1014,11 +1051,12 @@ index 0530ca1..214027d 100644 +typedef int (*get_metadata_status_t)(); +typedef int (*op_before_load_meta_t)(THD *thd); +typedef int (*op_after_load_meta_t)(THD *thd); ++typedef int (*get_cluster_role_t)(); + /** Prepare the secondary engine for executing a statement. This function is called right after the secondary engine TABLE objects have been opened by -@@ -2420,6 +2428,8 @@ struct handlerton { +@@ -2420,6 +2429,8 @@ struct handlerton { */ uint savepoint_offset; @@ -1027,7 +1065,7 @@ index 0530ca1..214027d 100644 /* handlerton methods */ close_connection_t close_connection; -@@ -2464,6 +2474,14 @@ struct handlerton { +@@ -2464,6 +2475,15 @@ struct handlerton { dict_set_server_version_t dict_set_server_version; is_reserved_db_name_t is_reserved_db_name; @@ -1038,11 +1076,12 @@ index 0530ca1..214027d 100644 + get_metadata_status_t get_metadata_status; + op_before_load_meta_t op_before_load_meta; + op_after_load_meta_t op_after_load_meta; ++ get_cluster_role_t get_cluster_role; + /** Global handler flags. */ uint32 flags{0}; -@@ -4482,7 +4500,7 @@ class handler { +@@ -4482,7 +4502,7 @@ class handler { and delete_row() below. */ int ha_external_lock(THD *thd, int lock_type); @@ -1051,7 +1090,7 @@ index 0530ca1..214027d 100644 /** Update the current row. -@@ -6161,12 +6179,13 @@ class handler { +@@ -6161,12 +6181,13 @@ class handler { } @param buf Buffer to write from. @@ -1066,7 +1105,7 @@ index 0530ca1..214027d 100644 return HA_ERR_WRONG_COMMAND; } -@@ -6178,6 +6197,7 @@ class handler { +@@ -6178,6 +6199,7 @@ class handler { the columns required for the error message are not read, the error message will contain garbage. */ @@ -1074,7 +1113,7 @@ index 0530ca1..214027d 100644 virtual int update_row(const uchar *old_data MY_ATTRIBUTE((unused)), uchar *new_data MY_ATTRIBUTE((unused))) { return HA_ERR_WRONG_COMMAND; -@@ -6874,7 +6894,7 @@ void ha_pre_dd_shutdown(void); +@@ -6874,7 +6896,7 @@ void ha_pre_dd_shutdown(void); @retval true Error */ bool ha_flush_logs(bool binlog_group_flush = false); @@ -1083,7 +1122,7 @@ index 0530ca1..214027d 100644 int ha_create_table(THD *thd, const char *path, const char *db, const char *table_name, HA_CREATE_INFO *create_info, bool update_create_info, bool is_temp_table, -@@ -6973,7 +6993,7 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht, +@@ -6973,7 +6995,7 @@ void trans_register_ha(THD *thd, bool all, handlerton *ht, int ha_reset_logs(THD *thd); int ha_binlog_index_purge_file(THD *thd, const char *file); void ha_reset_slave(THD *thd); diff --git a/mysql-test/mysql-test-run-ST.pl b/mysql-test/mysql-test-run-ST.pl index ba2f10232713134b885b31a1aae87950da2e31d3..9c330a7f6a5b33564222900cb8c3425d8eabb7a9 100644 --- a/mysql-test/mysql-test-run-ST.pl +++ b/mysql-test/mysql-test-run-ST.pl @@ -33,8 +33,8 @@ # https://dev.mysql.com/doc/dev/mysql-server/latest/PAGE_MYSQL_TEST_RUN.html # ############################################################################## -$ENV{MTR_BINDIR} = '/home/regress/mysql-server/bld_debug'; -chdir('/home/regress/mysql-server/mysql-test'); +$ENV{MTR_BINDIR} = '/usr/local/mysql'; +#chdir('/home/regress/mysql-server/mysql-test'); use strict; use warnings; @@ -6258,11 +6258,24 @@ sub mysqld_start ($$$$) { # Remember data dir for gmon.out files if using gprof $gprof_dirs{ $mysqld->value('datadir') } = 1 if $opt_gprof; - if (daac_start()) { - mtr_error("Failed to start daac."); - } +# if (daac_start()) { +# mtr_error("Failed to start daac."); +# } if (defined $exe) { - $mysqld->{'proc'} = `pidof mysqld`; + $mysqld->{'proc'} = + My::SafeProcess->new(name => $mysqld->name(), + path => $exe, + args => \$args, + output => $output, + error => $output, + append => 1, + verbose => $opt_verbose, + nocore => $opt_skip_core, + host => undef, + shutdown => sub { mysqld_stop($mysqld) }, + envs => \@opt_mysqld_envs, + pid_file => $pid_file, + daemon_mode => $mysqld->{'daemonize'}); mtr_verbose("Started $mysqld->{proc}"); } diff --git a/mysql-test/suite/tianchi/r/recreate_test_db.result b/mysql-test/suite/tianchi/r/recreate_test_db.result new file mode 100644 index 0000000000000000000000000000000000000000..6640698d6d5377d12d8d3f53eba3a2d3a99ee8fd --- /dev/null +++ b/mysql-test/suite/tianchi/r/recreate_test_db.result @@ -0,0 +1,2 @@ +drop database if exists test; +create database test; diff --git a/storage/tianchi/ha_tse.cc b/storage/tianchi/ha_tse.cc index 43450a9792a1362a5fdcaa9cb7a6c240ecc0a345..cb5fcd22a624cacc14da29b6155aa0a2d1d62b91 100644 --- a/storage/tianchi/ha_tse.cc +++ b/storage/tianchi/ha_tse.cc @@ -103,6 +103,7 @@ #include "sql/sql_insert.h" #include "sql/sql_plugin.h" #include "sql/sql_initialize.h" // opt_initialize_insecure +#include "sql/dd/upgrade/server.h" // UPGRADE_FORCE #include "sql/abstract_query_plan.h" #include "tse_stats.h" @@ -121,6 +122,8 @@ #include "sql/sql_table.h" #include "sql/mysqld_thd_manager.h" #include "sql/sql_backup_lock.h" +#include "ctc_meta_data.h" +#include "sql/mysqld.h" //------------------------------------------------------------------------------// // SYSTEM VARIABLES // @@ -203,6 +206,11 @@ static MYSQL_SYSVAR_UINT(autoinc_lock_mode, ctc_autoinc_lock_mode, PLUGIN_VAR_RQ "The AUTOINC lock modes supported by CTC.", nullptr, nullptr, CTC_AUTOINC_NO_LOCKING, CTC_AUTOINC_OLD_STYLE_LOCKING, CTC_AUTOINC_NO_LOCKING, 0); +uint32_t ctc_update_analyze_time = CTC_ANALYZE_TIME_SEC; +static MYSQL_SYSVAR_UINT(update_analyze_time, ctc_update_analyze_time, PLUGIN_VAR_RQCMDARG, + "CBO updating time by CTC.", nullptr, nullptr, CTC_ANALYZE_TIME_SEC, + 0, UINT32_MAX, 0); + // All global and session system variables must be published to mysqld before // use. This is done by constructing a NULL-terminated array of the variables // and linking to it in the plugin public interface. @@ -222,6 +230,7 @@ static SYS_VAR *tse_system_variables[] = { MYSQL_SYSVAR(stats_enabled), MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(disaster_cluster_role), + MYSQL_SYSVAR(update_analyze_time), nullptr }; @@ -364,6 +373,12 @@ bool is_meta_version_initialize() { return false; } +// 是否为--upgrade=FORCE +bool is_meta_version_upgrading_force() { + bool is_meta_normalization = CHECK_HAS_MEMBER(handlerton, get_metadata_switch); + return is_meta_normalization && (opt_upgrade_mode == UPGRADE_FORCE); +} + bool is_alter_table_scan(bool m_error_if_not_empty) { return m_error_if_not_empty; } @@ -380,7 +395,7 @@ bool engine_skip_ddl(MYSQL_THD thd) { bool engine_ddl_passthru(MYSQL_THD thd) { // 元数据归一初始化场景,接口流程需要走到参天 - if (is_meta_version_initialize()) { + if (is_meta_version_initialize() || is_meta_version_upgrading_force()) { return false; } bool is_mysql_local = user_var_set(thd, "ctc_ddl_local_enabled"); @@ -2759,9 +2774,8 @@ EXTER_ATTACK int ha_tse::update_row(const uchar *old_data, uchar *new_data) { if (!flag.no_foreign_key_check) { flag.no_cascade_check = flag.dd_update ? true : pre_check_for_cascade(true); } - bool is_mysqld_starting = is_starting(); ret = (ct_errno_t)tse_update_row(&m_tch, cantian_new_record_buf_size, tse_buf, - &upd_fields[0], upd_fields.size(), flag, &is_mysqld_starting); + &upd_fields[0], upd_fields.size(), flag); check_error_code_to_mysql(thd, &ret); // 如果m_tse_buf为空,说明tse_buf是动态申请的,在函数退出之前要释放掉 if (m_tse_buf == nullptr) { @@ -3205,7 +3219,7 @@ EXTER_ATTACK int ha_tse::rnd_pos(uchar *buf, uchar *pos) { void ha_tse::info_low() { if (m_share && m_share->cbo_stats != nullptr) { - stats.records = m_share->cbo_stats->estimate_rows; + stats.records = m_share->cbo_stats->tse_cbo_stats_table.estimate_rows; } } @@ -3820,20 +3834,30 @@ enum_alter_inplace_result ha_tse::check_if_supported_inplace_alter( @brief Construct tse range key based on mysql range key */ -void ha_tse::set_tse_range_key(tse_range_key *tse_range_key, key_range *mysql_range_key, tse_cmp_type_t default_type) { +void ha_tse::set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key) { if (!mysql_range_key) { - tse_range_key->cmp_type = CMP_TYPE_UNKNOWN; - tse_range_key->len = 0; + tse_key->cmp_type = CMP_TYPE_NULL; + tse_key->len = 0; + tse_key->col_map = 0; return; } - tse_range_key->col_map = mysql_range_key->keypart_map; - tse_range_key->key = (const char *)mysql_range_key->key; - tse_range_key->len = mysql_range_key->length; - if (mysql_range_key->flag == HA_READ_KEY_EXACT) { - tse_range_key->cmp_type = CMP_TYPE_EQUAL; - } else { - tse_range_key->cmp_type = default_type; + tse_key->col_map = mysql_range_key->keypart_map; + tse_key->key = mysql_range_key->key; + tse_key->len = mysql_range_key->length; + + switch(mysql_range_key->flag) { + case HA_READ_KEY_EXACT: + tse_key->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + break; + case HA_READ_BEFORE_KEY: + tse_key->cmp_type = CMP_TYPE_OPEN_INTERNAL; + break; + case HA_READ_AFTER_KEY: + tse_key->cmp_type = is_min_key ? CMP_TYPE_OPEN_INTERNAL : CMP_TYPE_CLOSE_INTERNAL; + break; + default: + tse_key->cmp_type = CMP_TYPE_NULL; } } @@ -3853,22 +3877,32 @@ void ha_tse::set_tse_range_key(tse_range_key *tse_range_key, key_range *mysql_ra ha_rows ha_tse::records_in_range(uint inx, key_range *min_key, key_range *max_key) { DBUG_TRACE; - tse_range_key tse_min_key; - tse_range_key tse_max_key; - set_tse_range_key(&tse_min_key, min_key, CMP_TYPE_GREAT); - set_tse_range_key(&tse_max_key, max_key, CMP_TYPE_LESS); + tse_key tse_min_key; + tse_key tse_max_key; + set_tse_range_key(&tse_min_key, min_key, true); + set_tse_range_key(&tse_max_key, max_key, false); + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + tse_range_key key = {&tse_min_key, &tse_max_key}; uint64_t n_rows = 0; - part_info_t part_info = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; + double density; if (m_share) { - double density = calc_density_one_table(inx, &tse_min_key, &tse_max_key, part_info, m_share->cbo_stats, *table); + if (!m_share->cbo_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table->alias); + density = DEFAULT_RANGE_DENSITY; + } + density = calc_density_one_table(inx, &key, m_share->cbo_stats->tse_cbo_stats_table, *table); /* * This is a safe-guard logic since we don't handle tse call error in this method, * we need this to make sure that our optimizer continue to work even when we * miscalculated the density, and it's still prefer index read */ - n_rows += m_share->cbo_stats->estimate_rows * density; + n_rows += m_share->cbo_stats->tse_cbo_stats_table.estimate_rows * density; } /* @@ -3974,27 +4008,6 @@ THR_LOCK_DATA **ha_tse::store_lock(THD *, THR_LOCK_DATA **to, return to; } - -int tse_query_cluster_role(bool *is_slave, bool *cantian_cluster_ready) { - void *shm_inst = get_one_shm_inst(NULL); - query_cluster_role_request *req = (query_cluster_role_request*) alloc_share_mem(shm_inst, sizeof(query_cluster_role_request)); - DBUG_EXECUTE_IF("check_init_shm_oom", { req = NULL; }); - if (req == NULL) { - tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(query_cluster_role_request)); - return ERR_ALLOC_MEMORY; - } - - int result = ERR_CONNECTION_FAILED; - int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_QUERY_CLUSTER_ROLE, req, nullptr); - if (ret == CT_SUCCESS) { - result = req->result; - *is_slave = req->is_slave; - *cantian_cluster_ready = req->cluster_ready; - } - free_share_mem(shm_inst, req); - - return result; -} int32_t tse_get_cluster_role() { if (cluster_role != (int32_t)dis_cluster_role::DEFAULT) { @@ -4270,8 +4283,34 @@ int tse_set_cluster_role_by_cantian(bool is_slave) { // todo: add mutex for cluster_role if (is_slave) { cluster_role = (int32_t)dis_cluster_role::STANDBY; + if(is_starting() || is_initialize()) { + tse_log_system("[Disaster Recovecy] starting or initializing"); + super_read_only = true; + read_only = true; + opt_readonly = true; + tse_log_system("[Disaster Recovery] set super_read_only = true."); + } else { + tse_ddl_broadcast_request local_req {{0}, {0}, {0}, {0}, 0, 0, 0, 0, {0}}; + memcpy(local_req.user_name, "super_read_only", strlen("super_read_only")); + memcpy(local_req.user_ip, "on", strlen("on")); + ctc_set_sys_var(&local_req); + tse_log_system("[Disaster Recovery] ctc_set_sys_var: local_req->user_ip: %s", local_req.user_ip); + } } else { cluster_role = (int32_t)dis_cluster_role::PRIMARY; + if(is_starting() || is_initialize()) { + tse_log_system("[Disaster Recovecy] starting or initializing"); + super_read_only = false; + read_only = false; + opt_readonly = false; + tse_log_system("[Disaster Recovery] set super_read_only = false."); + } else { + tse_ddl_broadcast_request local_req {{0}, {0}, {0}, {0}, 0, 0, 0, 0, {0}}; + memcpy(local_req.user_name, "super_read_only", strlen("super_read_only")); + memcpy(local_req.user_ip, "off", strlen("off")); + ctc_set_sys_var(&local_req); + tse_log_system("[Disaster Recovery] ctc_set_sys_var: local_req->user_ip: %s", local_req.user_ip); + } } return 0; } @@ -4552,6 +4591,7 @@ static typename std::enable_if::type set_hton_ tse_hton->op_after_load_meta = tse_op_after_load_meta; tse_hton->drop_database = tse_drop_database_with_err; tse_hton->binlog_log_query = tse_binlog_log_query_with_err; + tse_hton->get_cluster_role = tse_get_cluster_role; } template @@ -5161,30 +5201,17 @@ int ha_tse::initialize_cbo_stats() if (!m_share || m_share->cbo_stats != nullptr) { return CT_SUCCESS; } - m_share->cbo_stats = (tianchi_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tianchi_cbo_stats_t)); + m_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); if (m_share->cbo_stats == nullptr) { tse_log_error("alloc shm mem failed, m_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); return ERR_ALLOC_MEMORY; } - *m_share->cbo_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0,{}}; - m_share->cbo_stats->tse_cbo_stats_table.num_distincts = - (uint32_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(uint32_t)); + *m_share->cbo_stats = {0, 0, 0, 0, 0, 0, nullptr, nullptr}; - m_share->cbo_stats->tse_cbo_stats_table.low_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(cache_variant_t)); + m_share->cbo_stats->tse_cbo_stats_table.columns = + (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); - m_share->cbo_stats->tse_cbo_stats_table.high_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, table->s->fields * sizeof(cache_variant_t)); - - if (m_share->cbo_stats->tse_cbo_stats_table.num_distincts == nullptr - || m_share->cbo_stats->tse_cbo_stats_table.low_values == nullptr - || m_share->cbo_stats->tse_cbo_stats_table.high_values == nullptr) { - tse_log_error("alloc shm mem error, size(%lu)", - table->s->fields * sizeof(uint32_t) + 2 * table->s->fields * sizeof(cache_variant_t)); - free_cbo_stats(); - return ERR_ALLOC_MEMORY; - } - + m_share->cbo_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); return CT_SUCCESS; } @@ -5193,7 +5220,7 @@ int ha_tse::get_cbo_stats_4share() THD *thd = ha_thd(); int ret = CT_SUCCESS; time_t now = time(nullptr); - if (m_share && (m_share->need_fetch_cbo || now - m_share->get_cbo_time > 60)) { + if (m_share && (m_share->need_fetch_cbo || now - m_share->get_cbo_time > ctc_update_analyze_time)) { if (m_tch.ctx_addr == INVALID_VALUE64) { char user_name[SMALL_RECORD_SIZE]; tse_split_normalized_name(table->s->normalized_path.str, user_name, SMALL_RECORD_SIZE, nullptr, 0, nullptr); @@ -5223,22 +5250,9 @@ void ha_tse::free_cbo_stats() return; } - if (m_share->cbo_stats->tse_cbo_stats_table.num_distincts != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->tse_cbo_stats_table.num_distincts); - m_share->cbo_stats->tse_cbo_stats_table.num_distincts = nullptr; - } - - if (m_share->cbo_stats->tse_cbo_stats_table.low_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->tse_cbo_stats_table.low_values); - m_share->cbo_stats->tse_cbo_stats_table.low_values = nullptr; - } - - if (m_share->cbo_stats->tse_cbo_stats_table.high_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats->tse_cbo_stats_table.high_values); - m_share->cbo_stats->tse_cbo_stats_table.high_values = nullptr; - } - - tse_free_buf(&m_tch, (uchar *) m_share->cbo_stats); + my_free((m_share->cbo_stats->tse_cbo_stats_table.columns)); + m_share->cbo_stats->tse_cbo_stats_table.columns = nullptr; + my_free((uchar *)(m_share->cbo_stats)); m_share->cbo_stats = nullptr; } diff --git a/storage/tianchi/ha_tse.h b/storage/tianchi/ha_tse.h index 72074e585476de25d7dd01cb3aa34671bbf3064c..f7597274256ffb5f80f0a9ccc7e02602d35c5224 100644 --- a/storage/tianchi/ha_tse.h +++ b/storage/tianchi/ha_tse.h @@ -132,6 +132,8 @@ again. */ #define TSE_INTERNAL_TMP_TABLE 2 #define TSE_TABLE_CONTAINS_VIRCOL 4 +#define CTC_ANALYZE_TIME_SEC 60 + /* cond pushdown */ #define INVALID_MAX_COLUMN (uint16_t)0xFFFF @@ -143,6 +145,8 @@ again. */ } #define IS_METADATA_NORMALIZATION() (tse_get_metadata_switch() == (int32_t)metadata_switchs::MATCH_META) +#define IS_PRIMARY_ROLE() (tse_get_cluster_role() == (int32_t)dis_cluster_role::PRIMARY) +#define IS_STANDBY_ROLE() (tse_get_cluster_role() == (int32_t)dis_cluster_role::STANDBY) static const uint ROW_ID_LENGTH = sizeof(uint64_t); static const uint TSE_START_TIMEOUT = 120; // seconds @@ -679,7 +683,7 @@ public: int records(ha_rows *num_rows) override; int records_from_index(ha_rows *num_rows, uint inx) override; - void set_tse_range_key(tse_range_key *tse_range_key, key_range *mysql_range_key, tse_cmp_type_t default_type); + void set_tse_range_key(tse_key *tse_key, key_range *mysql_range_key, bool is_min_key); /** @brief diff --git a/storage/tianchi/ha_tsepart.cc b/storage/tianchi/ha_tsepart.cc index 94ccef635a932806baacf43d9060ef7bd71cfc42..f687505fb62867a326b3c52bd25f640d86631cb7 100644 --- a/storage/tianchi/ha_tsepart.cc +++ b/storage/tianchi/ha_tsepart.cc @@ -57,6 +57,7 @@ #define INVALID_PART_ID (uint32)0xFFFFFFFF; extern handlerton *get_tse_hton(); +extern uint32_t ctc_update_analyze_time; constexpr uint64 INVALID_VALUE64 = 0xFFFFFFFFFFFFFFFFULL; constexpr int max_prefetch_num = MAX_PREFETCH_REC_NUM; @@ -793,32 +794,47 @@ void ha_tsepart::info_low() { table->part_info->num_parts; for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; part_id = m_part_info->get_next_used_partition(part_id)) { - stats.records += m_part_share->cbo_stats->estimate_part_rows_and_blocks[part_id]; + stats.records += m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id].estimate_rows; } } } ha_rows ha_tsepart::records_in_range(uint inx, key_range *min_key, key_range *max_key) { - tse_range_key tse_min_key; - tse_range_key tse_max_key; - set_tse_range_key(&tse_min_key, min_key, CMP_TYPE_GREAT); - set_tse_range_key(&tse_max_key, max_key, CMP_TYPE_LESS); - uint32_t used_parts; - uint32_t *part_ids = NULL; - get_used_partitions(m_part_info, &part_ids, &used_parts); + double density; + if (m_part_share && !m_part_share->cbo_stats->is_updated) { + tse_log_debug("table %s has not been analyzed", table->alias); + return 1; + } + + + tse_key tse_min_key; + tse_key tse_max_key; + set_tse_range_key(&tse_min_key, min_key, true); + set_tse_range_key(&tse_max_key, max_key, false); + tse_range_key key = {&tse_min_key, &tse_max_key}; + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + uint64_t n_rows_num = 0; - uint32_t part_num = table->part_info->num_parts; - uint32_t subpart_num = table->part_info->num_subparts; + uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; - for (uint i = 0; i < used_parts; i++) { - uint32_t part_id = m_is_sub_partitioned ? part_ids[i] / m_part_info->num_subparts : - part_ids[i]; - uint32_t subpart_id = m_is_sub_partitioned ? part_ids[i] % m_part_info->num_subparts : - INVALID_PART_ID; - part_info_t part_info = {part_id, subpart_id, part_num, subpart_num}; - double density = calc_density_one_table(inx, &tse_min_key, &tse_max_key, part_info, m_part_share->cbo_stats, *table); - n_rows_num += m_part_share->cbo_stats->estimate_part_rows_and_blocks[part_id] * density; +for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; + part_id = m_part_info->get_next_used_partition(part_id)) { + + set_tse_range_key(&tse_min_key, min_key, true); + set_tse_range_key(&tse_max_key, max_key, false); + if (tse_max_key.len < tse_min_key.len) { + tse_max_key.cmp_type = CMP_TYPE_NULL; + } else if (tse_max_key.len > tse_min_key.len) { + tse_min_key.cmp_type = CMP_TYPE_NULL; + } + density = calc_density_one_table(inx, &key, m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id], *table); + n_rows_num += m_part_share->cbo_stats->tse_cbo_stats_part_table[part_id].estimate_rows * density; } /* @@ -828,7 +844,6 @@ ha_rows ha_tsepart::records_in_range(uint inx, key_range *min_key, key_range *ma if (n_rows_num == 0) { n_rows_num = 1; } - my_free(part_ids); return (ha_rows)n_rows_num; } @@ -945,72 +960,33 @@ int ha_tsepart::initialize_cbo_stats() { } uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : table->part_info->num_parts; - uint32_t part_field = table->s->fields; - - /* estimate_part_rows_and_blocks数组包括part_rows和part_blocks,数组长度为2 * part_num */ - uint32_t rows_and_blocks_num = 2 * part_num; - - if (part_num * part_field * sizeof(cache_variant_t) > MAX_MESSAGE_SIZE) { - /* 申请共享内存超限时申请一片连续空间,包括tianchi_cbo_stats_t结构体、part_rows及part_blocks数组 - * 以及num_distincts、low_values、high_values三块数据区域 - */ - uint32_t data_size = sizeof(tianchi_cbo_stats_t) + rows_and_blocks_num * sizeof(uint32_t) + - part_num * part_field * (sizeof(uint32_t) + sizeof(cache_variant_t) + sizeof(cache_variant_t)); - m_part_share->cbo_stats = (tianchi_cbo_stats_t *)malloc(data_size); - if (m_part_share->cbo_stats == nullptr) { - tse_log_error("alloc mem failed, m_part_share->cbo_stats size(%lu)", sizeof(data_size)); - return ERR_ALLOC_MEMORY; - } - *m_part_share->cbo_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0, {}}; - - uint8_t *offset = (uint8_t *)m_part_share->cbo_stats + sizeof(tianchi_cbo_stats_t) + rows_and_blocks_num * sizeof(uint32_t); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = (uint32_t *)offset; - offset += part_num * part_field * sizeof(uint32_t); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = (cache_variant_t *)offset; - offset += part_num * part_field * sizeof(cache_variant_t); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = (cache_variant_t *)offset; - } else { - m_part_share->cbo_stats = (tianchi_cbo_stats_t*)tse_alloc_buf(&m_tch, sizeof(tianchi_cbo_stats_t) + - rows_and_blocks_num * sizeof(uint32_t)); + + m_part_share->cbo_stats = (tianchi_cbo_stats_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(tianchi_cbo_stats_t), MYF(MY_WME)); if (m_part_share->cbo_stats == nullptr) { tse_log_error("alloc shm mem failed, m_part_share->cbo_stats size(%lu)", sizeof(tianchi_cbo_stats_t)); return ERR_ALLOC_MEMORY; } - *m_part_share->cbo_stats = {0, 0, 0, 0, nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, 0, 0, 0, 0, {}}; - - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = - (uint32_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(uint32_t)); - - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(cache_variant_t)); - - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = - (cache_variant_t *)tse_alloc_buf(&m_tch, part_num * part_field * sizeof(cache_variant_t)); - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts == nullptr || - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values == nullptr || - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values == nullptr) { - tse_log_error("alloc mem failed, size(%lu)", part_num * part_field * sizeof(uint32_t) - + 2 * part_num * part_field * sizeof(cache_variant_t)); - free_cbo_stats(); - return ERR_ALLOC_MEMORY; - } - } - m_part_share->cbo_stats->part_table_info.rows_and_blocks_size = rows_and_blocks_num * sizeof(uint32_t); - m_part_share->cbo_stats->part_table_info.num_distinct_size = part_num * part_field * sizeof(uint32_t); - m_part_share->cbo_stats->part_table_info.low_value_size = part_num * part_field * sizeof(cache_variant_t); - m_part_share->cbo_stats->part_table_info.high_value_size = part_num * part_field * sizeof(cache_variant_t); + *m_part_share->cbo_stats = {0, 0, 0, 0, 0, 0, nullptr, nullptr}; + + m_part_share->cbo_stats->part_cnt = part_num; + m_part_share->cbo_stats->tse_cbo_stats_part_table = + (tse_cbo_stats_table_t*)my_malloc(PSI_NOT_INSTRUMENTED, part_num * sizeof(tse_cbo_stats_table_t), MYF(MY_WME)); + + for (uint i = 0; i < part_num; i++) { + m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns = + (tse_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(tse_cbo_stats_column_t), MYF(MY_WME)); + } + m_part_share->cbo_stats->msg_len = table->s->fields * sizeof(tse_cbo_stats_column_t); return CT_SUCCESS; } - int ha_tsepart::get_cbo_stats_4share() { THD *thd = ha_thd(); int ret = CT_SUCCESS; time_t now = time(nullptr); - if (m_part_share->need_fetch_cbo || now - m_part_share->get_cbo_time > 60) { + if (m_part_share->need_fetch_cbo || now - m_part_share->get_cbo_time > ctc_update_analyze_time) { if (m_tch.ctx_addr == INVALID_VALUE64) { char user_name[SMALL_RECORD_SIZE]; tse_split_normalized_name(table->s->normalized_path.str, user_name, SMALL_RECORD_SIZE, nullptr, 0, nullptr); @@ -1023,6 +999,22 @@ int ha_tsepart::get_cbo_stats_4share() } } update_member_tch(m_tch, get_tse_hton(), thd); + + uint32_t part_per_cnt = MAX_MESSAGE_SIZE / (table->s->fields * sizeof(tse_cbo_stats_column_t) + CBO_PART_MEM_RESIDUAL); + uint32_t part_cnt = m_part_share->cbo_stats->part_cnt; + uint32_t fetch_times = part_cnt / part_per_cnt; + m_part_share->cbo_stats->first_partid = 0; + m_part_share->cbo_stats->num_part_fetch = part_per_cnt; + + for (uint32_t i = 0; icbo_stats); + if (ret != CT_SUCCESS) { + return ret; + } + m_part_share->cbo_stats->first_partid += part_per_cnt; + } + + m_part_share->cbo_stats->num_part_fetch = part_cnt - m_part_share->cbo_stats->first_partid; ret = tse_get_cbo_stats(&m_tch, m_part_share->cbo_stats); update_sess_ctx_by_tch(m_tch, get_tse_hton(), thd); if (ret == CT_SUCCESS && m_part_share->cbo_stats->is_updated) { @@ -1039,41 +1031,17 @@ void ha_tsepart::free_cbo_stats() { return; } - if (m_part_share->cbo_stats->part_table_info.low_value_size <= MAX_MESSAGE_SIZE) { - // 释放m_part_share->cbo_stats指向的共享内存 - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values != nullptr) { - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values); - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = nullptr; - } - - tse_free_buf(&m_tch, (uchar *) m_part_share->cbo_stats); - } else { - // 释放m_part_share->cbo_stats指向的普通内存 - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts != nullptr) { - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_num_distincts = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values != nullptr) { - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_low_values = nullptr; - } - - if (m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values != nullptr) { - m_part_share->cbo_stats->tse_cbo_stats_table.part_table_high_values = nullptr; - } - - free(m_part_share->cbo_stats); + uint32_t part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + for (uint i = 0; i < part_num; i++) { + my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns); + m_part_share->cbo_stats->tse_cbo_stats_part_table[i].columns = nullptr; } + my_free(m_part_share->cbo_stats->tse_cbo_stats_part_table); + m_part_share->cbo_stats->tse_cbo_stats_part_table = nullptr; + my_free(m_part_share->cbo_stats); m_part_share->cbo_stats = nullptr; + } int ha_tsepart::check(THD *, HA_CHECK_OPT *) diff --git a/storage/tianchi/srv_mq_msg.h b/storage/tianchi/srv_mq_msg.h index 657d6527ad41444f62e6c9e1ff5607d9a3907631..95d5e0e92769597140ddf226d103c32373001d58 100644 --- a/storage/tianchi/srv_mq_msg.h +++ b/storage/tianchi/srv_mq_msg.h @@ -32,7 +32,6 @@ extern "C" { #define TSE_MQ_MESSAGE_SLICE_LEN 102400 #define MAX_LOB_LOCATOR_SIZE 4000 // 存储引擎存储blob对象结构体最大长度 -#define MAX_MESSAGE_SIZE 491520 // 共享内存最大可申请空间大小 #define REG_MISMATCH_CTC_VERSION 501 #define REG_ALLOC_INST_ID_FAILED 502 @@ -90,7 +89,6 @@ struct update_row_request { uint16_t col_num; int result; dml_flag_t flag; - bool is_mysqld_starting; }; struct delete_row_request { @@ -245,7 +243,6 @@ struct index_read_request { tse_select_mode_t mode; tse_conds *cond; bool is_replace; - bool is_mysqld_starting; }; struct index_end_request { diff --git a/storage/tianchi/tse_cbo.cc b/storage/tianchi/tse_cbo.cc index 95bf3fe48bf81a9edf89d8c2b9c11426fdc3bf1c..b472748a1871e1ca1a30d162c44b61bd106c0b84 100644 --- a/storage/tianchi/tse_cbo.cc +++ b/storage/tianchi/tse_cbo.cc @@ -20,31 +20,43 @@ #include "sql/field.h" #include "tse_srv_mq_module.h" -void r_key2variant(tse_range_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val) { - enum_field_types field_type = cur_index_part->field->real_type(); +void r_key2variant(tse_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val, cache_variant_t * value, uint32_t key_offset) +{ + ret_val->is_null = 0; + if (rKey->cmp_type == CMP_TYPE_NULL) { + *ret_val = *value; + rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return; + } - uint16_t offset = 0; + enum_field_types field_type = cur_index_part->field->real_type(); + ret_val->type = field_type; + uint32_t offset = 0; if (cur_index_part->field->is_nullable()) { + /* The first byte in the field tells if this is an SQL NULL value */ + if(*(rKey->key + key_offset) == 1) { + *ret_val = *value; + rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return; + } offset = 1; } - - ret_val->is_null = 0; - ret_val->type = field_type; + const uchar *key = rKey->key + key_offset + offset; switch(field_type) { case MYSQL_TYPE_TINY: case MYSQL_TYPE_SHORT: case MYSQL_TYPE_LONG: - ret_val->v_int = *(int32_t *)const_cast(rKey->key + offset); - break; + ret_val->v_int = *(int32_t *)const_cast(key); + break; case MYSQL_TYPE_FLOAT: - ret_val->v_real = *(float *)const_cast(rKey->key + offset); + ret_val->v_real = *(float *)const_cast(key); break; case MYSQL_TYPE_DOUBLE: - ret_val->v_real = *(double *)const_cast(rKey->key + offset); + ret_val->v_real = *(double *)const_cast(key); break; case MYSQL_TYPE_LONGLONG: - ret_val->v_bigint = *(int64_t *)const_cast(rKey->key + offset); + ret_val->v_bigint = *(int64_t *)const_cast(key); break; default: ret_val->is_null = 1; @@ -52,180 +64,342 @@ void r_key2variant(tse_range_key *rKey, KEY_PART_INFO *cur_index_part, cache_var } } -double eval_density_result(cache_variant_t *result) +en_tse_compare_type compare(cache_variant_t *right, cache_variant_t *left, enum_field_types field_type) +{ + double compare_value = 0; + switch(field_type) { + case MYSQL_TYPE_TINY: + case MYSQL_TYPE_SHORT: + case MYSQL_TYPE_LONG: + compare_value = right->v_int - left->v_int; + break; + case MYSQL_TYPE_FLOAT: + case MYSQL_TYPE_DOUBLE: + compare_value = (right->v_real - left->v_real); + break; + case MYSQL_TYPE_LONGLONG: + compare_value = (right->v_bigint - left->v_bigint); + break; + default: + return UNCOMPARABLE; + } + if (abs(compare_value) < REAL_EPSINON) { + return EQUAL; + } else if (compare_value > 0) { + return GREAT; + } + return LESS; +} + +double eval_density_result(double density) { /* * key range is beyond the actual index range, * don't have any records in this range */ - if (result->v_real < 0) { - return 0; + if (density < 0) { + return 0; } /* * key range is larger than the actual index range, * any key with this range shoule be deemed as not selective */ - if (result->v_real > 1) { - return 1; + if (density > 1) { + return 1; } - return result->v_real; + return density; } -int calc_density_low(KEY_PART_INFO *cur_index_part, cache_variant_t *high_val, cache_variant_t *low_val, - cache_variant_t *left_val, cache_variant_t *right_val, cache_variant_t *result) +static double calc_frequency_hist_equal_density(tse_cbo_stats_column_t *col_stat, cache_variant_t *val, + enum_field_types field_type) { - double density = DEFAULT_RANGE_DENSITY; - if (low_val->is_null == 1 || right_val->is_null == 1) { - return ERR_SQL_SYNTAX_ERROR; - } - - enum_field_types field_type = cur_index_part->field->real_type(); + en_tse_compare_type cmp_result; + int64 result = 0; + double density = col_stat->density; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + for (uint32 i = 0; i < col_stat->hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, val, field_type); - double numerator, denominator; - switch(field_type) { - case MYSQL_TYPE_TINY: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_LONG: - numerator = (int64_t)left_val->v_int - (int64_t)right_val->v_int; - denominator = (int64_t)high_val->v_int - (int64_t)low_val->v_int; - density = (double)numerator/(double)denominator; + if (cmp_result == EQUAL) { + result = (i == 0) ? hist_infos[i].ep_number : hist_infos[i].ep_number - hist_infos[i - 1].ep_number; break; - case MYSQL_TYPE_FLOAT: - case MYSQL_TYPE_DOUBLE: - numerator = left_val->v_real - right_val->v_real; - denominator = high_val->v_real - low_val->v_real; - density = (double)numerator/(double)denominator; - break; - case MYSQL_TYPE_LONGLONG: - numerator = (int64_t)left_val->v_bigint - (int64_t)right_val->v_bigint; - denominator = (int64_t)high_val->v_bigint - (int64_t)low_val->v_bigint; - density = (double)numerator/(double)denominator; - break; - default: + } else if (cmp_result == GREAT) { break; + } } - result->v_real = density; + uint32 end_pos = col_stat->hist_count - 1; + if (result > 0 && hist_infos[end_pos].ep_number > 0) { + density = (double)result / hist_infos[end_pos].ep_number; + } else if (result == 0) { + density = 0; + //calc_density_by_sample_rate(stmt, entity, &density); + } + return density; +} + +static double calc_balance_hist_equal_density(tse_cbo_stats_column_t *col_stat, cache_variant_t *val, + enum_field_types field_type) +{ + uint32 popular_count = 0; + en_tse_compare_type cmp_result; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + for (uint32 i = 0; i < col_stat->hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, val, field_type); - return CT_SUCCESS; + if (cmp_result == EQUAL) { + // ep_number is different from oracle, when compress balance histogram, need to change this + popular_count++; + } else if (cmp_result == GREAT) { + break; + } + } + if (popular_count > 1) { + return (double)popular_count / col_stat->num_buckets; + } + return col_stat->density; } -double calc_density_by_cond(tianchi_cbo_stats_t *cbo_stats, KEY *cur_index, - tse_range_key *tse_min_key, tse_range_key *tse_max_key, part_info_t part_info) { - double default_density = DEFAULT_RANGE_DENSITY; - KEY_PART_INFO cur_index_part = cur_index->key_part[0]; - uint32 col_id = cur_index_part.field->field_index(); - uint32_t part_id = part_info.part_id; - uint32_t subpart_id = part_info.subpart_id; - uint32_t total_part_count = IS_TSE_PART(subpart_id) ? part_info.part_num * part_info.subpart_num : part_info.part_num; +static double calc_equal_null_density(tse_cbo_stats_table_t cbo_stats, uint32 col_id, bool is_null) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = (double)col_stat->num_null / cbo_stats.estimate_rows; + return is_null ? density : (double)1 - density; +} - if (!IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.num_distincts[col_id] == 0) { - return DEFAULT_RANGE_DENSITY; +double calc_hist_equal_density(tse_cbo_stats_table_t cbo_stats, cache_variant_t *val, + uint32 col_id, enum_field_types field_type) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = col_stat->density; + uint32 hist_count = col_stat->hist_count; + if (hist_count == 0) { + return density; + } + if (col_stat->hist_type == FREQUENCY_HIST) { + // HISTOGRAM_FREQUENCY + density = calc_frequency_hist_equal_density(col_stat, val, field_type); + } else { + // HISTOGRAM_BALANCE + density = calc_balance_hist_equal_density(col_stat, val, field_type); } + return density; +} - uint32 index_no = (IS_TSE_PART(part_id) & IS_TSE_PART(subpart_id)) ? - (total_part_count * col_id) + (part_id * part_info.subpart_num + subpart_id) : - total_part_count * col_id + part_id; +static double calc_hist_between_frequency(tse_cbo_stats_table_t cbo_stats, field_stats_val stats_val, enum_field_types field_type, uint32 col_id) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = col_stat->density; + uint32 hist_count = col_stat->hist_count; + if (hist_count == 0) { + return density; + } - if (IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no] == 0) { - return DEFAULT_RANGE_DENSITY; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + uint32 end_pos = hist_count - 1; + int64 total_nums = hist_infos[end_pos].ep_number; + int64 low_nums = 0; + int64 high_nums = total_nums; + en_tse_compare_type cmp_result; + + // HISTOGRAM_FREQUNCEY + for (uint32 i = 0; i < hist_count; i++) { + + cmp_result = compare(&hist_infos[i].ep_value, stats_val.min_key_val, field_type); + if ((stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL && (cmp_result == GREAT || cmp_result == EQUAL)) + || (stats_val.min_type == CMP_TYPE_OPEN_INTERNAL && cmp_result == GREAT)) { + if (i > 0) { + low_nums = hist_infos[i - 1].ep_number; + } + low_nums = total_nums - low_nums; + break; + } } - cache_variant_t *low_val, *high_val; - cache_variant_t result; + for (uint32 i = 0; i < hist_count; i++) { - if (IS_TSE_PART(part_id)) { - low_val = &(cbo_stats->tse_cbo_stats_table.part_table_low_values[index_no]); - high_val = &(cbo_stats->tse_cbo_stats_table.part_table_high_values[index_no]); - } else { - low_val = cbo_stats->tse_cbo_stats_table.low_values + col_id; - high_val = cbo_stats->tse_cbo_stats_table.high_values + col_id; + cmp_result = compare(&hist_infos[i].ep_value, stats_val.max_key_val, field_type); + + if ((stats_val.max_type == CMP_TYPE_OPEN_INTERNAL && (cmp_result == GREAT || cmp_result == EQUAL)) + || (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL && cmp_result == GREAT)) { + high_nums = (i == 0) ? 0 : hist_infos[i - 1].ep_number; + break; + } } - if (tse_min_key->cmp_type == CMP_TYPE_GREAT && tse_max_key->cmp_type == CMP_TYPE_LESS) { - cache_variant_t min_key_val, max_key_val; - r_key2variant(tse_min_key, &cur_index_part, &min_key_val); - r_key2variant(tse_max_key, &cur_index_part, &max_key_val); + if (total_nums > 0) { + return ((double)(low_nums + high_nums - total_nums) / total_nums) ; + } else { + return density; + } + +} - if (calc_density_low(&cur_index_part, high_val, low_val, &max_key_val, &min_key_val, &result) != CT_SUCCESS) { - return default_density; - } - return eval_density_result(&result); +static int calc_hist_range_boundary(field_stats_val stats_val, enum_field_types field_type, tse_cbo_stats_column_t *col_stat, +double *percent) +{ + en_tse_compare_type cmp_result; + uint32 i, lo_pos, hi_pos; + uint32 hist_count = col_stat->hist_count; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + + + lo_pos = hi_pos = hist_count - 1; + + for (i = 0; i < hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, stats_val.min_key_val, field_type); + + if (cmp_result == GREAT) { + lo_pos = i; + break; + } } - if (tse_min_key->cmp_type == CMP_TYPE_GREAT) { - cache_variant_t min_key_val; - r_key2variant(tse_min_key, &cur_index_part, &min_key_val); + if (stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL) { + *percent += calc_balance_hist_equal_density(col_stat, stats_val.min_key_val, field_type); + } - if (calc_density_low(&cur_index_part, high_val, low_val, high_val, &min_key_val, &result) != CT_SUCCESS) { - return default_density; - } - return eval_density_result(&result); + for (i = lo_pos; i < hist_count; i++) { + cmp_result = compare(&hist_infos[i].ep_value, stats_val.max_key_val, field_type); + + if (cmp_result == GREAT || cmp_result == EQUAL) { + hi_pos = i; + break; + } + } + + if (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL) { + *percent += calc_balance_hist_equal_density(col_stat, stats_val.max_key_val, field_type); } - if (tse_max_key->cmp_type == CMP_TYPE_LESS) { - cache_variant_t max_key_val; - r_key2variant(tse_max_key, &cur_index_part, &max_key_val); + return hi_pos - lo_pos; +} - if (calc_density_low(&cur_index_part, high_val, low_val, &max_key_val, low_val, &result) != CT_SUCCESS) { - return default_density; - } - return eval_density_result(&result); +static double calc_hist_between_balance(tse_cbo_stats_table_t cbo_stats, field_stats_val stats_val, enum_field_types field_type, uint32 col_id) +{ + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + double density = col_stat->density; + uint32 hist_count = col_stat->hist_count; + if (hist_count == 0) { + return density; } + double percent = 0; - return default_density; + int bucket_range = calc_hist_range_boundary(stats_val, field_type, col_stat, &percent); + + density = (double)bucket_range / col_stat->num_buckets + percent; + return density; } -double calc_density_one_table(uint16_t idx_id, tse_range_key *min_key, tse_range_key *max_key, - part_info_t part_info, tianchi_cbo_stats_t *cbo_stats, const TABLE &table) +static double calc_hist_between_density(tse_cbo_stats_table_t cbo_stats, + uint32 col_id, enum_field_types field_type, field_stats_val stats_val) { - double density = DEFAULT_RANGE_DENSITY; - if (!cbo_stats->is_updated) { - tse_log_debug("table %s has not been analyzed", table.alias); - return density; + double density; + tse_cbo_stats_column_t *col_stat = &cbo_stats.columns[col_id]; + if (col_stat->hist_type == FREQUENCY_HIST) { + // HISTOGRAM_FREQUENCY + density = calc_hist_between_frequency(cbo_stats, stats_val, field_type, col_id); + } else { + // HISTOGRAM_BALANCE + density = calc_hist_between_balance(cbo_stats, stats_val, field_type, col_id); + } + return density; +} + +double calc_density_by_cond(tse_cbo_stats_table_t cbo_stats, KEY_PART_INFO cur_index_part, tse_range_key *key, + uint32_t key_offset) +{ + double density = DEFAULT_RANGE_DENSITY; + uint32 col_id = cur_index_part.field->field_index(); + tse_key *min_key = key->min_key; + tse_key *max_key = key->max_key; + + if (cur_index_part.field->is_nullable()) { + if (*(min_key->key + key_offset) == 1 && max_key->cmp_type == CMP_TYPE_NULL) { + return calc_equal_null_density(cbo_stats, col_id, false); } - uint32_t part_id = part_info.part_id; - uint32_t subpart_id = part_info.subpart_id; - uint32_t total_part_count = IS_TSE_PART(subpart_id) ? part_info.part_num * part_info.subpart_num : part_info.part_num; - - uint32 col_id; - if (min_key->cmp_type == CMP_TYPE_EQUAL) { - double col_product = 1.0; - uint64_t col_map = min_key->col_map; - KEY cur_index = table.key_info[idx_id]; - /* - * For all columns in used index, - * density = 1.0 / (column[0]->num_distinct * ... * column[n]->num_distinct) - */ - for (uint32_t idx_col_num = 0; idx_col_num < cur_index.actual_key_parts; idx_col_num++) { - if (col_map & ((uint64_t)1 << idx_col_num)) { - KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; - col_id = cur_index_part.field->field_index(); - if (!IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.num_distincts[col_id] != 0) { - col_product = col_product * cbo_stats->tse_cbo_stats_table.num_distincts[col_id]; - } - - uint32 index_no = (IS_TSE_PART(part_id) & IS_TSE_PART(subpart_id)) ? - (total_part_count * col_id) + (part_id * part_info.subpart_num + subpart_id) : - total_part_count * col_id + part_id; - - if (IS_TSE_PART(part_id) && cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no] != 0) { - col_product = col_product * cbo_stats->tse_cbo_stats_table.part_table_num_distincts[index_no]; - } - } - } - density = 1.0 / col_product; - } else { - KEY cur_index = table.key_info[idx_id]; - density = calc_density_by_cond(cbo_stats, &cur_index, min_key, max_key, part_info); + + if (*(min_key->key + key_offset) == 1 && *(max_key->key + key_offset) == 1) { + return calc_equal_null_density(cbo_stats, col_id, true); } - /* - * This is a safe-guard logic since we don't handle tse call error in this method, - * we need this to make sure that our optimizer continue to work even when we - * miscalculated the density, and it's still prefer index read - */ - if (density < 0.0 || density > 1.0) { - density = PREFER_RANGE_DENSITY; + } + + cache_variant_t *low_val; + cache_variant_t *high_val; + + low_val = &cbo_stats.columns[col_id].low_value; + high_val = &cbo_stats.columns[col_id].high_value; + + cache_variant_t min_key_val; + cache_variant_t max_key_val; + r_key2variant(min_key, &cur_index_part, &min_key_val, low_val, key_offset); + r_key2variant(max_key, &cur_index_part, &max_key_val, high_val, key_offset); + enum_field_types field_type = cur_index_part.field->real_type(); + if (compare(&max_key_val, low_val, field_type) == LESS || compare(&min_key_val, high_val, field_type) == GREAT) { + return 0; + } + en_tse_compare_type comapare_value = compare(&max_key_val, &min_key_val, field_type); + if (comapare_value == EQUAL && min_key->cmp_type == CMP_TYPE_CLOSE_INTERNAL && + max_key->cmp_type == CMP_TYPE_CLOSE_INTERNAL) { + return calc_hist_equal_density(cbo_stats, &max_key_val, col_id, field_type); + } else if (comapare_value == UNCOMPARABLE) { + return DEFAULT_RANGE_DENSITY; + } else if (comapare_value == LESS) { + return 0; + } + + field_stats_val stats_val = {min_key->cmp_type, max_key->cmp_type, &max_key_val, &min_key_val}; + density = calc_hist_between_density(cbo_stats, col_id, field_type, stats_val); + + return density; +} + +double calc_density_one_table(uint16_t idx_id, tse_range_key *key, + tse_cbo_stats_table_t cbo_stats, const TABLE &table) +{ + double density = 1.0; + uint32 col_id; + uint32_t key_offset = 0;//列在索引中的偏移量 + uint64_t col_map = max(key->min_key->col_map, key->max_key->col_map); + uint32_t key_len = max(key->min_key->len, key->max_key->len); + KEY cur_index = table.key_info[idx_id]; + + /* + * For all columns in used index, + * density = 1.0 / (column[0]->num_distinct * ... * column[n]->num_distinct) + */ + for (uint32_t idx_col_num = 0; idx_col_num < cur_index.actual_key_parts; idx_col_num++) { + double col_product = 1.0; + if (col_map & ((uint64_t)1 << idx_col_num)) { + KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; + col_id = cur_index_part.field->field_index(); + uint32_t offset = cur_index_part.field->is_nullable() ? 1 : 0;//null值标记位 + + if (cbo_stats.columns[col_id].total_rows == 0) { //空表 + col_product = 0; + } else if (key_offset + offset + cur_index_part.field->key_length() == key_len) {// + col_product = calc_density_by_cond(cbo_stats, cur_index_part, key, key_offset); + } else if ((offset == 1) && *(key->min_key->key + key_offset) == 1) { //null值 + col_product = calc_equal_null_density(cbo_stats, col_id, true); + } else { + col_product = calc_density_by_cond(cbo_stats, cur_index_part, key, key_offset);//联合索引 + // col_product = calc_equal_density(part_info, QUERY_TYPE_EQUAL, cbo_stats, col_id); + // col_product = calc_hist_equal_density(cbo_stats, &max_key_val, col_id, field_type); + } + col_product = eval_density_result(col_product); + key_offset += (offset + cur_index_part.field->key_length()); + density = density * col_product; } - return density; -} \ No newline at end of file + } + + /* + * This is a safe-guard logic since we don't handle tse call error in this method, + * we need this to make sure that our optimizer continue to work even when we + * miscalculated the density, and it's still prefer index read + */ + if (density < 0.0 || density > 1.0) { + density = PREFER_RANGE_DENSITY; + } + return density; +} + diff --git a/storage/tianchi/tse_cbo.h b/storage/tianchi/tse_cbo.h index 4fee771cc21de10fdc1e0cfca1d6d679dee0d01d..ee2452c444dfd8a0a2cd8b2579bcab22ad4425b5 100644 --- a/storage/tianchi/tse_cbo.h +++ b/storage/tianchi/tse_cbo.h @@ -23,22 +23,46 @@ #include "sql/dd/types/table.h" #include "srv_mq_msg.h" +#define REAL_EPSINON 0.00001 + +typedef enum en_tse_compare_type { + GREAT = 0, + EQUAL, + LESS, + UNCOMPARABLE +} compare_type; + +typedef enum en_tse_query_type { + QUERY_TYPE_NULL = 0, + QUERY_TYPE_NOT_NULL, + QUERY_TYPE_EQUAL +} query_type; typedef enum en_tse_cmp_type { - CMP_TYPE_UNKNOWN = 0, - CMP_TYPE_EQUAL, - CMP_TYPE_GREAT, - CMP_TYPE_LESS + CMP_TYPE_NULL = 0, + CMP_TYPE_OPEN_INTERNAL, + CMP_TYPE_CLOSE_INTERNAL } tse_cmp_type_t; - -/* range key type */ + typedef struct { - const char *key; + const uchar *key; uint len; tse_cmp_type_t cmp_type; uint64_t col_map; +} tse_key; + +typedef struct { + tse_key *min_key; + tse_key *max_key; } tse_range_key; +typedef struct { + tse_cmp_type_t min_type; + tse_cmp_type_t max_type; + cache_variant_t *max_key_val; + cache_variant_t *min_key_val; +} field_stats_val; + typedef struct { uint32_t part_id; uint32_t subpart_id; @@ -46,7 +70,7 @@ typedef struct { uint32_t subpart_num; } part_info_t; -double calc_density_one_table(uint16_t idx_id, tse_range_key *min_key, tse_range_key *max_key, - part_info_t part_info, tianchi_cbo_stats_t *cbo_stats, const TABLE &table); +double calc_density_one_table(uint16_t idx_id, tse_range_key *key, + tse_cbo_stats_table_t cbo_stats, const TABLE &table); #endif diff --git a/storage/tianchi/tse_srv.h b/storage/tianchi/tse_srv.h index 4779a5e0bc0c047e579457d6dd17b0550d2302b5..eb8ec85f0ab43a2427a9314922503851a188a7c1 100644 --- a/storage/tianchi/tse_srv.h +++ b/storage/tianchi/tse_srv.h @@ -33,6 +33,8 @@ extern "C" { #define SENSI_INFO #endif +#define CBO_PART_MEM_RESIDUAL 1000 +#define STATS_HISTGRAM_MAX_SIZE 254 #define SMALL_RECORD_SIZE 128 // 表名、库名等长度不会特别大,取128 #define ERROR_MESSAGE_LEN 512 #define MAX_DDL_SQL_LEN_CONTEXT (129024) // 126kb, 预留2kb @@ -55,6 +57,7 @@ extern "C" { #define IS_TSE_PART(part_id) ((part_id) < (PART_CURSOR_NUM)) #define MAX_BULK_INSERT_PART_ROWS 128 #define SESSION_CURSOR_NUM (8192 * 2) +#define MAX_MESSAGE_SIZE 52428800 // 共享内存最大可申请空间大小 // for broadcast_req.options #define TSE_SET_VARIABLE_PERSIST (0x1 << 8) @@ -122,39 +125,50 @@ typedef struct cache_st_variant { }; } cache_variant_t; +typedef enum { + FREQUENCY_HIST = 0, + HEIGHT_BALANCED_HIST = 1, +} tse_cbo_hist_type_t; + +typedef struct { + cache_variant_t ep_value; + int ep_number; +} tse_cbo_column_hist_t; + +typedef struct { + uint32_t total_rows; + uint32_t num_buckets; + uint32_t num_distinct; + uint32_t num_null; + double density; + tse_cbo_hist_type_t hist_type; + uint32_t hist_count; + tse_cbo_column_hist_t column_hist[STATS_HISTGRAM_MAX_SIZE]; // Column histogram statistics (array) + cache_variant_t low_value; + cache_variant_t high_value; +} tse_cbo_stats_column_t; + /** * cache info that can expand this struct * if need more cbo stats cache */ typedef struct { - uint32_t max_col_id; - uint32_t *num_distincts; - cache_variant_t *low_values; - cache_variant_t *high_values; - uint32_t max_part_no; // the part no of max rows num part - uint32_t *part_table_num_distincts; - cache_variant_t *part_table_low_values; - cache_variant_t *part_table_high_values; + uint32_t estimate_rows; + tse_cbo_stats_column_t *columns; } tse_cbo_stats_table_t; -typedef struct { - uint32_t rows_and_blocks_size; - uint32_t num_distinct_size; - uint32_t low_value_size; - uint32_t high_value_size; -} tianchi_part_table_cbo_info_t; - /* * statistics information that mysql optimizer need * expand this struct if need more cbo stats */ typedef struct { - uint32_t estimate_rows; - uint32_t estimate_blocks; + uint16_t first_partid; + uint16_t num_part_fetch; + uint32_t part_cnt; + uint32_t msg_len; bool is_updated; tse_cbo_stats_table_t tse_cbo_stats_table; - tianchi_part_table_cbo_info_t part_table_info; - uint32_t estimate_part_rows_and_blocks[0]; + tse_cbo_stats_table_t *tse_cbo_stats_part_table; } tianchi_cbo_stats_t; #pragma pack() @@ -289,7 +303,6 @@ enum TSE_FUNC_TYPE { TSE_FUNC_TYPE_POSITION, TSE_FUNC_TYPE_DELETE_ALL_ROWS, TSE_FUNC_TYPE_GET_CBO_STATS, - TSE_FUNC_TYPE_GET_HUGE_PART_TABLE_CBO_STATS, TSE_FUNC_TYPE_WRITE_LOB, TSE_FUNC_TYPE_READ_LOB, TSE_FUNC_TYPE_CREATE_TABLE, @@ -517,7 +530,7 @@ int tse_write_through_row(tianchi_handler_t *tch, const record_info_t *record_in int tse_bulk_write(tianchi_handler_t *tch, const record_info_t *record_info, uint64_t rec_num, uint32_t *err_pos, dml_flag_t flag, ctc_part_t *part_ids); int tse_update_row(tianchi_handler_t *tch, uint16_t new_record_len, const uint8_t *new_record, - const uint16_t *upd_cols, uint16_t col_num, dml_flag_t flag, bool *is_mysqld_starting); + const uint16_t *upd_cols, uint16_t col_num, dml_flag_t flag); int tse_delete_row(tianchi_handler_t *tch, uint16_t record_len, dml_flag_t flag); int tse_rnd_init(tianchi_handler_t *tch, expected_cursor_action_t action, tse_select_mode_t mode, tse_conds *cond); @@ -607,6 +620,7 @@ int tse_update_mysql_dd_cache(char *sql_str); int tse_set_cluster_role_by_cantian(bool is_slave); int ctc_record_sql_for_cantian(tianchi_handler_t *tch, tse_ddl_broadcast_request *broadcast_req, bool allow_fail); +int tse_query_cluster_role(bool *is_slave, bool *cantian_cluster_ready); #ifdef __cplusplus } #endif diff --git a/storage/tianchi/tse_srv_mq_stub.cc b/storage/tianchi/tse_srv_mq_stub.cc index 310fb47c3de703605b89838608d76cca57bd086e..56a3bb44e8858c1c343a4a44349ed1f2158a4c84 100644 --- a/storage/tianchi/tse_srv_mq_stub.cc +++ b/storage/tianchi/tse_srv_mq_stub.cc @@ -35,7 +35,7 @@ } while (0) // 双进程模式在 tse_init 中已经提前获取 inst_id -int tse_alloc_inst_id(uint32_t *inst_id) { +int tse_alloc_inst_id(uint32_t *inst_id) { *inst_id = ha_tse_get_inst_id(); return tse_mq_register_func(); } @@ -189,7 +189,7 @@ int tse_bulk_write(tianchi_handler_t *tch, const record_info_t *record_info, uin } int tse_update_row(tianchi_handler_t *tch, uint16_t new_record_len, const uint8_t *new_record, - const uint16_t *upd_cols, uint16_t col_num, dml_flag_t flag, bool *is_mysqld_starting) { + const uint16_t *upd_cols, uint16_t col_num, dml_flag_t flag) { assert(new_record_len < BIG_RECORD_SIZE); assert(col_num <= TSE_MAX_COLUMNS); void *shm_inst = get_one_shm_inst(tch); @@ -203,15 +203,6 @@ int tse_update_row(tianchi_handler_t *tch, uint16_t new_record_len, const uint8_ req->col_num = col_num; req->new_record = const_cast(new_record); req->flag = flag; - req->is_mysqld_starting = *is_mysqld_starting; - /* - The MySQL would try to update tables in starting progress: - 1. mysql.charset 2. mysql.resource_groups - If the Cantian is in slave-cluster (read-only) this would obviously cause error. - So we should not allow the MySQL update tables in strating progress on a slave-cantian-cluster. - The newly added flag is to inform cantian, the mysqld is starting . - Cantian would return CT_SUCCESS when it is in slave-cluster and mysqld is starting, - */ memcpy(req->upd_cols, upd_cols, sizeof(uint16_t) * col_num); int result = ERR_CONNECTION_FAILED; int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_TYPE_UPDATE_ROW, req, tch->msg_buf); @@ -522,7 +513,6 @@ int tse_index_read(tianchi_handler_t *tch, record_info_t *record_info, index_key req->cond = cond; req->is_replace = is_replace; req->result = 0; - req->is_mysqld_starting = is_starting(); int result = ERR_CONNECTION_FAILED; int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_TYPE_INDEX_READ, req, tch->msg_buf); @@ -749,64 +739,65 @@ int tse_analyze_table(tianchi_handler_t *tch, const char *db_name, const char *t return result; } -int tse_get_huge_part_table_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats) { - void *shm_inst = get_one_shm_inst(tch); - uint32_t request_size = sizeof(get_cbo_stats_request) + sizeof(tianchi_cbo_stats_t) - + stats->part_table_info.rows_and_blocks_size; - uint32_t req_size = request_size + stats->part_table_info.high_value_size - + stats->part_table_info.num_distinct_size - + stats->part_table_info.low_value_size; - uint8_t *req_buf = new uint8_t[req_size]; - get_cbo_stats_request *req = (get_cbo_stats_request *)req_buf; - req->tch = *tch; - memcpy(req_buf + sizeof(get_cbo_stats_request), stats, - sizeof(tianchi_cbo_stats_t) + stats->part_table_info.rows_and_blocks_size); - uint8_t *stats_offset = req_buf + sizeof(get_cbo_stats_request); - req->stats = (tianchi_cbo_stats_t *)stats_offset; - - int result = ERR_CONNECTION_FAILED; - int ret = tse_mq_batch_send_message(shm_inst, TSE_FUNC_TYPE_GET_HUGE_PART_TABLE_CBO_STATS, req_buf, - request_size, req_size); - if (ret != CT_SUCCESS) { - result = ret; - tse_log_error("tse_mq_batch_send_message failed in get_huge_part_table_cbo_stats: %d", ret); - } else if (req->result == CT_SUCCESS) { - // 此时req指向的参天区域,需要将其指向mysql数据区 - req->stats = (tianchi_cbo_stats_t *)stats_offset; - req->stats->tse_cbo_stats_table.part_table_num_distincts = stats->tse_cbo_stats_table.part_table_num_distincts; - req->stats->tse_cbo_stats_table.part_table_low_values = stats->tse_cbo_stats_table.part_table_low_values; - req->stats->tse_cbo_stats_table.part_table_high_values = stats->tse_cbo_stats_table.part_table_high_values; - - *tch = req->tch; - memcpy(stats, req_buf + sizeof(get_cbo_stats_request), req_size - sizeof(get_cbo_stats_request)); - result = req->result; - } - delete[] req_buf; - return result; -} - int tse_get_cbo_stats(tianchi_handler_t *tch, tianchi_cbo_stats_t *stats) { - if (stats->part_table_info.low_value_size > MAX_MESSAGE_SIZE) { - return tse_get_huge_part_table_cbo_stats(tch, stats); - } void *shm_inst = get_one_shm_inst(tch); get_cbo_stats_request *req = (get_cbo_stats_request*)alloc_share_mem(shm_inst, sizeof(get_cbo_stats_request)); + if (req == NULL) { tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(get_cbo_stats_request)); return ERR_ALLOC_MEMORY; } - req->tch = *tch; - req->stats = stats; + req->stats = (tianchi_cbo_stats_t *)alloc_share_mem(shm_inst, sizeof(tianchi_cbo_stats_t)); + if (req->stats == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(get_cbo_stats_request)); + return ERR_ALLOC_MEMORY; + } + + bool is_part_table = stats->tse_cbo_stats_part_table != nullptr ? true : false; + req->stats->msg_len = stats->msg_len; + if (!is_part_table) { + req->stats->tse_cbo_stats_table.columns = (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst, req->stats->msg_len); + } else { + req->stats->first_partid = stats->first_partid; + req->stats->num_part_fetch = stats->num_part_fetch; + req->stats->tse_cbo_stats_part_table = + (tse_cbo_stats_table_t*)alloc_share_mem(shm_inst, stats->num_part_fetch * sizeof(tse_cbo_stats_table_t)); + for (uint i = 0; i < stats->num_part_fetch; i++) { + req->stats->tse_cbo_stats_part_table[i].columns = + (tse_cbo_stats_column_t*)alloc_share_mem(shm_inst, stats->msg_len); + } + } + + req->tch = *tch; int result = ERR_CONNECTION_FAILED; int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_TYPE_GET_CBO_STATS, req, tch->msg_buf); if (ret == CT_SUCCESS) { if (req->result == CT_SUCCESS) { - *tch = req->tch; - stats = req->stats; + if (!is_part_table) { + *tch = req->tch; + memcpy(stats->tse_cbo_stats_table.columns, req->stats->tse_cbo_stats_table.columns, stats->msg_len); + stats->is_updated = req->stats->is_updated; + stats->tse_cbo_stats_table.estimate_rows = req->stats->tse_cbo_stats_table.estimate_rows; + } else { + stats->is_updated = req->stats->is_updated; + for (uint i = 0; i < stats->num_part_fetch; i++) { + stats->tse_cbo_stats_part_table[i+stats->first_partid].estimate_rows = req->stats->tse_cbo_stats_part_table[i].estimate_rows; + memcpy(stats->tse_cbo_stats_part_table[i+stats->first_partid].columns, req->stats->tse_cbo_stats_part_table[i].columns, stats->msg_len); + } + } } result = req->result; } + if (!is_part_table) { + free_share_mem(shm_inst, req->stats->tse_cbo_stats_table.columns); + } else { + for (uint i = 0; i < stats->num_part_fetch; i++) { + free_share_mem(shm_inst, req->stats->tse_cbo_stats_part_table[i].columns); + } + free_share_mem(shm_inst, req->stats->tse_cbo_stats_part_table); + } + free_share_mem(shm_inst, req->stats); free_share_mem(shm_inst, req); return result; } @@ -1466,3 +1457,24 @@ int ctc_record_sql_for_cantian(tianchi_handler_t *tch, tse_ddl_broadcast_request free_share_mem(shm_inst, req); return result; } + +int tse_query_cluster_role(bool *is_slave, bool *cantian_cluster_ready) { + void *shm_inst = get_one_shm_inst(NULL); + query_cluster_role_request *req = (query_cluster_role_request*) alloc_share_mem(shm_inst, sizeof(query_cluster_role_request)); + DBUG_EXECUTE_IF("check_init_shm_oom", { req = NULL; }); + if (req == NULL) { + tse_log_error("alloc shm mem error, shm_inst(%p), size(%lu)", shm_inst, sizeof(query_cluster_role_request)); + return ERR_ALLOC_MEMORY; + } + + int result = ERR_CONNECTION_FAILED; + int ret = tse_mq_deal_func(shm_inst, TSE_FUNC_QUERY_CLUSTER_ROLE, req, nullptr); + if (ret == CT_SUCCESS) { + result = req->result; + *is_slave = req->is_slave; + *cantian_cluster_ready = req->cluster_ready; + } + free_share_mem(shm_inst, req); + + return result; +} diff --git a/storage/tianchi/tse_stats.cc b/storage/tianchi/tse_stats.cc index 90827113109bd7770a95c18dea39af7e9302a314..5c1560cdc78bd43aad73784e657ddf6d7b222ba7 100644 --- a/storage/tianchi/tse_stats.cc +++ b/storage/tianchi/tse_stats.cc @@ -49,7 +49,6 @@ const char *ctc_interface_strs[] = { "TSE_FUNC_TYPE_POSITION", "TSE_FUNC_TYPE_DELETE_ALL_ROWS", "TSE_FUNC_TYPE_GET_CBO_STATS", - "TSE_FUNC_TYPE_GET_HUGE_PART_TABLE_CBO_STATS", "TSE_FUNC_TYPE_WRITE_LOB", "TSE_FUNC_TYPE_READ_LOB", "TSE_FUNC_TYPE_CREATE_TABLE",