From 0a5e01d7f2bb266c2eda217558ae4b401c0108ef Mon Sep 17 00:00:00 2001 From: cnnblike Date: Mon, 10 Mar 2025 20:24:04 +0800 Subject: [PATCH 1/3] fix(DDL): improve nested functional index support --- mysql-test/mysql-source-code-meta.patch | 52 ++- .../suite/ctc/r/ctc_ddl_func_index.result | 40 +++ .../suite/ctc/t/ctc_ddl_func_index.test | 15 +- storage/ctc/ha_ctc_ddl.cc | 306 ++++++++++++++---- storage/ctc/ha_ctc_ddl.h | 18 ++ 5 files changed, 364 insertions(+), 67 deletions(-) diff --git a/mysql-test/mysql-source-code-meta.patch b/mysql-test/mysql-source-code-meta.patch index 5794762..b0c1936 100644 --- a/mysql-test/mysql-source-code-meta.patch +++ b/mysql-test/mysql-source-code-meta.patch @@ -1120,6 +1120,56 @@ index 0530ca15..efeaf90e 100644 enum_binlog_command binlog_command, const char *query, size_t query_length, const char *db, const char *table_name); +diff --git a/sql/item_json_func.cc b/sql/item_json_func.cc +index 31aade1c..8a1012e4 100644 +--- a/sql/item_json_func.cc ++++ b/sql/item_json_func.cc +@@ -3656,7 +3656,7 @@ bool Item_func_array_cast::fix_fields(THD *thd, Item **ref) { + @param item the Item in which the cast operation is performed + @param[out] str the string to print to + */ +-static void print_cast_type(Cast_target cast_type, const Item *item, ++void print_cast_type(Cast_target cast_type, const Item *item, + String *str) { + const unsigned decimals = item->decimals; + switch (cast_type) { +diff --git a/sql/item_json_func.h b/sql/item_json_func.h +index 791e9b3e..7bc26c04 100644 +--- a/sql/item_json_func.h ++++ b/sql/item_json_func.h +@@ -1103,6 +1103,9 @@ class Item_func_json_value final : public Item_func { + my_decimal *val_decimal(my_decimal *value) override; + bool get_date(MYSQL_TIME *ltime, my_time_flags_t flags) override; + bool get_time(MYSQL_TIME *ltime) override; ++ Json_on_response_type m_on_empty; ++ Json_on_response_type m_on_error; ++ Cast_target m_cast_target; + + private: + /// Represents a default value given in 
JSON_VALUE's DEFAULT xxx ON EMPTY or +@@ -1112,15 +1115,12 @@ class Item_func_json_value final : public Item_func { + /// Parsed path. + Json_path m_path_json; + /// Type of the ON EMPTY clause. +- Json_on_response_type m_on_empty; + /// Type of the ON ERROR clause. +- Json_on_response_type m_on_error; + /// The default value for ON EMPTY (if not ERROR or NULL ON EMPTY). + unique_ptr_destroy_only m_default_empty; + /// The default value for ON EMPTY (if not ERROR or NULL ON EMPTY). + unique_ptr_destroy_only m_default_error; + /// The target data type. +- Cast_target m_cast_target; + + /** + Creates a Json_value_default object representing the default value given in +@@ -1244,4 +1244,6 @@ bool sort_and_remove_dups(const Json_wrapper &orig, Sorted_index_array *v); + + bool save_json_to_field(THD *thd, Field *field, const Json_wrapper *w, + bool no_error); ++void print_cast_type(Cast_target cast_type, const Item *item, ++ String *str); + #endif /* ITEM_JSON_FUNC_INCLUDED */ diff --git a/sql/locking_service.cc b/sql/locking_service.cc index 9ca8e21a..ef215f3d 100644 --- a/sql/locking_service.cc @@ -3892,6 +3942,6 @@ index 19110c7a..131a4c69 100644 ELSE() # This is a fall-back. 
FILE(WRITE ${INFO_SRC} "\nMySQL source ${VERSION}\n") -+ FILE(APPEND ${INFO_SRC} "\nCantian patch source 1.0.1\n") ++ FILE(APPEND ${INFO_SRC} "\nCantian patch source 1.0.2\n") ENDIF() ENDMACRO(CREATE_INFO_SRC) \ No newline at end of file diff --git a/mysql-test/suite/ctc/r/ctc_ddl_func_index.result b/mysql-test/suite/ctc/r/ctc_ddl_func_index.result index d8e34ff..c61b0af 100644 --- a/mysql-test/suite/ctc/r/ctc_ddl_func_index.result +++ b/mysql-test/suite/ctc/r/ctc_ddl_func_index.result @@ -49,8 +49,48 @@ id select_type table partitions type possible_keys key key_len ref rows filtered 1 SIMPLE t1 NULL ref func_index_2 func_index_2 7 const 4 100.00 NULL Warnings: Note 1003 /* select#1 */ select `db1`.`t1`.`c1` AS `c1`,`db1`.`t1`.`c2` AS `c2`,`db1`.`t1`.`c3` AS `c3`,`db1`.`t1`.`c4` AS `c4` from `db1`.`t1` where (substr(`c4`,1,1) = 'a') +create index func_index_3 on t1 ((substr(upper(c4), 1, 1))); +select * from t1 where substr(upper(c4), 1, 1) = 'a'; +c1 c2 c3 c4 +1 1 aaa aaa +2 2 aaA aBB +4 4 aaBa Aaa +explain select * from t1 where substr(upper(c4), 1, 1) = 'a'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref func_index_3 func_index_3 7 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `db1`.`t1`.`c1` AS `c1`,`db1`.`t1`.`c2` AS `c2`,`db1`.`t1`.`c3` AS `c3`,`db1`.`t1`.`c4` AS `c4` from `db1`.`t1` where (substr(upper(`c4`),1,1) = 'a') +analyze table t1; +Table Op Msg_type Msg_text +db1.t1 analyze status OK +explain select * from t1 where substr(upper(c4), 1, 1) = 'a'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref func_index_3 func_index_3 7 const 4 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `db1`.`t1`.`c1` AS `c1`,`db1`.`t1`.`c2` AS `c2`,`db1`.`t1`.`c3` AS `c3`,`db1`.`t1`.`c4` AS `c4` from `db1`.`t1` where (substr(upper(`c4`),1,1) = 'a') +create index func_index_4 on t1 ((upper(substr(c4, 1, 1)))); +select * from t1 
where upper(substr(c4, 1, 1)) = 'a'; +c1 c2 c3 c4 +1 1 aaa aaa +2 2 aaA aBB +4 4 aaBa Aaa +explain select * from t1 where upper(substr(c4, 1, 1)) = 'a'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref func_index_4 func_index_4 7 const 1 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `db1`.`t1`.`c1` AS `c1`,`db1`.`t1`.`c2` AS `c2`,`db1`.`t1`.`c3` AS `c3`,`db1`.`t1`.`c4` AS `c4` from `db1`.`t1` where (upper(substr(`c4`,1,1)) = 'a') +analyze table t1; +Table Op Msg_type Msg_text +db1.t1 analyze status OK +explain select * from t1 where upper(substr(c4, 1, 1)) = 'a'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref func_index_4 func_index_4 7 const 4 100.00 NULL +Warnings: +Note 1003 /* select#1 */ select `db1`.`t1`.`c1` AS `c1`,`db1`.`t1`.`c2` AS `c2`,`db1`.`t1`.`c3` AS `c3`,`db1`.`t1`.`c4` AS `c4` from `db1`.`t1` where (upper(substr(`c4`,1,1)) = 'a') alter table t1 drop index func_index_1; drop index func_index_2 on t1; +drop index func_index_3 on t1; +drop index func_index_4 on t1; create table t2 (c1 int, c2 int, c3 varchar(10)); insert into t2 values (1, 1, 'aaa'), (2, 2, 'aaA'), (3, 3, 'AAA'), (4, 4, 'aaBa'); select * from t2; diff --git a/mysql-test/suite/ctc/t/ctc_ddl_func_index.test b/mysql-test/suite/ctc/t/ctc_ddl_func_index.test index 879c0c2..df44be2 100644 --- a/mysql-test/suite/ctc/t/ctc_ddl_func_index.test +++ b/mysql-test/suite/ctc/t/ctc_ddl_func_index.test @@ -20,10 +20,23 @@ explain select * from t1 where substr(c4, 1, 1) = 'a'; analyze table t1; explain select * from t1 where substr(c4, 1, 1) = 'a'; +# 创建嵌套函数索引场景 +create index func_index_3 on t1 ((substr(upper(c4), 1, 1))); +select * from t1 where substr(upper(c4), 1, 1) = 'a'; +explain select * from t1 where substr(upper(c4), 1, 1) = 'a'; +analyze table t1; +explain select * from t1 where substr(upper(c4), 1, 1) = 'a'; +create index func_index_4 on t1 ((upper(substr(c4, 
1, 1)))); +select * from t1 where upper(substr(c4, 1, 1)) = 'a'; +explain select * from t1 where upper(substr(c4, 1, 1)) = 'a'; +analyze table t1; +explain select * from t1 where upper(substr(c4, 1, 1)) = 'a'; + # 删除函数索引 alter table t1 drop index func_index_1; drop index func_index_2 on t1; - +drop index func_index_3 on t1; +drop index func_index_4 on t1; # 创建函数索引异常场景 create table t2 (c1 int, c2 int, c3 varchar(10)); diff --git a/storage/ctc/ha_ctc_ddl.cc b/storage/ctc/ha_ctc_ddl.cc index 30205b9..e4ab871 100644 --- a/storage/ctc/ha_ctc_ddl.cc +++ b/storage/ctc/ha_ctc_ddl.cc @@ -48,6 +48,9 @@ #include "sql/sql_table.h" // primary_key_name #include "sql/sql_partition.h" #include "sql/item_func.h" +#include "sql/item_json_func.h" +#include "sql/table_function.h" + #include "my_time.h" #include "decimal.h" @@ -1239,83 +1242,256 @@ static const FuncMapping mysql_func_name_to_ctc_map[] = { static const size_t func_map_size = sizeof(mysql_func_name_to_ctc_map) / sizeof(mysql_func_name_to_ctc_map[0]); -static int ctc_check_func_name(std::string func_name) +static int ctc_check_func_name(Item_func *func_expr_item) { - size_t func_name_len = func_name.length(); - - for (size_t i = 0; i < func_map_size; ++i) { - const std::string mysql_func_name = mysql_func_name_to_ctc_map[i].mysql_func_name; - if (func_name_len >= mysql_func_name.length() && - func_name.compare(0, mysql_func_name.length(), mysql_func_name) == 0) { - my_printf_error(ER_DISALLOWED_OPERATION, "Function %s is not indexable", MYF(0), - mysql_func_name_to_ctc_map[i].ctc_func_name.c_str()); - return CT_ERROR; + std::string func_name = func_expr_item->func_name(); + size_t func_name_len = func_name.length(); + + for (size_t i = 0; i < func_map_size; i++) { + const std::string mysql_func_name = mysql_func_name_to_ctc_map[i].mysql_func_name; + if (func_name_len >= mysql_func_name.length() && + func_name.compare(0, mysql_func_name.length(), mysql_func_name) == 0) { + my_printf_error(ER_DISALLOWED_OPERATION, "Function 
%s is not indexable", MYF(0), + mysql_func_name_to_ctc_map[i].ctc_func_name.c_str()); + return CT_ERROR; + } } - } - return CT_SUCCESS; + + for (uint32_t i = 0; i < func_expr_item->arg_count; i++) { + if (func_expr_item->get_arg(i)->type() == Item::FUNC_ITEM) { + // nested function, need to do check recursively, error emit has been done on-the-spot + CTC_RETURN_IF_NOT_ZERO(ctc_check_func_name((Item_func *) func_expr_item->get_arg(i))); + } + } + return CT_SUCCESS; } -static uint32_t ctc_fill_func_key_part(TABLE *form, THD *thd, TcDb__CtcDDLTableKeyPart *req_key_part, Value_generator *gcol_info) +static int recursively_get_dependency_item(TABLE *form, Item_func *item_func, TcDb__CtcDDLTableKeyPart *req_key_part, + uint32_t *col_item_count) { - Item_func *func_expr_item = dynamic_cast(gcol_info->expr_item); - if (func_expr_item == nullptr) { - my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), - "[CTC_CREATE_TABLE]: CTC do not support this functional index."); - return CT_ERROR; - } + for (uint32_t i = 0; i < item_func->arg_count; i++) { + if (item_func->get_arg(i)->type() == Item::FUNC_ITEM) { + // nested function, would go recursively to do detection + int result = recursively_get_dependency_item(form, (Item_func *) item_func->get_arg(i), + req_key_part, col_item_count); + if (result != CT_SUCCESS) { + return result; + } + } - uint32_t arg_count = func_expr_item->arg_count; - if (arg_count == 0) { - my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), - "[CTC_CREATE_TABLE]: There is no functional index."); - return CT_ERROR; - } + if (item_func->get_arg(i)->type() == Item::FIELD_ITEM) { + // the field* args[i] contains don't have proper m_field_index when it's a alter table scenario. 
+ // thus we have to look up by name through metadata on the new TABLE* + Item_field *arg_item_field = (Item_field*) item_func->get_arg(i); + Field *field = ctc_get_field_by_name(form, arg_item_field->field->field_name); + if (field && field->is_virtual_gcol()) { + my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), + "Cantian does not support index on virtual generated column."); + return CT_ERROR; + } + + if (*col_item_count >= 1) { + my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), + "Cantian does not support function indexes with multiple columns of arguments."); + return CT_ERROR; + } + req_key_part->name = const_cast(item_func->get_arg(i)->item_name.ptr()); + (*col_item_count)++; + } + } + return CT_SUCCESS; +} - if (ctc_check_func_name(std::string(func_expr_item->func_name())) != CT_SUCCESS) { - return CT_ERROR; - } - - req_key_part->is_func = true; - req_key_part->func_name = const_cast(func_expr_item->func_name()); - Item **args = func_expr_item->arguments(); - uint32_t col_item_count = 0; - Field *field = nullptr; - for (uint32_t i = 0; i < arg_count; i++) { - field = ctc_get_field_by_name(form, const_cast(args[i]->item_name.ptr())); - if (field && field->is_gcol()) { - my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), - "Cantian does not support index on generated column."); - return CT_ERROR; +void ctc_rewrite_expression_clause(TABLE *form, const THD *thd, Item *item, String *out); + +void ctc_print_on_empty_or_error(TABLE *form, const THD *thd, String *out, bool on_empty, + Json_on_response_type response_type, Item *default_string) +{ + switch (response_type) { + case Json_on_response_type::ERROR: + out->append(STRING_WITH_LEN(" error")); + break; + case Json_on_response_type::NULL_VALUE: + out->append(STRING_WITH_LEN(" null")); + break; + case Json_on_response_type::DEFAULT: + out->append(STRING_WITH_LEN(" default ")); + ctc_rewrite_expression_clause(form, thd, default_string, out); + break; + case Json_on_response_type::IMPLICIT: + // 
Nothing to print when the clause was implicit. + return; + }; + + if (on_empty) { + out->append(STRING_WITH_LEN(" on empty")); + } else { + out->append(STRING_WITH_LEN(" on error")); } - if (args[i]->type() == Item::FIELD_ITEM) { - if (col_item_count >= 1) { - my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), - "Cantian does not support function indexes with multiple columns of arguments."); - return CT_ERROR; - } - req_key_part->name = const_cast(args[i]->item_name.ptr()); - col_item_count++; +} + +#ifdef METADATA_NORMALIZED +#define JSON_VALUE_ON_EMPTY_ARG_IDX 2 +#define JSON_VALUE_ON_ERROR_ARG_IDX 3 + +// equivalent to Item_func_json_value::print, but stripped charset info +void ctc_item_print_json_value(TABLE *form, const THD *thd, Item* item, String *out) +{ + Item_func_json_value *item_func = (Item_func_json_value *) item; + out->append(STRING_WITH_LEN("json_value(")); + ctc_rewrite_expression_clause(form, thd, item_func->get_arg(0), out); + out->append(STRING_WITH_LEN(", ")); + ctc_rewrite_expression_clause(form, thd, item_func->get_arg(1), out); + out->append(STRING_WITH_LEN(" returning ")); + if (item_func->m_cast_target == ITEM_CAST_CHAR && item_func->collation.collation != &my_charset_bin) { + // don't add char, use CLOB instead + out->append(STRING_WITH_LEN("CLOB")); + } else { + print_cast_type(item_func->m_cast_target, item_func, out); } - } - - char buffer[FUNC_TEXT_MAX_LEN] = {0}; - String gc_expr(buffer, sizeof(buffer), &my_charset_bin); - gcol_info->print_expr(thd, &gc_expr); - string expr_str(buffer); - expr_str.erase(remove(expr_str.begin(), expr_str.end(), '`'), expr_str.end()); - // 处理json_value建索引,只允许returning char - // 不带returning默认char512 - if (strcmp(req_key_part->func_name, "json_value") == 0) { + ctc_print_on_empty_or_error(form, thd, out, true, item_func->m_on_empty, + item_func->get_arg(JSON_VALUE_ON_EMPTY_ARG_IDX)); + ctc_print_on_empty_or_error(form, thd, out, false, item_func->m_on_error, + 
item_func->get_arg(JSON_VALUE_ON_ERROR_ARG_IDX)); + out->append(')'); +}; +#else +// with not normalized mode, we don't have patch on mysql source code, so fallback to the old implmentation route +// based on regular expression replacement. the conversion for parameter of json_value won't be precise as normal path +void ctc_item_print_json_value(TABLE *form MY_ATTRIBUTE((unused)), const THD *thd, Item* item, String *out) +{ + Item_func_json_value *item_func = (Item_func_json_value *) item; + char buffer[FUNC_TEXT_MAX_LEN] = {0}; + String gc_expr(buffer, sizeof(buffer), &my_charset_bin); + item_func->print(thd, &gc_expr, enum_query_type(QT_WITHOUT_INTRODUCERS | QT_NO_DB | QT_NO_TABLE)); + string expr_str(buffer); + expr_str.erase(remove(expr_str.begin(), expr_str.end(), '`'), expr_str.end()); + // 处理json_value建索引,只允许returning char + // 不带returning默认char512 std::regex reg_char("returning[ ]char[(]\\d+[)]"); std::regex reg_charset("[_][a-z]+[0-9]*[a-z]*[0-9]*['$]"); std::regex reg_charset2("[ ]character[ ]set[ ][a-z]+[0-9]*[a-z]*[0-9]"); - expr_str =std::regex_replace(expr_str, reg_char, "returning CLOB"); - expr_str =std::regex_replace(expr_str, reg_charset, "'"); - //处理char带charset设置 - expr_str =std::regex_replace(expr_str, reg_charset2, ""); - } - strncpy(req_key_part->func_text, expr_str.c_str(), FUNC_TEXT_MAX_LEN - 1); - return CT_SUCCESS; + expr_str = std::regex_replace(expr_str, reg_char, "returning CLOB"); + expr_str = std::regex_replace(expr_str, reg_charset, "'"); + // 处理char带charset设置 + expr_str = std::regex_replace(expr_str, reg_charset2, ""); + out->append(expr_str.c_str()); + return ; +}; +#endif + +std::map item_func_printer = { + {"json_value", (ctc_item_print_t) ctc_item_print_json_value} +}; + +static void ctc_print_op(TABLE *form, const THD *thd, Item_func *item_func, String *out) +{ + out->append('('); + for (uint i = 0; i < item_func->arg_count - 1; i++) { + ctc_rewrite_expression_clause(form, thd, item_func->get_arg(i), out); + out->append(' '); 
+ out->append(item_func->func_name()); + out->append(' '); + } + ctc_rewrite_expression_clause(form, thd, item_func->get_arg(item_func->arg_count - 1), out); + out->append(')'); +} + +static void ctc_print_func(TABLE *form, const THD *thd, Item_func *item_func, String *out) +{ + out->append(item_func->func_name()); + out->append('('); + for (uint i = 0; i < item_func->arg_count; i++) { + if (i != 0) out->append(','); + ctc_rewrite_expression_clause(form, thd, item_func->get_arg(i), out); + } + out->append(')'); +} + +// the origin implementation of Item_field::print would add extra back quote (`) when it's called, +// due to append_identifier() would explicitly use get_quote_char_for_identifier() to get quote char. +// meanwhile ctsql don't have support for that, ctc_print_field shall be used instead. + +static void ctc_print_field(TABLE *form MY_ATTRIBUTE((unused)), const THD *thd MY_ATTRIBUTE((unused)), + Item_ident *item_ident, String *out) +{ + // equivalent to append_identifier(thd, out, item_ident->field_name, strlen(item_ident->field_name), NULL, NULL) + // but without quote_char support + out->append(item_ident->field_name, strlen(item_ident->field_name), system_charset_info); +} + +// this expression rewrite process may only happen once during DDL, performance is not major concern +void ctc_rewrite_expression_clause(TABLE *form, const THD *thd, Item *item, String *out) +{ + if (item == nullptr) { + return; + } + Item::Type item_type = item->type(); + if (item_type == Item::Type::FIELD_ITEM) { + ctc_print_field(form, thd, (Item_ident*) item, out); + return; + } + + // for function item, in order to intercept all print() call to its args, a ctc_print_func is used instead of + // the built-in print() + if (item_type == Item::Type::FUNC_ITEM) { + std::string func_name_string = std::string(((Item_func *)item)->func_name()); + if (print_op_func_name.find(func_name_string) != print_op_func_name.end()) { + ctc_print_op(form, thd, (Item_func*) item, out); + } else 
if (item_func_printer.find(func_name_string) != item_func_printer.end()) { + ctc_item_print_t printer = item_func_printer[func_name_string]; + printer(form, thd, (Item_func *)item, out); + } else { + ctc_print_func(form, thd, (Item_func *)item, out); + } + return; + } + + // otherwise, it should be int literals / string literals etc, use the built-in print + // but with QT_WITHOUT_INTRODUCERS so we don't have introducers like _utf8mb4, which is not supported by ctsql + item->print(thd, out, enum_query_type(QT_WITHOUT_INTRODUCERS | QT_NO_DB | QT_NO_TABLE)); +} + +static int ctc_fill_func_key_part(TABLE *form, THD *thd, + TcDb__CtcDDLTableKeyPart *req_key_part, Value_generator *gcol_info) +{ + Item_func *func_expr_item = dynamic_cast(gcol_info->expr_item); + if (func_expr_item == nullptr) { + my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), + "[CTC_CREATE_TABLE]: CTC do not support this functional index."); + return CT_ERROR; + } + + uint32_t arg_count = func_expr_item->arg_count; + if (arg_count == 0) { + my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), + "[CTC_CREATE_TABLE]: There is no functional index."); + return CT_ERROR; + } + + if (ctc_check_func_name(func_expr_item) != CT_SUCCESS) { + return CT_ERROR; + } + + req_key_part->is_func = true; + req_key_part->func_name = const_cast(func_expr_item->func_name()); + + uint32_t col_item_count = 0; + // recursively_get_dependency_item fill req_key_part->name + int result = recursively_get_dependency_item(form, func_expr_item, req_key_part, &col_item_count); + if (result != CT_SUCCESS || col_item_count != 1) { + my_printf_error(ER_DISALLOWED_OPERATION, "%s", MYF(0), + "[CTC_CREATE_TABLE]: CTC do not support this functional index."); + return CT_ERROR; + } + + char buffer[FUNC_TEXT_MAX_LEN] = {0}; + String gc_expr(buffer, sizeof(buffer), &my_charset_bin); + + gc_expr.length(0); + ctc_rewrite_expression_clause(form, thd, gcol_info->expr_item, &gc_expr); + strncpy(req_key_part->func_text, gc_expr.c_ptr(), 
FUNC_TEXT_MAX_LEN - 1); + return CT_SUCCESS; } static inline longlong get_session_level_create_index_parallelism(THD *thd) diff --git a/storage/ctc/ha_ctc_ddl.h b/storage/ctc/ha_ctc_ddl.h index 8d6bc1a..af3bc48 100644 --- a/storage/ctc/ha_ctc_ddl.h +++ b/storage/ctc/ha_ctc_ddl.h @@ -21,6 +21,8 @@ #include #include #include "storage/ctc/ha_ctcpart.h" +#include "sql/item_json_func.h" +#include "sql/table_function.h" #define UN_SUPPORT_DDL "ddl statement" /** Max table name length as defined in CT_MAX_NAME_LEN */ #define CTC_MAX_TABLE_NAME_LEN 64 @@ -172,6 +174,22 @@ static map mysql_collate_num_to_ctc_type = { {76, COLLATE_UTF8_TOLOWER_CI}, }; +typedef void (*ctc_item_print_t) (TABLE *form, const THD *thd, Item* item, String *out); +// the mapping relationship between rewroten Item sub-class print() + +// as for MySQL 8.0.26, only following sub class of Item would be using print_op +// 1. Item_func_int_div ("DIV"), +// 2. sub class of Item_num_op, +// * Item_func_div: / ; Item_func_mul: * ; Item_func_mod: % ; +// * Item_func_additive_op: Item_func_plus: + ; Item_func_minus: - +// 3. 
sub class of Item_func_bit, +// * Item_func_bit_two_param: Item_func_bit_or: | ; Item_func_bit_and: & ; Item_func_bit_xor: ^ +// * sub class of Item_func_shift: Item_func_shift_left: << ; Item_func_shift_right: >> +// * Item_func_bit_neg: ~ +static std::set print_op_func_name = { + "DIV", "/", "*", "%", "+", "-", "|", "&", "^", "<<", ">>", "~" +}; + typedef struct { char base_name[SMALL_RECORD_SIZE]; char var_name[SMALL_RECORD_SIZE]; -- Gitee From 3299ff3ce82e54cb182c479be46b729e11dbc4c9 Mon Sep 17 00:00:00 2001 From: qichang Date: Tue, 11 Mar 2025 17:10:12 +0800 Subject: [PATCH 2/3] fix bug for lock tables --- storage/ctc/ctc_meta_data.cc | 44 +++++++++++++++++++++++----------- storage/ctc/ctc_meta_data.h | 3 ++- storage/ctc/ctc_mysql_proxy.cc | 3 ++- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/storage/ctc/ctc_meta_data.cc b/storage/ctc/ctc_meta_data.cc index 6fdbfd9..4932a11 100644 --- a/storage/ctc/ctc_meta_data.cc +++ b/storage/ctc/ctc_meta_data.cc @@ -676,7 +676,24 @@ void ctc_mdl_unlock_thd_by_ticket(THD* thd, MDL_request *ctc_release_request) { } } -void ctc_mdl_unlock_tables_thd(ctc_handler_t *tch) { +void ctc_release_mdl_tickets(THD *thd, std::map* ticket_map, string* filter_key) +{ + for (auto iter = ticket_map->begin(); iter != ticket_map->end();) { + MDL_ticket* ticket = iter->second; + bool key_match = (filter_key == nullptr) || (iter->first == *filter_key); + bool type_match = (ticket->get_type() == MDL_SHARED_READ_ONLY) || + (ticket->get_type() == MDL_SHARED_NO_READ_WRITE); + if (key_match && type_match) { + thd->mdl_context.release_lock(ticket); + iter = ticket_map->erase(iter); + } else { + iter++; + } + } +} + +void ctc_mdl_unlock_tables_thd(ctc_handler_t *tch, MDL_request *mdl_request) +{ bool is_same_node = (tch->inst_id == ctc_instance_id); uint64_t mdl_thd_key = ctc_get_conn_key(tch->inst_id, tch->thd_id, true); @@ -706,14 +723,14 @@ void ctc_mdl_unlock_tables_thd(ctc_handler_t *tch) { return; } - for (auto iter = 
ctc_mdl_ticket_map->begin(); iter != ctc_mdl_ticket_map->end();) { - MDL_ticket *ticket = iter->second; - if (ticket->get_type() == MDL_SHARED_READ_ONLY || ticket->get_type() == MDL_SHARED_NO_READ_WRITE) { - thd->mdl_context.release_lock(ticket); - ctc_mdl_ticket_map->erase(iter++); - } else { - iter++; - } + if (mdl_request == nullptr) { + // Unlock tables in this thd. + ctc_release_mdl_tickets(thd, ctc_mdl_ticket_map, nullptr); + } else { + // Re-lock a table that already has MDL_SHARED_READ_ONLY or MDL_SHARED_NO_READ_WRITE. + string mdl_ticket_key; + mdl_ticket_key.assign(((const char*)(mdl_request->key.ptr())), mdl_request->key.length()); + ctc_release_mdl_tickets(thd, ctc_mdl_ticket_map, &mdl_ticket_key); } if (ctc_mdl_ticket_map->empty()) { @@ -973,8 +990,10 @@ int ctc_set_sys_var(ctc_set_opt_request *broadcast_req) { } int ctc_ddl_execute_lock_tables_by_req(ctc_handler_t *tch, ctc_lock_table_info *lock_info, int *err_code) { -// unlock tables before locking tables - ctc_mdl_unlock_tables_thd(tch); + MDL_request ctc_mdl_request; + ctc_init_mdl_request(lock_info, &ctc_mdl_request, MDL_key::TABLE); + // For lock tables write/read operations, prevent multiple locks to one table + ctc_mdl_unlock_tables_thd(tch, &ctc_mdl_request); bool is_same_node = (tch->inst_id == ctc_instance_id); uint64_t mdl_thd_key = ctc_get_conn_key(tch->inst_id, tch->thd_id, true); @@ -986,9 +1005,6 @@ int ctc_ddl_execute_lock_tables_by_req(ctc_handler_t *tch, ctc_lock_table_info * THD *thd = nullptr; ctc_init_thd(&thd, mdl_thd_key); - MDL_request ctc_mdl_request; - ctc_init_mdl_request(lock_info, &ctc_mdl_request, MDL_key::TABLE); - if (thd->mdl_context.acquire_lock(&ctc_mdl_request, 1)) { *err_code = ER_LOCK_WAIT_TIMEOUT; return true; diff --git a/storage/ctc/ctc_meta_data.h b/storage/ctc/ctc_meta_data.h index 56ebae7..2141040 100644 --- a/storage/ctc/ctc_meta_data.h +++ b/storage/ctc/ctc_meta_data.h @@ -20,6 +20,7 @@ #include #include "ctc_srv.h" +#include "sql/mdl.h" #pragma GCC 
visibility push(default) @@ -30,7 +31,7 @@ int ctc_mdl_lock_thd(ctc_handler_t *tch, ctc_lock_table_info *lock_info, int *er void ctc_mdl_unlock_thd(ctc_handler_t *tch, ctc_lock_table_info *lock_info); int ctc_set_sys_var(ctc_set_opt_request *broadcast_req); int ctc_ddl_execute_lock_tables_by_req(ctc_handler_t *tch, ctc_lock_table_info *lock_info, int *err_code); -void ctc_mdl_unlock_tables_thd(ctc_handler_t *tch); +void ctc_mdl_unlock_tables_thd(ctc_handler_t *tch, MDL_request *mdl_request); int ctc_invalidate_mysql_dd_cache_req(ctc_handler_t *tch, ctc_invalidate_broadcast_request *broadcast_req, int *err_code); diff --git a/storage/ctc/ctc_mysql_proxy.cc b/storage/ctc/ctc_mysql_proxy.cc index a7546fc..a668b1b 100644 --- a/storage/ctc/ctc_mysql_proxy.cc +++ b/storage/ctc/ctc_mysql_proxy.cc @@ -653,7 +653,8 @@ __attribute__((visibility("default"))) int ctc_ddl_execute_unlock_tables(ctc_han if (IS_METADATA_NORMALIZATION()) { UNUSED_PARAM(mysql_inst_id); if (lock_info->sql_type == SQLCOM_UNLOCK_TABLES) { - ctc_mdl_unlock_tables_thd(tch); + // The operation is unlock tables; + ctc_mdl_unlock_tables_thd(tch, nullptr); } ctc_mdl_unlock_thd(tch, lock_info); return 0; -- Gitee From 1b75a96b62df65367311c0c04c9317a33e5769de Mon Sep 17 00:00:00 2001 From: qichang Date: Thu, 14 Nov 2024 14:53:50 +0800 Subject: [PATCH 3/3] IO_COST --- mysql-test/suite/ctc/r/ctc_bit_analyze.result | 2 +- storage/ctc/ctc_srv.h | 3 + storage/ctc/ctc_srv_mq_stub.cc | 5 ++ storage/ctc/ha_ctc.cc | 60 ++++++++++++++++++- storage/ctc/ha_ctc.h | 47 +++++---------- storage/ctc/ha_ctcpart.cc | 50 +++++++++++++--- storage/ctc/ha_ctcpart.h | 2 + 7 files changed, 124 insertions(+), 45 deletions(-) diff --git a/mysql-test/suite/ctc/r/ctc_bit_analyze.result b/mysql-test/suite/ctc/r/ctc_bit_analyze.result index 7d18c30..fccc1c3 100644 --- a/mysql-test/suite/ctc/r/ctc_bit_analyze.result +++ b/mysql-test/suite/ctc/r/ctc_bit_analyze.result @@ -44,7 +44,7 @@ Warnings: Note 1003 /* select#1 */ select 
`test`.`tbl_bit`.`num` AS `num` from `test`.`tbl_bit` where ((`test`.`tbl_bit`.`num` > 0x02) and (`test`.`tbl_bit`.`num` < 0x0d)) explain select * from tbl_bit where num >= b'0010' and num <= b'1101'; id select_type table partitions type possible_keys key key_len ref rows filtered Extra -1 SIMPLE tbl_bit NULL index idx_num idx_num 2 NULL 16 75.00 Using where; Using index +1 SIMPLE tbl_bit NULL range idx_num idx_num 2 NULL 12 100.00 Using where; Using index Warnings: Note 1003 /* select#1 */ select `test`.`tbl_bit`.`num` AS `num` from `test`.`tbl_bit` where ((`test`.`tbl_bit`.`num` >= 0x02) and (`test`.`tbl_bit`.`num` <= 0x0d)) drop table tbl_bit; diff --git a/storage/ctc/ctc_srv.h b/storage/ctc/ctc_srv.h index 345be8e..f65df4b 100644 --- a/storage/ctc/ctc_srv.h +++ b/storage/ctc/ctc_srv.h @@ -145,6 +145,8 @@ typedef struct { typedef struct { uint32_t estimate_rows; ctc_cbo_stats_column_t *columns; + uint32_t blocks; + uint32_t avg_row_len; } ctc_cbo_stats_table_t; /* @@ -161,6 +163,7 @@ typedef struct { uint32_t num_str_cols; bool *col_type; ctc_cbo_stats_table_t *ctc_cbo_stats_table; + uint32_t page_size; } ctc_cbo_stats_t; #pragma pack() diff --git a/storage/ctc/ctc_srv_mq_stub.cc b/storage/ctc/ctc_srv_mq_stub.cc index db4d8bd..c7e9952 100644 --- a/storage/ctc/ctc_srv_mq_stub.cc +++ b/storage/ctc/ctc_srv_mq_stub.cc @@ -771,16 +771,21 @@ void ctc_cbo_stats_copy_from_shm(ctc_handler_t *tch, ctc_cbo_stats_table_t *ctc_ bool is_part_table = stats->part_cnt ? 
true : false; stats->is_updated = req->stats->is_updated; stats->records = req->stats->records; + stats->page_size = req->stats->page_size; memcpy(stats->ndv_keys, req->stats->ndv_keys, stats->key_len); uint num_columns = req->stats->msg_len / sizeof(ctc_cbo_stats_column_t); if (!is_part_table) { *tch = req->tch; ctc_cbo_stats_table->estimate_rows = req->ctc_cbo_stats_table->estimate_rows; + ctc_cbo_stats_table->avg_row_len = req->ctc_cbo_stats_table->avg_row_len; + ctc_cbo_stats_table->blocks = req->ctc_cbo_stats_table->blocks; ctc_cbo_stats_columns_copy(ctc_cbo_stats_table->columns, req->ctc_cbo_stats_table->columns, stats, num_columns); } else { for (uint i = 0; i < req->num_part_fetch; i++) { ctc_cbo_stats_table[i].estimate_rows = req->ctc_cbo_stats_table[i].estimate_rows; + ctc_cbo_stats_table[i].avg_row_len = req->ctc_cbo_stats_table[i].avg_row_len; + ctc_cbo_stats_table[i].blocks = req->ctc_cbo_stats_table[i].blocks; ctc_cbo_stats_columns_copy(ctc_cbo_stats_table[i].columns, req->ctc_cbo_stats_table[i].columns, stats, num_columns); } diff --git a/storage/ctc/ha_ctc.cc b/storage/ctc/ha_ctc.cc index 178c2ae..49604b2 100644 --- a/storage/ctc/ha_ctc.cc +++ b/storage/ctc/ha_ctc.cc @@ -3369,6 +3369,59 @@ EXTER_ATTACK int ha_ctc::rnd_pos(uchar *buf, uchar *pos) { return ret; } +/** +The number of pages of table data is used as +the IO seek count for a full table scan. +*/ +double ha_ctc::scan_time() { + double scan_time = 0.0; + if (m_share && m_share->cbo_stats != nullptr) { + scan_time = m_share->cbo_stats->ctc_cbo_stats_table->blocks; + } + /* The min seek times */ + if (scan_time < 2.0) { + scan_time = 2.0; + } + return scan_time; +} + +/** +Calculate cost of 'index only' scan for given covering_index +and number of records to be scan. 
+*/ +double ha_ctc::index_only_read_time(uint keynr, double records_to_scan) { + double index_read_time; + uint32_t keys_per_block = (stats.block_size / 2 / + (table_share->key_info[keynr].key_length + ref_length) + 1); + index_read_time = ((double)(records_to_scan + keys_per_block - 1) / (double)keys_per_block); + return index_read_time; +} + +/** +Calculate the time it takes to read a set of ranges +through an non_covering_index. +*/ +double ha_ctc::read_time(uint index, uint ranges, ha_rows rows) { + if (index != table->s->primary_key) { + return (handler::read_time(index, ranges, rows)); + } + + if (rows <= 2) { + return (double)rows; + } + + /* + If the estimated records to be scanned exceeds the + total records in the table, use full_table_scan. + */ + double time_for_scan = scan_time(); + if (stats.records < rows) { + return time_for_scan; + } + + return (ranges + (double)rows / (double)stats.records * time_for_scan); +} + /** @brief ::info() is used to return information to the optimizer. 
See my_base.h for @@ -3410,7 +3463,10 @@ EXTER_ATTACK int ha_ctc::rnd_pos(uchar *buf, uchar *pos) { void ha_ctc::info_low() { if (m_share && m_share->cbo_stats != nullptr) { - stats.records = m_share->cbo_stats->ctc_cbo_stats_table->estimate_rows; + ctc_cbo_stats_table_t* stats_table = m_share->cbo_stats->ctc_cbo_stats_table; + stats.records = stats_table->estimate_rows; + stats.mean_rec_length = stats_table->avg_row_len; + stats.block_size = m_share->cbo_stats->page_size; } } @@ -5609,7 +5665,7 @@ int ha_ctc::initialize_cbo_stats() END_RECORD_STATS(EVENT_TYPE_INITIALIZE_DBO) return ERR_ALLOC_MEMORY; } - *m_share->cbo_stats = {0, 0, 0, 0, 0, nullptr, 0, nullptr, nullptr}; + *m_share->cbo_stats = {0, 0, 0, 0, 0, nullptr, 0, nullptr, nullptr, 0}; m_share->cbo_stats->ctc_cbo_stats_table = (ctc_cbo_stats_table_t*)my_malloc(PSI_NOT_INSTRUMENTED, sizeof(ctc_cbo_stats_table_t), MYF(MY_WME)); if (m_share->cbo_stats->ctc_cbo_stats_table == nullptr) { diff --git a/storage/ctc/ha_ctc.h b/storage/ctc/ha_ctc.h index 44399d5..9c1fd85 100644 --- a/storage/ctc/ha_ctc.h +++ b/storage/ctc/ha_ctc.h @@ -515,43 +515,24 @@ public: uint max_supported_key_part_length( HA_CREATE_INFO *create_info) const override; - /** @brief - Called in test_quick_select to determine if indexes should be used. + /** + @brief Called in test_quick_select to determine if indexes should be used. + We assume that the number of data pages equals the number of disk seeks. + @return How many seeks it will take to read through the whole table. */ - virtual double scan_time() override { - DBUG_TRACE; - return (ulonglong)(stats.records + stats.deleted) / 100 + 2; - } + double scan_time() override; - /** @brief - This method will never be called if you do not implement indexes. + /** + @brief This method will never be called if you do not implement indexes.
+ @return estimated cost of 'index only' scan */ - virtual double read_time( - uint index, /*!< in: key number */ - uint ranges, /*!< in: how many ranges */ - ha_rows rows) /*!< in: estimated number of rows in the ranges */ override { - DBUG_TRACE; - - if (index != table->s->primary_key) { - /* Not clustered */ - return (handler::read_time(index, ranges, rows)); - } + virtual double index_only_read_time(uint keynr, double records) override; - if (rows <= 2) { - return ((double)rows); - } - - /* Assume that the read time is proportional to the scan time for all - rows + at most one seek per range. */ - - double time_for_scan = scan_time(); - - if (stats.records < rows) { - return (time_for_scan); - } - - return (ranges + (double)rows / (double)stats.records * time_for_scan); - } + /** + @brief This method will never be called if you do not implement indexes. + @return estimated time measured in disk seeks + */ + double read_time(uint index, uint ranges, ha_rows rows) override; bool inplace_alter_table( TABLE *altered_table MY_ATTRIBUTE((unused)), diff --git a/storage/ctc/ha_ctcpart.cc b/storage/ctc/ha_ctcpart.cc index 0db2188..30174a0 100644 --- a/storage/ctc/ha_ctcpart.cc +++ b/storage/ctc/ha_ctcpart.cc @@ -843,16 +843,46 @@ enum row_type ha_ctcpart::get_partition_row_type(const dd::Table *partition_tabl return ROW_TYPE_NOT_USED; } +double ha_ctcpart::scan_time() { + double scan_time = 0.0; + if (m_part_share && m_part_share->cbo_stats != nullptr) { + /* + For partitioned tables, queries can specify partitions to scan. + The cost of a full table scan should only include the specified partitions. + */ + uint part_num = m_is_sub_partitioned ? 
+ table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + for (uint i = m_part_info->get_first_used_partition(); i < part_num; + i = m_part_info->get_next_used_partition(i)) { + scan_time += m_part_share->cbo_stats->ctc_cbo_stats_table[i].blocks; + } + } + return scan_time; +} + void ha_ctcpart::info_low() { - stats.records = 0; - if (m_part_share->cbo_stats != nullptr) { - uint part_num = m_is_sub_partitioned ? table->part_info->num_parts * table->part_info->num_subparts : - table->part_info->num_parts; - for (uint part_id = m_part_info->get_first_used_partition(); part_id < part_num; - part_id = m_part_info->get_next_used_partition(part_id)) { - stats.records += m_part_share->cbo_stats->ctc_cbo_stats_table[part_id].estimate_rows; + stats.records = 0; + uint total_row_len = 0; + if (m_part_share && m_part_share->cbo_stats != nullptr) { + uint total_part_num = m_is_sub_partitioned ? + table->part_info->num_parts * table->part_info->num_subparts : + table->part_info->num_parts; + stats.block_size = m_part_share->cbo_stats->page_size; + uint curr_part_num = 0; + for (uint part_id = m_part_info->get_first_used_partition(); part_id < total_part_num; + part_id = m_part_info->get_next_used_partition(part_id)) { + stats.records += m_part_share->cbo_stats->ctc_cbo_stats_table[part_id].estimate_rows; + if (m_part_share->cbo_stats->ctc_cbo_stats_table[part_id].avg_row_len != 0) { + total_row_len += m_part_share->cbo_stats->ctc_cbo_stats_table[part_id].avg_row_len; + curr_part_num++; + } + } + + if (curr_part_num > 0) { + stats.mean_rec_length = total_row_len / curr_part_num; + } } - } } int ha_ctcpart::info(uint flag) { @@ -1037,7 +1067,7 @@ int ha_ctcpart::initialize_cbo_stats() { ctc_log_error("alloc mem failed, m_part_share->cbo_stats size(%lu)", sizeof(ctc_cbo_stats_t)); return ERR_ALLOC_MEMORY; } - *m_part_share->cbo_stats = {0, 0, 0, 0, 0, nullptr, 0, nullptr, nullptr}; + *m_part_share->cbo_stats = {0, 0, 0, 0, 0, nullptr, 0, 
nullptr, nullptr, 0}; m_part_share->cbo_stats->part_cnt = part_num; @@ -1059,6 +1089,8 @@ int ha_ctcpart::initialize_cbo_stats() { for (uint i = 0; i < part_num; i++) { m_part_share->cbo_stats->ctc_cbo_stats_table[i].estimate_rows = 0; + m_part_share->cbo_stats->ctc_cbo_stats_table[i].avg_row_len = 0; + m_part_share->cbo_stats->ctc_cbo_stats_table[i].blocks = 0; m_part_share->cbo_stats->ctc_cbo_stats_table[i].columns = (ctc_cbo_stats_column_t*)my_malloc(PSI_NOT_INSTRUMENTED, table->s->fields * sizeof(ctc_cbo_stats_column_t), MYF(MY_WME)); if (m_part_share->cbo_stats->ctc_cbo_stats_table[i].columns == nullptr) { diff --git a/storage/ctc/ha_ctcpart.h b/storage/ctc/ha_ctcpart.h index ec4f903..e4c99ac 100644 --- a/storage/ctc/ha_ctcpart.h +++ b/storage/ctc/ha_ctcpart.h @@ -398,6 +398,8 @@ class ha_ctcpart : public ha_ctc, bool equal_range_on_part_field(const key_range *start_key, const key_range *end_key); + double scan_time() override; + void info_low() override; int info(uint) override; -- Gitee