From 20e9a9b16ca1fcc871e112e0cce8ddfaa5f0b321 Mon Sep 17 00:00:00 2001 From: qichang Date: Thu, 21 Nov 2024 21:03:11 +0800 Subject: [PATCH] fix union keys row estimate --- mysql-test/enableCases.list | 1 + .../suite/ctc/r/ctc_dml_estimate_rows.result | 147 ++++++++++++++++ .../suite/ctc/t/ctc_dml_estimate_rows.test | 85 +++++++++ storage/ctc/ctc_cbo.cc | 163 ++++++++++++------ storage/ctc/ctc_cbo.h | 11 +- storage/ctc/ha_ctc.cc | 2 +- 6 files changed, 358 insertions(+), 51 deletions(-) create mode 100644 mysql-test/suite/ctc/r/ctc_dml_estimate_rows.result create mode 100644 mysql-test/suite/ctc/t/ctc_dml_estimate_rows.test diff --git a/mysql-test/enableCases.list b/mysql-test/enableCases.list index ff25a32..c6fe92b 100644 --- a/mysql-test/enableCases.list +++ b/mysql-test/enableCases.list @@ -20,6 +20,7 @@ ctc_set_analyze ctc_enum_analyze ctc_bit_analyze ctc_string_analyze +ctc_dml_estimate_rows # ------ END TEST CASES OF DML ------ # ------ BEGIN TEST CASES OF TRANSACTION ------ diff --git a/mysql-test/suite/ctc/r/ctc_dml_estimate_rows.result b/mysql-test/suite/ctc/r/ctc_dml_estimate_rows.result new file mode 100644 index 0000000..68a2ac8 --- /dev/null +++ b/mysql-test/suite/ctc/r/ctc_dml_estimate_rows.result @@ -0,0 +1,147 @@ +drop table if exists t1; +CREATE TABLE IF NOT EXISTS t1 ( +a INT AUTO_INCREMENT PRIMARY KEY, +b INT NOT NULL, +c VARCHAR(20) DEFAULT NULL, +d DECIMAL NOT NULL +); +CREATE INDEX idx_b ON t1(b); +CREATE INDEX idx_c ON t1(c); +CREATE INDEX idx_d ON t1(d); +CREATE INDEX idx_bcd ON t1(b, c, d); +DROP PROCEDURE IF EXISTS insert_data_to_t1; +Warnings: +Note 1305 PROCEDURE test.insert_data_to_t1 does not exist +CREATE PROCEDURE insert_data_to_t1() +BEGIN +DECLARE i INT DEFAULT 0; +WHILE i < 500 DO +INSERT INTO t1 (b, c, d) VALUES (i, NULL, i + 2); +INSERT INTO t1 (b, c, d) VALUES (i + 1, i + 2, i + 3); +INSERT INTO t1 (b, c, d) VALUES (i + 2, i + 3, i + 4); +INSERT INTO t1 (b, c, d) VALUES (111, NULL, i); +INSERT INTO t1 (b, c, d) VALUES (121, 121, i); +INSERT INTO t1 (b, c, d) VALUES (131, i + 4, i + 5); +SET i = i + 1; +END WHILE; +END // +CALL insert_data_to_t1(); +DROP PROCEDURE IF EXISTS insert_data_to_t1; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +SELECT COUNT(1) FROM t1 WHERE b < 500; +COUNT(1) +2997 +SELECT COUNT(1) FROM t1 WHERE b >= 200; +COUNT(1) +903 +SELECT COUNT(1) FROM t1 WHERE b >= 200 AND c < 500; +COUNT(1) +598 +SELECT COUNT(1) FROM t1 WHERE c < '700'; +COUNT(1) +1907 +SELECT COUNT(1) FROM t1 WHERE c <= '200'; +COUNT(1) +837 +SELECT COUNT(1) FROM t1 WHERE c >= '100'; +COUNT(1) +1997 +SELECT COUNT(1) FROM t1 WHERE c >= '100' AND c <= '200'; +COUNT(1) +834 +SELECT COUNT(1) FROM t1 WHERE c IS NULL; +COUNT(1) +1000 +SELECT COUNT(1) FROM t1 WHERE c IS NOT NULL; +COUNT(1) +2000 +SELECT COUNT(1) FROM t1 WHERE d >= 100.00; +COUNT(1) +2414 +SELECT COUNT(1) FROM t1 WHERE d < 400.00; +COUNT(1) +2386 +SELECT COUNT(1) FROM t1 WHERE d >= 100.00 AND d < 400.00; +COUNT(1) +1800 +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_b) WHERE b < 500; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_b idx_b 4 NULL 3000 100.00 Using where; Using pushed condition (`test`.`t1`.`b` < 500) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_b) WHERE b >= 200; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_b idx_b 4 NULL 912 100.00 Using where; Using pushed condition (`test`.`t1`.`b` >= 200) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_b) WHERE b >= 200 AND c < 500; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_b idx_b 4 NULL 912 33.33 Using where; Using pushed condition (`test`.`t1`.`b` >= 200) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c < '700'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_c idx_c 83 NULL 2864 100.00 Using where; Using pushed condition (`test`.`t1`.`c` < '700') +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c <= '200'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_c idx_c 83 NULL 1263 100.00 Using where; Using pushed condition (`test`.`t1`.`c` <= '200') +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c >= '100'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_c idx_c 83 NULL 3000 100.00 Using where; Using pushed condition (`test`.`t1`.`c` >= '100') +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c >= '100' AND c <= '200'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_c idx_c 83 NULL 1263 100.00 Using where; Using pushed condition ((`test`.`t1`.`c` >= '100') and (`test`.`t1`.`c` <= '200')) +EXPLAIN SELECT * FROM t1 WHERE c IS NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref idx_c idx_c 83 const 1000 100.00 Using where; Using pushed condition (`test`.`t1`.`c` is null) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c IS NOT NULL; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_c idx_c 83 NULL 2000 100.00 Using where; Using pushed condition (`test`.`t1`.`c` is not null) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_d) WHERE d >= 100.00; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_d idx_d 5 NULL 2421 100.00 Using where; Using pushed condition (`test`.`t1`.`d` >= 100) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_d) WHERE d < 400.00; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_d idx_d 5 NULL 2397 100.00 Using where; Using pushed condition (`test`.`t1`.`d` < 400) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_d) WHERE d >= 100.00 AND d < 400.00; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_d idx_d 5 NULL 1807 100.00 Using where; Using pushed condition ((`test`.`t1`.`d` >= 100) and (`test`.`t1`.`d` < 400)) +SELECT COUNT(1) FROM t1 WHERE b = 111 AND c IS NULL AND d < 100; +COUNT(1) +100 +SELECT COUNT(1) FROM t1 WHERE b = 121 AND c = '121' AND d >= 10; +COUNT(1) +490 +SELECT COUNT(1) FROM t1 WHERE b < 300 AND c > '300'; +COUNT(1) +428 +SELECT COUNT(1) FROM t1 WHERE b = 121 AND c IS NULL AND d >= 100 AND d < 400; +COUNT(1) +1 +SELECT COUNT(1) FROM t1 WHERE b = 121 AND c IS NOT NULL and d > 200; +COUNT(1) +299 +SELECT COUNT(1) FROM t1 WHERE b = 121 AND d >= 100.00; +COUNT(1) +403 +SELECT COUNT(1) FROM t1 WHERE b = 131 AND c >= '100' AND c <= '400'; +COUNT(1) +334 +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 111 AND c IS NULL AND d < 100; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_bcd idx_bcd 92 NULL 32 100.00 Using where; Using pushed condition ((`test`.`t1`.`b` = 111) and (`test`.`t1`.`c` is null) and (`test`.`t1`.`d` < 100)) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 121 AND c = '121' AND d >= 100; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_bcd idx_bcd 92 NULL 103 100.00 Using where; Using pushed condition ((`test`.`t1`.`c` = '121') and (`test`.`t1`.`b` = 121) and (`test`.`t1`.`d` >= 100)) +EXPLAIN SELECT * FROM t1 WHERE b < 300 AND c > '300'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ALL idx_b,idx_c,idx_bcd NULL NULL NULL 3000 33.31 Using where; Using pushed condition ((`test`.`t1`.`b` < 300) and (`test`.`t1`.`c` > '300')) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 121 AND c IS NULL AND d >= 100 AND d < 400; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_bcd idx_bcd 92 NULL 101 100.00 Using where; Using pushed condition ((`test`.`t1`.`b` = 121) and (`test`.`t1`.`c` is null) and (`test`.`t1`.`d` >= 100) and (`test`.`t1`.`d` < 400)) +EXPLAIN SELECT * FROM t1 WHERE b = 121 AND c IS NOT NULL and d > 200; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref idx_b,idx_c,idx_d,idx_bcd idx_b 4 const 507 40.29 Using where; Using pushed condition ((`test`.`t1`.`c` is not null) and (`test`.`t1`.`d` > 200)) +EXPLAIN SELECT * FROM t1 WHERE b = 121 AND d >= 100.00; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL ref idx_b,idx_d,idx_bcd idx_b 4 const 507 80.70 Using where; Using pushed condition (`test`.`t1`.`d` >= 100) +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 131 AND c >= '100' AND c <= '400'; +id select_type table partitions type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 NULL range idx_bcd idx_bcd 87 NULL 381 100.00 Using where; Using pushed condition ((`test`.`t1`.`b` = 131) and (`test`.`t1`.`c` >= '100') and (`test`.`t1`.`c` <= '400')) +drop table t1; diff --git a/mysql-test/suite/ctc/t/ctc_dml_estimate_rows.test b/mysql-test/suite/ctc/t/ctc_dml_estimate_rows.test new file mode 100644 index 0000000..38eb207 --- /dev/null +++ b/mysql-test/suite/ctc/t/ctc_dml_estimate_rows.test @@ -0,0 +1,85 @@ +--disable_warnings +drop table if exists t1; +--enable_warnings + +CREATE TABLE IF NOT EXISTS t1 ( + a INT AUTO_INCREMENT PRIMARY KEY, + b INT NOT NULL, + c VARCHAR(20) DEFAULT NULL, + d DECIMAL NOT NULL +); + +CREATE INDEX idx_b ON t1(b); +CREATE INDEX idx_c ON t1(c); +CREATE INDEX idx_d ON t1(d); +CREATE INDEX idx_bcd ON t1(b, c, d); + +DROP PROCEDURE IF EXISTS insert_data_to_t1; +DELIMITER //; +CREATE PROCEDURE insert_data_to_t1() +BEGIN + DECLARE i INT DEFAULT 0; + WHILE i < 500 DO + INSERT INTO t1 (b, c, d) VALUES (i, NULL, i + 2); + INSERT INTO t1 (b, c, d) VALUES (i + 1, i + 2, i + 3); + INSERT INTO t1 (b, c, d) VALUES (i + 2, i + 3, i + 4); + INSERT INTO t1 (b, c, d) VALUES (111, NULL, i); + INSERT INTO t1 (b, c, d) VALUES (121, 121, i); + INSERT INTO t1 (b, c, d) VALUES (131, i + 4, i + 5); + SET i = i + 1; + END WHILE; +END // +DELIMITER ;// +CALL insert_data_to_t1(); + +DROP PROCEDURE IF EXISTS insert_data_to_t1; + +ANALYZE TABLE t1; + +SELECT COUNT(1) FROM t1 WHERE b < 500; +SELECT COUNT(1) FROM t1 WHERE b >= 200; +SELECT COUNT(1) FROM t1 WHERE b >= 200 AND c < 500; +SELECT COUNT(1) FROM t1 WHERE c < '700'; +SELECT COUNT(1) FROM t1 WHERE c <= '200'; +SELECT COUNT(1) FROM t1 WHERE c >= '100'; +SELECT COUNT(1) FROM t1 WHERE c >= '100' AND c <= '200'; +SELECT COUNT(1) FROM t1 WHERE c IS NULL; +SELECT COUNT(1) FROM t1 WHERE c IS NOT NULL; +SELECT COUNT(1) FROM t1 WHERE d >= 100.00; +SELECT COUNT(1) FROM t1 WHERE d < 400.00; +SELECT COUNT(1) FROM t1 WHERE d >= 100.00 AND d < 400.00; + +--disable_warnings +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_b) WHERE b < 500; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_b) WHERE b >= 200; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_b) WHERE b >= 200 AND c < 500; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c < '700'; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c <= '200'; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c >= '100'; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c >= '100' AND c <= '200'; +EXPLAIN SELECT * FROM t1 WHERE c IS NULL; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_c) WHERE c IS NOT NULL; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_d) WHERE d >= 100.00; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_d) WHERE d < 400.00; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_d) WHERE d >= 100.00 AND d < 400.00; +--enable_warnings + +SELECT COUNT(1) FROM t1 WHERE b = 111 AND c IS NULL AND d < 100; +SELECT COUNT(1) FROM t1 WHERE b = 121 AND c = '121' AND d >= 10; +SELECT COUNT(1) FROM t1 WHERE b < 300 AND c > '300'; +SELECT COUNT(1) FROM t1 WHERE b = 121 AND c IS NULL AND d >= 100 AND d < 400; +SELECT COUNT(1) FROM t1 WHERE b = 121 AND c IS NOT NULL and d > 200; +SELECT COUNT(1) FROM t1 WHERE b = 121 AND d >= 100.00; +SELECT COUNT(1) FROM t1 WHERE b = 131 AND c >= '100' AND c <= '400'; + +--disable_warnings +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 111 AND c IS NULL AND d < 100; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 121 AND c = '121' AND d >= 100; +EXPLAIN SELECT * FROM t1 WHERE b < 300 AND c > '300'; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 121 AND c IS NULL AND d >= 100 AND d < 400; +EXPLAIN SELECT * FROM t1 WHERE b = 121 AND c IS NOT NULL and d > 200; +EXPLAIN SELECT * FROM t1 WHERE b = 121 AND d >= 100.00; +EXPLAIN SELECT * FROM t1 FORCE INDEX (idx_bcd) WHERE b = 131 AND c >= '100' AND c <= '400'; +--enable_warnings + +drop table t1; \ No newline at end of file diff --git a/storage/ctc/ctc_cbo.cc b/storage/ctc/ctc_cbo.cc index ba5a15c..d9ec830 100644 --- a/storage/ctc/ctc_cbo.cc +++ b/storage/ctc/ctc_cbo.cc @@ -59,43 +59,54 @@ static void convert_set_key_to_variant(const uchar *set_key, cache_variant_t *va } } -void r_key2variant(ctc_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val, cache_variant_t * value, uint32_t key_offset) +bool handle_null_and_single_boundary(ctc_key *rKey, Field *field, cache_variant_t *ret_val, + cache_variant_t * value, uint32_t key_offset) { + bool access_boundary = true; + if (rKey->cmp_type == CMP_TYPE_NULL) { *ret_val = *value; rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; - return; + return access_boundary; + } + + if (field->is_nullable() && *(rKey->key + key_offset) == 1) { + /*The curr field is nullable, and first byte value indicates that it is an SQL NULL value*/ + *ret_val = *value; + rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return access_boundary; } + return !access_boundary; +} + +void extract_boundary_value(ctc_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val, + cache_variant_t * value, uint32_t key_offset) +{ Field *field = cur_index_part->field; - uint32_t offset = 0; - if (cur_index_part->field->is_nullable()) { - /* The first byte in the field tells if this is an SQL NULL value */ - if(*(rKey->key + key_offset) == 1) { - *ret_val = *value; - rKey->cmp_type = CMP_TYPE_CLOSE_INTERNAL; - return; - } - offset = 1; + + if (handle_null_and_single_boundary(rKey, field, ret_val, value, key_offset)) { + return; } - // calculate string copy length + uint32_t str_key_len = 0; + uint32_t varchar_offset = 0; + uint32_t null_offset = field->is_nullable() ? 1 : 0; if (field->real_type() == MYSQL_TYPE_VARCHAR) { - str_key_len = *(uint16_t *)const_cast(rKey->key + offset); - } - if (field->real_type() == MYSQL_TYPE_STRING) { + str_key_len = *(uint16_t *)const_cast(rKey->key + key_offset + null_offset); + varchar_offset = OFFSET_VARCHAR_TYPE; + } else if (field->real_type() == MYSQL_TYPE_STRING) { uint32_t data_len = field->key_length(); - while (data_len > 0 && rKey->key[data_len + offset - 1] == field->charset()->pad_char) { + while (data_len > 0 && rKey->key[data_len + key_offset + null_offset - 1] == field->charset()->pad_char) { data_len--; } str_key_len = data_len; } uint32_t str_copy_len = str_key_len > CBO_STRING_MAX_LEN-1 ? CBO_STRING_MAX_LEN-1 : str_key_len; - - key_offset += field->real_type() == MYSQL_TYPE_VARCHAR ? OFFSET_VARCHAR_TYPE : 0; - const uchar *key = rKey->key + key_offset + offset; + const uchar *key = rKey->key + key_offset + varchar_offset + null_offset; uchar tmp_ptr[CTC_BYTE_8] = {0}; const field_cnvrt_aux_t *mysql_info = get_auxiliary_for_field_convert(field, field->type()); + switch (field->real_type()) { case MYSQL_TYPE_SET: convert_set_key_to_variant(key, ret_val, (capacity_usage)field->pack_length()); @@ -133,11 +144,11 @@ void r_key2variant(ctc_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t ret_val->v_date = *(date_t *)const_cast(key); break; case MYSQL_TYPE_DATETIME2: - cnvrt_datetime_decimal(const_cast(key), cur_index_part->field->decimals(), tmp_ptr, DATETIME_MAX_DECIMALS, CTC_BYTE_8); + cnvrt_datetime_decimal(const_cast(key), field->decimals(), tmp_ptr, DATETIME_MAX_DECIMALS, CTC_BYTE_8); ret_val->v_date = *(date_t *)tmp_ptr; break; case MYSQL_TYPE_TIME2: - cnvrt_time_decimal(const_cast(key), cur_index_part->field->decimals(), tmp_ptr, DATETIME_MAX_DECIMALS, CTC_BYTE_8); + cnvrt_time_decimal(const_cast(key), field->decimals(), tmp_ptr, DATETIME_MAX_DECIMALS, CTC_BYTE_8); ret_val->v_date = *(date_t *)tmp_ptr; break; case MYSQL_TYPE_DECIMAL: @@ -588,8 +599,8 @@ double calc_density_by_cond(ctc_cbo_stats_table_t *cbo_stats, KEY_PART_INFO cur_ min_key_val.v_str = v_str_min; max_key_val.v_str = v_str_max; } - r_key2variant(min_key, &cur_index_part, &min_key_val, low_val, key_offset); - r_key2variant(max_key, &cur_index_part, &max_key_val, high_val, key_offset); + extract_boundary_value(min_key, &cur_index_part, &min_key_val, low_val, key_offset); + extract_boundary_value(max_key, &cur_index_part, &max_key_val, high_val, key_offset); if (compare(&max_key_val, low_val, cur_index_part.field, cs) == LESS || compare(&min_key_val, high_val, cur_index_part.field, cs) == GREAT) { return 0; @@ -621,48 +632,102 @@ void calc_accumulate_gcol_num(uint num_fields, Field** field, uint32_t *acc_gcol } } +double calculate_column_selectivity(ctc_cbo_stats_table_t *cbo_stats, ctc_range_key* key, KEY_PART_INFO *cur_index_part, + uint32_t ctc_col_id, uint32_t key_offset, bool is_nullable, const CHARSET_INFO *cs) +{ + double col_selectivity = 0.0; + + if (is_nullable && + (key->min_key->key != nullptr && *(key->min_key->key + key_offset) == 1) && + (key->max_key->key == nullptr || key_offset >= key->max_key->len)) { + /*when the col of min_key has null-val and max_key is not exists --> select * from table where col is not null*/ + col_selectivity = (double)1 - calc_equal_null_density(cbo_stats, ctc_col_id); + } else if (is_nullable && + (key->min_key->key != nullptr && *(key->min_key->key + key_offset) == 1) && + (key->max_key->key != nullptr && *(key->max_key->key + key_offset) == 1)) { + /*when the col of min_key and max_key both have null-val --> select * from table where col is null*/ + col_selectivity = calc_equal_null_density(cbo_stats, ctc_col_id); + } else { + col_selectivity = calc_density_by_cond(cbo_stats, *cur_index_part, key, key_offset, ctc_col_id, cs); + } + + return col_selectivity; +} + +void set_key_boundary_types(ctc_range_key *key, uint32_t id, ctc_cmp_type_t min_key_cmp_type, ctc_cmp_type_t max_key_cmp_type) +{ + uint64_t min_key_map = key->min_key->col_map; + uint64_t max_key_map = key->max_key->col_map; + + uint64_t min_key_field_valid = min_key_map & ((uint64_t)1 << id); + uint64_t max_key_field_valid = max_key_map & ((uint64_t)1 << id); + uint64_t min_key_field_next = min_key_map & ((uint64_t)1 << (id + 1)); + uint64_t max_key_field_next = max_key_map & ((uint64_t)1 << (id + 1)); + + if (min_key_field_valid && max_key_field_valid) { + /*The upper and lower boundarys of current field both exist*/ + if (min_key_field_next || max_key_field_next) { + /*The current field is not the last valid field, the boundary type is set as closed*/ + key->min_key->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + key->max_key->cmp_type = CMP_TYPE_CLOSE_INTERNAL; + return; + } + } + /*The current field is the last valid field in the condition, + 1) Field has only one boundary value (e.g., >=, <=, <, >, is not null) + 2) The upper and lower boundarys of current field both exist (e.g., column > 3 and column <= 5)*/ + key->min_key->cmp_type = min_key_cmp_type; + key->max_key->cmp_type = max_key_cmp_type; +} + double calc_density_one_table(uint16_t idx_id, ctc_range_key *key, ctc_cbo_stats_table_t *cbo_stats, const TABLE &table) { if (cbo_stats->estimate_rows == 0) { // no stats or empty table return 0; } - double density = 1.0; - uint32_t my_col_id; - uint32_t ct_col_id; uint32_t acc_gcol_num[CTC_MAX_COLUMNS] = {0}; calc_accumulate_gcol_num(table.s->fields, table.s->field, acc_gcol_num); - uint32_t key_offset = 0;//列在索引中的偏移量 - uint64_t col_map = max(key->min_key->col_map, key->max_key->col_map); KEY cur_index = table.key_info[idx_id]; + double density = 1.0; + uint32_t mysql_col_id; + uint32_t ctc_col_id; + uint32_t key_offset = 0; // column offset in the index + uint64_t valid_map = max(key->min_key->col_map, key->max_key->col_map); + + /*cmp_type of the last valid field in min_key and max_key*/ + ctc_cmp_type_t min_key_cmp_type = key->min_key->cmp_type; + ctc_cmp_type_t max_key_cmp_type = key->max_key->cmp_type; + /* * For all columns in used index, * density = 1.0 / (column[0]->num_distinct * ... * column[n]->num_distinct) */ for (uint32_t idx_col_num = 0; idx_col_num < cur_index.actual_key_parts; idx_col_num++) { - double col_product = 1.0; - if (col_map & ((uint64_t)1 << idx_col_num)) { - KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; - if (cur_index_part.field->is_virtual_gcol()) { - continue; - } - my_col_id = cur_index_part.field->field_index(); - ct_col_id = my_col_id - acc_gcol_num[my_col_id]; - uint32_t offset = cur_index_part.field->is_nullable() ? 1 : 0; - if ((offset == 1) && *(key->min_key->key + key_offset) == 1 && key->max_key->key == nullptr) { - // select * from table where col is not null - col_product = (double)1 - calc_equal_null_density(cbo_stats, ct_col_id); - } else if ((offset == 1) && *(key->min_key->key + key_offset) == 1 && *(key->max_key->key + key_offset) == 1) { - // select * from table where col is null - col_product = calc_equal_null_density(cbo_stats, ct_col_id); - } else { - col_product = calc_density_by_cond(cbo_stats, cur_index_part, key, key_offset, ct_col_id, table.field[my_col_id]->charset()); - } - col_product = eval_density_result(col_product); - key_offset += (offset + cur_index_part.field->key_length()); - density = density * col_product; + double col_selectivity = 0.0; + if ((valid_map & ((uint64_t)1 << idx_col_num)) == 0) { + break; + } + + KEY_PART_INFO cur_index_part = cur_index.key_part[idx_col_num]; + if (cur_index_part.field->is_virtual_gcol()) { + continue; } + + mysql_col_id = cur_index_part.field->field_index(); + ctc_col_id = mysql_col_id - acc_gcol_num[mysql_col_id]; + + uint32_t null_offset = cur_index_part.field->is_nullable() ? 1 : 0; + uint32_t varchar_offset = cur_index_part.field->real_type() == MYSQL_TYPE_VARCHAR ? OFFSET_VARCHAR_TYPE : 0; + + set_key_boundary_types(key, idx_col_num, min_key_cmp_type, max_key_cmp_type); + col_selectivity = calculate_column_selectivity(cbo_stats, key, &cur_index_part, ctc_col_id, + key_offset, null_offset, table.field[mysql_col_id]->charset()); + col_selectivity = eval_density_result(col_selectivity); + + key_offset += (varchar_offset + null_offset + cur_index_part.field->key_length()); + density = density * col_selectivity; } /* diff --git a/storage/ctc/ctc_cbo.h b/storage/ctc/ctc_cbo.h index fea6ee7..1cccb4d 100644 --- a/storage/ctc/ctc_cbo.h +++ b/storage/ctc/ctc_cbo.h @@ -74,4 +74,13 @@ double calc_density_one_table(uint16_t idx_id, ctc_range_key *key, ctc_cbo_stats_table_t *cbo_stats, const TABLE &table); void calc_accumulate_gcol_num(uint num_fields, Field** field, uint32_t *acc_gcol_num); void ctc_index_stats_update(TABLE *table, ctc_cbo_stats_t *cbo_stats); -#endif \ No newline at end of file +double calc_density_by_cond(ctc_cbo_stats_table_t *cbo_stats, KEY_PART_INFO cur_index_part, ctc_range_key *key, + uint32_t key_offset, uint32_t col_id, const CHARSET_INFO *cs); +void set_key_boundary_types(ctc_range_key *key, uint32_t id, ctc_cmp_type_t min_key_cmp_type, ctc_cmp_type_t max_key_cmp_type); +double calculate_column_selectivity(ctc_cbo_stats_table_t *cbo_stats, ctc_range_key* key, KEY_PART_INFO *cur_index_part, + uint32_t ctc_col_id, uint32_t key_offset, bool is_nullable, const CHARSET_INFO *cs); +void extract_boundary_value(ctc_key *rKey, KEY_PART_INFO *cur_index_part, cache_variant_t *ret_val, + cache_variant_t * value, uint32_t key_offset); +bool handle_null_and_single_boundary(ctc_key *rKey, Field *field, cache_variant_t *ret_val, + cache_variant_t * value, uint32_t key_offset); +#endif diff --git a/storage/ctc/ha_ctc.cc b/storage/ctc/ha_ctc.cc index 78c6c98..03702fc 100644 --- a/storage/ctc/ha_ctc.cc +++ b/storage/ctc/ha_ctc.cc @@ -4046,7 +4046,7 @@ ha_rows ha_ctc::records_in_range(uint inx, key_range *min_key, * we need this to make sure that our optimizer continue to work even when we * miscalculated the density, and it's still prefer index read */ - n_rows += m_share->cbo_stats->ctc_cbo_stats_table->estimate_rows * density; + n_rows = m_share->cbo_stats->ctc_cbo_stats_table->estimate_rows * density; } /* -- Gitee