diff --git a/storage/tianchi/tse_cbo.cc b/storage/tianchi/tse_cbo.cc index f96f8be075885a75bcc864cd5925e0063384f09e..7c8538dd35f46a84e79afd0af7790264197290b2 100644 --- a/storage/tianchi/tse_cbo.cc +++ b/storage/tianchi/tse_cbo.cc @@ -207,16 +207,35 @@ static double calc_frequency_hist_equal_density(tse_cbo_stats_column_t *col_stat en_tse_compare_type cmp_result; int64 result = 0; double density = col_stat->density; - tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; - for (uint32 i = 0; i < col_stat->hist_count; i++) { - cmp_result = compare(&hist_infos[i].ep_value, val, field_type, cs); - - if (cmp_result == EQUAL) { - result = (i == 0) ? hist_infos[i].ep_number : hist_infos[i].ep_number - hist_infos[i - 1].ep_number; + tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; + int32 lo = 0; + int32 hi = col_stat->hist_count - 1; + int32 leftmost = -1; + bool should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, val, field_type, cs); + + switch (cmp_result) { + case EQUAL: + leftmost = mid; + hi = mid - 1; + break; + case LESS: + lo = mid + 1; + break; + case GREAT: + hi = mid - 1; break; - } else if (cmp_result == GREAT) { + case UNCOMPARABLE: + should_end_loop = true; break; } + + if (should_end_loop) { break; } + } + if (leftmost != -1) { + result = (leftmost == 0) ? hist_infos[leftmost].ep_number : hist_infos[leftmost].ep_number - hist_infos[leftmost - 1].ep_number; } uint32 end_pos = col_stat->hist_count - 1; @@ -235,16 +254,62 @@ static double calc_balance_hist_equal_density(tse_cbo_stats_column_t *col_stat, uint32 popular_count = 0; en_tse_compare_type cmp_result; tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; - for (uint32 i = 0; i < col_stat->hist_count; i++) { - cmp_result = compare(&hist_infos[i].ep_value, val, field_type, cs); + int32 lo = 0; + int32 hi = col_stat->hist_count - 1; + int32 leftmost = -1; + int32 rightmost = -1; + bool should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, val, field_type, cs); + + switch (cmp_result) { + case EQUAL: + leftmost = mid; + hi = mid - 1; + break; + case LESS: + lo = mid + 1; + break; + case GREAT: + hi = mid - 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; + } - if (cmp_result == EQUAL) { - // ep_number is different from oracle, when compress balance histogram, need to change this - popular_count++; - } else if (cmp_result == GREAT) { - break; + if (should_end_loop) { break; } + } + lo = 0; + hi = col_stat->hist_count - 1; + should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, val, field_type, cs); + + switch (cmp_result) { + case EQUAL: + rightmost = mid; + lo = mid + 1; + break; + case LESS: + lo = mid + 1; + break; + case GREAT: + hi = mid - 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; } + + if (should_end_loop) { break; } } + if (leftmost != -1 && rightmost != -1) { + popular_count += rightmost - leftmost + 1; + } + if (popular_count > 1 && col_stat->num_buckets > 0) { return (double)popular_count / col_stat->num_buckets; } @@ -297,35 +362,102 @@ static double calc_hist_between_frequency(tse_cbo_stats_table_t *cbo_stats, fiel en_tse_compare_type cmp_result; // HISTOGRAM_FREQUNCEY - for (uint32 i = 0; i < hist_count; i++) { - - cmp_result = compare(&hist_infos[i].ep_value, stats_val.min_key_val, field_type, cs); - if ((stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL && (cmp_result == GREAT || cmp_result == EQUAL)) - || (stats_val.min_type == CMP_TYPE_OPEN_INTERNAL && cmp_result == GREAT)) { - if (i > 0) { - low_nums = hist_infos[i - 1].ep_number; - } - low_nums = total_nums - low_nums; - break; + int32 lo = 0; + int32 hi = hist_count - 1; + int32 leftmost = -1; + bool should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, stats_val.min_key_val, field_type, cs); + + if (stats_val.min_type == CMP_TYPE_CLOSE_INTERNAL) { + switch (cmp_result) { + case EQUAL: + case GREAT: + leftmost = mid; + hi = mid - 1; + break; + case LESS: + lo = mid + 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; + } + } else if (stats_val.min_type == CMP_TYPE_OPEN_INTERNAL) { + switch (cmp_result) { + case GREAT: + leftmost = mid; + hi = mid - 1; + break; + case EQUAL: + case LESS: + lo = mid + 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; + } } - } - for (uint32 i = 0; i < hist_count; i++) { - - cmp_result = compare(&hist_infos[i].ep_value, stats_val.max_key_val, field_type, cs); - - if ((stats_val.max_type == CMP_TYPE_OPEN_INTERNAL && (cmp_result == GREAT || cmp_result == EQUAL)) - || (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL && cmp_result == GREAT)) { - high_nums = (i == 0) ? 0 : hist_infos[i - 1].ep_number; - break; + if (should_end_loop) { break; } + } + if (leftmost != -1) { + if (leftmost > 0) { + low_nums = hist_infos[leftmost - 1].ep_number; + } + low_nums = total_nums - low_nums; + } + + lo = 0; + hi = hist_count - 1; + leftmost = -1; + should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, stats_val.max_key_val, field_type, cs); + + if (stats_val.max_type == CMP_TYPE_OPEN_INTERNAL) { + switch (cmp_result) { + case EQUAL: + case GREAT: + leftmost = mid; + hi = mid - 1; + break; + case LESS: + lo = mid + 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; + } + } else if (stats_val.max_type == CMP_TYPE_CLOSE_INTERNAL) { + switch (cmp_result) { + case GREAT: + leftmost = mid; + hi = mid - 1; + break; + case EQUAL: + case LESS: + lo = mid + 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; + } } + + if (should_end_loop) { break; } + } + if (leftmost != -1) { + high_nums = (leftmost == 0) ? 0 : hist_infos[leftmost - 1].ep_number; } - if (total_nums > 0) { - return ((double)(low_nums + high_nums - total_nums) / total_nums) ; - } else { - return density; - } + if (total_nums > 0) { + return ((double)(low_nums + high_nums - total_nums) / total_nums) ; + } else { + return density; + } } @@ -424,33 +556,68 @@ static int calc_hist_range_boundary(field_stats_val stats_val, enum_field_types double *percent, const CHARSET_INFO *cs) { en_tse_compare_type cmp_result; - uint32 i, lo_pos, hi_pos; + uint32 lo_pos, hi_pos; uint32 hist_count = col_stat->hist_count; tse_cbo_column_hist_t *hist_infos = col_stat->column_hist; - + lo_pos = hi_pos = hist_count - 1; - - for (i = 0; i < hist_count; i++) { - cmp_result = compare(&hist_infos[i].ep_value, stats_val.min_key_val, field_type, cs); - if (cmp_result == GREAT) { - lo_pos = i; - break; + int32 lo = 0; + int32 hi = hist_count - 1; + int32 leftmost = -1; + bool should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, stats_val.min_key_val, field_type, cs); + + switch (cmp_result) { + case GREAT: + leftmost = mid; + hi = mid - 1; + break; + case EQUAL: + case LESS: + lo = mid + 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; } - } + if (should_end_loop) { break; } + } + if (leftmost != -1) { lo_pos = leftmost; } + // calc the part of value below lo_pos - *percent += percent_in_bucket(col_stat, i, stats_val.min_key_val, field_type); - - for (i = lo_pos; i < hist_count; i++) { - cmp_result = compare(&hist_infos[i].ep_value, stats_val.max_key_val, field_type, cs); - if (cmp_result == GREAT || cmp_result == EQUAL) { - hi_pos = i; - break; + *percent += percent_in_bucket(col_stat, lo_pos, stats_val.min_key_val, field_type); + + lo = lo_pos; + hi = hist_count - 1; + leftmost = -1; + should_end_loop = false; + while (lo <= hi) { + int32 mid = (lo + hi) / 2; + cmp_result = compare(&hist_infos[mid].ep_value, stats_val.max_key_val, field_type, cs); + + switch (cmp_result) { + case EQUAL: + case GREAT: + leftmost = mid; + hi = mid - 1; + break; + case LESS: + lo = mid + 1; + break; + case UNCOMPARABLE: + should_end_loop = true; + break; } - } + if (should_end_loop) { break; } + } + if (leftmost != -1) { hi_pos = leftmost; } + // calc the part of value below hi_pos - *percent -= percent_in_bucket(col_stat, i, stats_val.max_key_val, field_type); + *percent -= percent_in_bucket(col_stat, hi_pos, stats_val.max_key_val, field_type); if (col_stat->num_buckets > 0) { *percent = *percent / col_stat->num_buckets; @@ -628,4 +795,4 @@ void tse_index_stats_update(TABLE *table, tianchi_cbo_stats_t *cbo_stats) sk.set_records_per_key(j, rec_per_key); } } -} \ No newline at end of file +}