revision-id: 998560e253d0bc1f251ca106f67b31f6684157df (mariadb-10.5.4-468-g998560e253d) parent(s): 6d1f1b61b59310027698a92ccf533a3093f1ce04 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-01-28 21:43:55 +0300 message: MDEV-9750: Quick memory exhaustion with 'extended_keys=on' ... (Variant #4, full patch) Do not produce SEL_ARG graphs that would yield huge numbers of ranges. Introduce a concept of SEL_ARG graph's "weight". If we are about to produce a graph whose "weight" exceeds the limit, remove the parts of SEL_ARG graph that represent the biggest key parts. Do so until the graph's is within the limit. Includes - debug code to verify SEL_ARG graph weight - A user-visible @@optimizer_max_sel_arg_weight to control the optimization - Logging the optimization into the optimizer trace. --- mysql-test/main/mysqld--help.result | 4 + mysql-test/main/range_notembedded.result | 179 ++++++++++++ mysql-test/main/range_notembedded.test | 66 +++++ .../sys_vars/r/sysvars_server_embedded.result | 10 + .../sys_vars/r/sysvars_server_notembedded.result | 10 + sql/opt_range.cc | 302 ++++++++++++++++++++- sql/opt_range.h | 63 ++++- sql/sql_class.h | 1 + sql/sys_vars.cc | 6 + 9 files changed, 634 insertions(+), 7 deletions(-) diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 7671dfeef59..141ef94ad59 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -681,6 +681,9 @@ The following specify which files/extra groups are read (specified before remain max_connections*5 or max_connections + table_cache*2 (whichever is larger) number of file descriptors (Automatically configured unless set explicitly) + --optimizer-max-sel-arg-weight=# + The maximum weight of the SEL_ARG graph. Set to 0 for no + limit --optimizer-prune-level=# Controls the heuristic(s) applied during query optimization to prune less-promising partial plans from @@ -1637,6 +1640,7 @@ old-alter-table DEFAULT old-mode old-passwords FALSE old-style-user-limits FALSE +optimizer-max-sel-arg-weight 32000 optimizer-prune-level 1 optimizer-search-depth 62 optimizer-selectivity-sampling-limit 100 diff --git a/mysql-test/main/range_notembedded.result b/mysql-test/main/range_notembedded.result index 4973f449631..1f6db102624 100644 --- a/mysql-test/main/range_notembedded.result +++ b/mysql-test/main/range_notembedded.result @@ -35,3 +35,182 @@ json_detailed(JSON_EXTRACT(trace, '$**.ranges')) ] set optimizer_trace=@tmp_21958; drop table t2; +# +# MDEV-9750: Quick memory exhaustion with 'extended_keys=on'... +# +create table t1 ( +kp1 int, +kp2 int, +kp3 int, +kp4 int, +key key1(kp1, kp2, kp3,kp4) +); +insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +show variables like 'optimizer_max_sel_arg_weight'; +Variable_name Value +optimizer_max_sel_arg_weight 32000 +set @tmp_9750=@@optimizer_trace; +set optimizer_trace=1; +explain select * from t1 where +kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and +kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and +kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and +kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) +; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index +set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives')) +from information_schema.optimizer_trace); +# This will show 3-component ranges. +# The ranges were produced, but the optimizer has cut away kp4 +# to keep the number of ranges at manageable level: +select left(@json, 500); +left(@json, 500) +[ + + [ + + { + "index": "key1", + "ranges": + [ + "(1,1,1) <= (kp1,kp2,kp3) <= (1,1,1)", + "(1,1,2) <= (kp1,kp2,kp3) <= (1,1,2)", + "(1,1,3) <= (kp1,kp2,kp3) <= (1,1,3)", + "(1,1,4) <= (kp1,kp2,kp3) <= (1,1,4)", + "(1,1,5) <= (kp1,kp2,kp3) <= (1,1,5)", + "(1,1,6) <= (kp1,kp2,kp3) <= (1,1,6)", + "(1,1,7) <= (kp1,kp2,kp3) <= (1,1,7)", + " +## Repeat the above with low max_weight: +set @tmp9750_weight=@@optimizer_max_sel_arg_weight; +set optimizer_max_sel_arg_weight=20; +explain select * from t1 where +kp1 in (1,2,3,4,5,6,7,8,9,10) and +kp2 in (1,2,3,4,5,6,7,8,9,10) and +kp3 in (1,2,3,4,5,6,7,8,9,10) and +kp4 in (1,2,3,4,5,6,7,8,9,10) +; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index +set @trace= (select trace from information_schema.optimizer_trace); +set @json= json_detailed(json_extract(@trace, '$**.range_scan_alternatives')); +select left(@json, 500); +left(@json, 500) +[ + + [ + + { + "index": "key1", + "ranges": + [ + "(1) <= (kp1) <= (1)", + "(2) <= (kp1) <= (2)", + "(3) <= (kp1) <= (3)", + "(4) <= (kp1) <= (4)", + "(5) <= (kp1) <= (5)", + "(6) <= (kp1) <= (6)", + "(7) <= (kp1) <= (7)", + "(8) <= (kp1) <= (8)", + "(9) <= (kp1) <= (9)", + "(10) <= (kp1) <= (10)" + +set @json= json_detailed(json_extract(@trace, '$**.setup_range_conditions')); +select left(@json, 2500); +left(@json, 2500) +[ + + [ + + { + "sel_arg_weight_heuristic": + { + "key1_field": "kp1", + "key2_field": "kp2", + "key1_weight": 10, + "key2_weight": 10 + } + }, + + { + "sel_arg_weight_heuristic": + { + "key1_field": "kp1", + "key2_field": "kp3", + "key1_weight": 10, + "key2_weight": 10 + } + }, + + { + "sel_arg_weight_heuristic": + { + "key1_field": "kp1", + "key2_field": "kp4", + "key1_weight": 10, + "key2_weight": 10 + } + } + ] +] +## Repeat the above with a bit higher max_weight: +set @tmp9750_weight=@@optimizer_max_sel_arg_weight; +set optimizer_max_sel_arg_weight=120; +explain select * from t1 where +kp1 in (1,2,3,4,5,6,7,8,9,10) and +kp2 in (1,2,3,4,5,6,7,8,9,10) and +kp3 in (1,2,3,4,5,6,7,8,9,10) and +kp4 in (1,2,3,4,5,6,7,8,9,10) +; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index +set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives')) +from information_schema.optimizer_trace); +select left(@json, 1500); +left(@json, 1500) +[ + + [ + + { + "index": "key1", + "ranges": + [ + "(1,1) <= (kp1,kp2) <= (1,1)", + "(1,2) <= (kp1,kp2) <= (1,2)", + "(1,3) <= (kp1,kp2) <= (1,3)", + "(1,4) <= (kp1,kp2) <= (1,4)", + "(1,5) <= (kp1,kp2) <= (1,5)", + "(1,6) <= (kp1,kp2) <= (1,6)", + "(1,7) <= (kp1,kp2) <= (1,7)", + "(1,8) <= (kp1,kp2) <= (1,8)", + "(1,9) <= (kp1,kp2) <= (1,9)", + "(1,10) <= (kp1,kp2) <= (1,10)", + "(2,1) <= (kp1,kp2) <= (2,1)", + "(2,2) <= (kp1,kp2) <= (2,2)", + "(2,3) <= (kp1,kp2) <= (2,3)", + "(2,4) <= (kp1,kp2) <= (2,4)", + "(2,5) <= (kp1,kp2) <= (2,5)", + "(2,6) <= (kp1,kp2) <= (2,6)", + "(2,7) <= (kp1,kp2) <= (2,7)", + "(2,8) <= (kp1,kp2) <= (2,8)", + "(2,9) <= (kp1,kp2) <= (2,9)", + "(2,10) <= (kp1,kp2) <= (2,10)", + "(3,1) <= (kp1,kp2) <= (3,1)", + "(3,2) <= (kp1,kp2) <= (3,2)", + "(3,3) <= (kp1,kp2) <= (3,3)", + "(3,4) <= (kp1,kp2) <= (3,4)", + "(3,5) <= (kp1,kp2) <= (3,5)", + "(3,6) <= (kp1,kp2) <= (3,6)", + "(3,7) <= (kp1,kp2) <= (3,7)", + "(3,8) <= (kp1,kp2) <= (3,8)", + "(3,9) <= (kp1,kp2) <= (3,9)", + "(3,10) <= (kp1,kp2 +set optimizer_max_sel_arg_weight= @tmp9750_weight; +set optimizer_trace=@tmp_9750; +drop table t1; diff --git a/mysql-test/main/range_notembedded.test b/mysql-test/main/range_notembedded.test index 4dc49429ff1..d50bec23148 100644 --- a/mysql-test/main/range_notembedded.test +++ b/mysql-test/main/range_notembedded.test @@ -31,3 +31,69 @@ from information_schema.optimizer_trace; set optimizer_trace=@tmp_21958; drop table t2; +--echo # +--echo # MDEV-9750: Quick memory exhaustion with 'extended_keys=on'... +--echo # + +create table t1 ( + kp1 int, + kp2 int, + kp3 int, + kp4 int, + key key1(kp1, kp2, kp3,kp4) +); + +insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3); +analyze table t1; + +show variables like 'optimizer_max_sel_arg_weight'; + +# 20 * 20 * 20 *20 = 400*400 = 160,000 ranges +set @tmp_9750=@@optimizer_trace; +set optimizer_trace=1; +explain select * from t1 where + kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and + kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and + kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and + kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) +; + +set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives')) + from information_schema.optimizer_trace); +--echo # This will show 3-component ranges. +--echo # The ranges were produced, but the optimizer has cut away kp4 +--echo # to keep the number of ranges at manageable level: +select left(@json, 500); + +--echo ## Repeat the above with low max_weight: +set @tmp9750_weight=@@optimizer_max_sel_arg_weight; +set optimizer_max_sel_arg_weight=20; +explain select * from t1 where + kp1 in (1,2,3,4,5,6,7,8,9,10) and + kp2 in (1,2,3,4,5,6,7,8,9,10) and + kp3 in (1,2,3,4,5,6,7,8,9,10) and + kp4 in (1,2,3,4,5,6,7,8,9,10) +; +set @trace= (select trace from information_schema.optimizer_trace); +set @json= json_detailed(json_extract(@trace, '$**.range_scan_alternatives')); +select left(@json, 500); + +set @json= json_detailed(json_extract(@trace, '$**.setup_range_conditions')); +select left(@json, 2500); + +--echo ## Repeat the above with a bit higher max_weight: +set @tmp9750_weight=@@optimizer_max_sel_arg_weight; +set optimizer_max_sel_arg_weight=120; +explain select * from t1 where + kp1 in (1,2,3,4,5,6,7,8,9,10) and + kp2 in (1,2,3,4,5,6,7,8,9,10) and + kp3 in (1,2,3,4,5,6,7,8,9,10) and + kp4 in (1,2,3,4,5,6,7,8,9,10) +; +set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives')) + from information_schema.optimizer_trace); +select left(@json, 1500); + +set optimizer_max_sel_arg_weight= @tmp9750_weight; +set optimizer_trace=@tmp_9750; +drop table t1; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 0648284e580..e0b4e3565bc 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -2233,6 +2233,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT The maximum weight of the SEL_ARG graph. Set to 0 for no limit +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_PRUNE_LEVEL VARIABLE_SCOPE SESSION VARIABLE_TYPE BIGINT UNSIGNED diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 78b8ca0263d..10773514295 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -2393,6 +2393,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT The maximum weight of the SEL_ARG graph. Set to 0 for no limit +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_PRUNE_LEVEL VARIABLE_SCOPE SESSION VARIABLE_TYPE BIGINT UNSIGNED diff --git a/sql/opt_range.cc b/sql/opt_range.cc index f146fc25126..e04a1e2753f 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -398,6 +398,11 @@ static SEL_ARG *key_or(RANGE_OPT_PARAM *param, static SEL_ARG *key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag); +static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *key1, SEL_ARG *key2); +static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *key1, SEL_ARG *key2, + uint clone_flag); static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1); bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key, SEL_ARG *key_tree, uchar *min_key,uint min_key_flag, @@ -409,6 +414,13 @@ static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length); static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); +static +SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *sel_arg); +static +bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1, + SEL_ARG *key2); + #include "opt_range_mrr.cc" static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2, @@ -706,7 +718,8 @@ int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_ARG *key1= (*or_tree)->keys[key_no]; SEL_ARG *key2= tree->keys[key_no]; key2->incr_refs(); - if ((result->keys[key_no]= key_or(param, key1, key2))) + if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, + key2))) { result_keys.set_bit(key_no); @@ -1872,9 +1885,13 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc() next_key_part=arg.next_key_part; max_part_no= arg.max_part_no; use_count=1; elements=1; + weight=1; next= 0; if (next_key_part) + { ++next_key_part->use_count; + weight += next_key_part->weight; + } } @@ -1891,7 +1908,7 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg, :min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()), elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg), max_value((uchar*) max_value_arg), next(0),prev(0), - next_key_part(0), color(BLACK), type(KEY_RANGE) + next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1) { left=right= &null_element; max_part_no= 1; @@ -1903,7 +1920,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_, :min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_), part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1), field(field_), min_value(min_value_), max_value(max_value_), - next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE) + next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1) { max_part_no= part+1; left=right= &null_element; @@ -5447,7 +5464,7 @@ TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge, if ((*tree)->keys[key_idx]) (*tree)->keys[key_idx]->incr_refs(); if (((*changed_tree)->keys[key_idx]= - key_or(param, key, (*tree)->keys[key_idx]))) + key_or_with_limit(param, key_idx, key, (*tree)->keys[key_idx]))) (*changed_tree)->keys_map.set_bit(key_idx); *tree= NULL; removed_cnt++; @@ -9116,7 +9133,8 @@ int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2, key2->incr_refs(); } SEL_ARG *key; - if ((result->keys[key_no]= key =key_and(param, key1, key2, flag))) + if ((result->keys[key_no]= key= key_and_with_limit(param, key_no, + key1, key2, flag))) { if (key && key->type == SEL_ARG::IMPOSSIBLE) { @@ -9678,7 +9696,7 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) key1->incr_refs(); key2->incr_refs(); } - if ((result->keys[key_no]= key_or(param, key1, key2))) + if ((result->keys[key_no]= key_or_with_limit(param, key_no, key1, key2))) result->keys_map.set_bit(key_no); } result->type= tree1->type; @@ -9752,6 +9770,9 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, SEL_ARG *next; ulong use_count=key1->use_count; + if (sel_arg_and_weight_heuristic(param, key1, key2)) + return key1; + if (key1->elements != 1) { key2->use_count+=key1->elements-1; //psergey: why we don't count that key1 has n-k-p? @@ -9764,6 +9785,8 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, key1->right= key1->left= &null_element; key1->next= key1->prev= 0; } + uint new_weight= 0; + for (next=key1->first(); next ; next=next->next) { if (next->next_key_part) @@ -9775,17 +9798,22 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, continue; } next->next_key_part=tmp; + new_weight += 1 + tmp->weight; if (use_count) next->increment_use_count(use_count); if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS) break; } else + { + new_weight += 1 + key2->weight; next->next_key_part=key2; + } } if (!key1) return &null_element; // Impossible ranges key1->use_count++; + key1->weight= new_weight; key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1); return key1; } @@ -9821,6 +9849,10 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) clone_flag=swap_clone_flag(clone_flag); } // key1->part < key2->part + + if (sel_arg_and_weight_heuristic(param, key1, key2)) + return key1; + key1->use_count--; if (key1->use_count > 0) if (!(key1= key1->clone_tree(param))) @@ -9851,6 +9883,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) { // Both are maybe key key1->next_key_part=key_and(param, key1->next_key_part, key2->next_key_part, clone_flag); + + key1->weight= 1 + (key1->next_key_part? key1->next_key_part->weight : 0); + if (key1->next_key_part && key1->next_key_part->type == SEL_ARG::IMPOSSIBLE) return key1; @@ -9901,6 +9936,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) if (!new_arg) return &null_element; // End of memory new_arg->next_key_part=next; + if (new_arg->next_key_part) + new_arg->weight += new_arg->next_key_part->weight; + if (!new_tree) { new_tree=new_arg; @@ -9939,6 +9977,72 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1) return 0; } +#ifndef DBUG_OFF +/* + Verify SEL_TREE's weight. + + Recompute the weight and compare +*/ +uint SEL_ARG::verify_weight() +{ + uint computed_weight= 0; + SEL_ARG *first_arg= first(); + + if (first_arg) + { + for (SEL_ARG *arg= first_arg; arg; arg= arg->next) + { + computed_weight++; + if (arg->next_key_part) + computed_weight+= arg->next_key_part->verify_weight(); + } + } + else + { + // first()=NULL means this is a special kind of SEL_ARG, e.g. + // SEL_ARG with type=MAYBE_KEY + computed_weight= 1; + if (next_key_part) + computed_weight += next_key_part->verify_weight(); + } + + if (computed_weight != weight) + { + sql_print_error("SEL_ARG weight mismatch: computed %u have %u\n", + computed_weight, weight); + DBUG_ASSERT(computed_weight == weight); // Fail an assertion + } + return computed_weight; +} +#endif + +static +SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *key1, SEL_ARG *key2) +{ + SEL_ARG *res= key_or(param, key1, key2); + res= enforce_sel_arg_weight_limit(param, keyno, res); +#ifndef DBUG_OFF + if (res) + res->verify_weight(); +#endif + return res; +} + + +static +SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *key1, SEL_ARG *key2, uint clone_flag) +{ + SEL_ARG *res= key_and(param, key1, key2, clone_flag); + res= enforce_sel_arg_weight_limit(param, keyno, res); +#ifndef DBUG_OFF + if (res) + res->verify_weight(); +#endif + return res; +} + /** Combine two range expression under a common OR. On a logical level, the @@ -10595,6 +10699,19 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) } key1->use_count++; + /* Re-compute the result tree's weight. */ + { + uint new_weight= 0; + const SEL_ARG *sl; + for (sl= key1->first(); sl ; sl= sl->next) + { + new_weight++; + if (sl->next_key_part) + new_weight += sl->next_key_part->weight; + } + key1->weight= new_weight; + } + key1->max_part_no= max_part_no; return key1; } @@ -10632,6 +10749,160 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b) } +/* + Compute the MAX(key part) in this SEL_ARG graph. +*/ +uint SEL_ARG::get_max_key_part() const +{ + const SEL_ARG *cur; + uint max_part= part; + for (cur= first(); cur ; cur=cur->next) + { + if (cur->next_key_part) + { + uint mp= cur->next_key_part->get_max_key_part(); + max_part= MY_MAX(part, mp); + } + } + return max_part; +} + + +/* + Remove the SEL_ARG graph elements which have part > max_part. + + @detail + Also update weight for the graph and any modified subgraphs. +*/ + +void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part) +{ + SEL_ARG *cur; + DBUG_ASSERT(max_part >= sel_arg->part); + + for (cur= sel_arg->first(); cur ; cur=cur->next) + { + if (cur->next_key_part) + { + if (cur->next_key_part->part > max_part) + { + // Remove cur->next_key_part. + sel_arg->weight -= cur->next_key_part->weight; + cur->next_key_part= NULL; + } + else + { + uint old_weight= cur->next_key_part->weight; + prune_sel_arg_graph(cur->next_key_part, max_part); + sel_arg->weight -= (old_weight - cur->next_key_part->weight); + } + } + } +} + + +/* + @brief + Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT, + by cutting off branches if necessary. + + @detail + @see declaration of SEL_ARG::weight for definition of weight. + + This function attempts to reduce the graph's weight by cutting off + SEL_ARG::next_key_part connections if necessary. + + We start with maximum used keypart and then remove one keypart after + another until the graph's weight is within the limit. + + @seealso + sel_arg_and_weight_heuristic(); + + @return + tree pointer The tree after processing, + NULL If it was not possible to reduce the weight of the tree below the + limit. +*/ + +SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno, + SEL_ARG *sel_arg) +{ + if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE || + !param->thd->variables.optimizer_max_sel_arg_weight) + return sel_arg; + + Field *field= sel_arg->field; + uint weight1= sel_arg->weight; + + while (1) + { + if (likely(sel_arg->weight <= param->thd->variables. + optimizer_max_sel_arg_weight)) + break; + + uint max_part= sel_arg->get_max_key_part(); + if (max_part == sel_arg->part) + { + /* + We don't return NULL right away as we want to have the information + about the changed tree in the optimizer trace. + */ + sel_arg= NULL; + break; + } + + max_part--; + prune_sel_arg_graph(sel_arg, max_part); + } + + uint weight2= sel_arg? sel_arg->weight : 0; + + if (weight2 != weight1) + { + Json_writer_object wrapper(param->thd); + Json_writer_object obj(param->thd, "enforce_sel_arg_weight_limit"); + if (param->using_real_indexes) + obj.add("index", param->table->key_info[param->real_keynr[keyno]].name); + else + obj.add("pseudo_index", field->field_name); + + obj.add("old_weight", (longlong)weight1); + obj.add("new_weight", (longlong)weight2); + } + return sel_arg; +} + + +/* + @detail + Do not combine the trees if their total weight is likely to exceed the + MAX_WEIGHT. + (It is possible that key1 has next_key_part that has empty overlap with + key2. In this case, the combined tree will have a smaller weight than we + predict. We assume this is rare.) +*/ + +static +bool sel_arg_and_weight_heuristic(RANGE_OPT_PARAM *param, SEL_ARG *key1, + SEL_ARG *key2) +{ + DBUG_ASSERT(key1->part < key2->part); + + ulong max_weight= param->thd->variables.optimizer_max_sel_arg_weight; + if (max_weight && key1->weight + key1->elements*key2->weight > max_weight) + { + Json_writer_object wrapper(param->thd); + Json_writer_object obj(param->thd, "sel_arg_weight_heuristic"); + obj.add("key1_field", key1->field->field_name); + obj.add("key2_field", key2->field->field_name); + obj.add("key1_weight", (longlong)key1->weight); + obj.add("key2_weight", (longlong)key2->weight); + return true; // Discard key2 + } + return false; +} + + SEL_ARG * SEL_ARG::insert(SEL_ARG *key) { @@ -10670,6 +10941,13 @@ SEL_ARG::insert(SEL_ARG *key) SEL_ARG *root=rb_insert(key); // rebalance tree root->use_count=this->use_count; // copy root info root->elements= this->elements+1; + /* + The new weight is: + old root's weight + +1 for the weight of the added element + + next_key_part's weight of the added element + */ + root->weight = weight + 1 + (key->next_key_part? key->next_key_part->weight: 0); root->maybe_flag=this->maybe_flag; return root; } @@ -10727,6 +11005,17 @@ SEL_ARG::tree_delete(SEL_ARG *key) root=this; this->parent= 0; + /* + Compute the weight the tree will have after the element is removed. + We remove the element itself (weight=1) + and the sub-graph connected to its next_key_part. + */ + uint new_weight= root->weight - (1 + (key->next_key_part? + key->next_key_part->weight : 0)); + + DBUG_ASSERT(root->weight >= (1 + (key->next_key_part ? + key->next_key_part->weight : 0))); + /* Unlink from list */ if (key->prev) key->prev->next=key->next; @@ -10778,6 +11067,7 @@ SEL_ARG::tree_delete(SEL_ARG *key) test_rb_tree(root,root->parent); root->use_count=this->use_count; // Fix root counters + root->weight= new_weight; root->elements=this->elements-1; root->maybe_flag=this->maybe_flag; DBUG_RETURN(root); diff --git a/sql/opt_range.h b/sql/opt_range.h index 664e821f8d1..50cd43c0e85 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -223,6 +223,50 @@ class RANGE_OPT_PARAM; We avoid consuming too much memory by setting a limit on the number of SEL_ARG object we can construct during one range analysis invocation. + + 5. SEL_ARG GRAPH WEIGHT + + A SEL_ARG graph has a property we call weight, and we define it as follows: + + <definition> + If the SEL_ARG graph does not have any node with multiple incoming + next_key_part edges, then its weight is the number of SEL_ARG objects used. + + If there is a node with multiple incoming next_key_part edges, clone that + node, (and the nodes connected to it via prev/next links) and redirect one + of the incoming next_key_part edges to the clone. + + Continue with cloning until we get a graph that has no nodes with multiple + incoming next_key_part edges. Then, the number of SEL_ARG objects in the + graph is the weight of the original graph. + </definition> + + Example: + + kp1 $ kp2 $ kp3 + $ $ + | +-------+ $ $ + \->| kp1=2 |--$--------------$-+ + +-------+ $ $ | +--------+ + | $ $ ==>| kp3=11 | + +-------+ $ $ | +--------+ + | kp1>3 |--$--------------$-+ | + +-------+ $ $ +--------+ + $ $ | kp3=14 | + $ $ +--------+ + $ $ | + $ $ +--------+ + $ $ | kp3=14 | + $ $ +--------+ + + Here, the weight is 2 + 2*3=8. + + The rationale behind using this definition of weight is: + - it has the same order-of-magnitude as the number of ranges that the + SEL_ARG graph is describing, + - it is a lot easier to compute than computing the number of ranges, + - it can be updated incrementally when performing AND/OR operations on + parts of the graph. */ class SEL_ARG :public Sql_alloc @@ -236,6 +280,9 @@ class SEL_ARG :public Sql_alloc /* The ordinal number the least significant component encountered in the ranges of the SEL_ARG tree (the first component has number 1) + + Note: this number is currently not precise, it is an upper bound. + @seealso SEL_ARG::get_max_key_part() */ uint16 max_part_no; /* @@ -263,6 +310,17 @@ class SEL_ARG :public Sql_alloc enum leaf_color { BLACK,RED } color; enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type; + /* + For R-B root nodes only: the graph weight, as defined above in the + SEL_ARG GRAPH WEIGHT section. + */ + uint weight; + enum { MAX_WEIGHT = 32000 }; +#ifndef DBUG_OFF + uint verify_weight(); +#endif + + /* See RANGE_OPT_PARAM::alloced_sel_args */ enum { MAX_SEL_ARGS = 16000 }; SEL_ARG() {} @@ -273,7 +331,7 @@ class SEL_ARG :public Sql_alloc SEL_ARG(enum Type type_arg) :min_flag(0), max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/, elements(1),use_count(1),left(0),right(0), - next_key_part(0), color(BLACK), type(type_arg) + next_key_part(0), color(BLACK), type(type_arg), weight(1) {} /** returns true if a range predicate is equal. Use all_same() @@ -287,6 +345,9 @@ class SEL_ARG :public Sql_alloc return true; return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0; } + + uint get_max_key_part() const; + /** returns true if all the predicates in the keypart tree are equal */ diff --git a/sql/sql_class.h b/sql/sql_class.h index 79dcd47bbc2..eaa97c2778d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -815,6 +815,7 @@ typedef struct system_variables uint column_compression_threshold; uint column_compression_zlib_level; uint in_subquery_conversion_threshold; + ulong optimizer_max_sel_arg_weight; ulonglong max_rowid_filter_size; vers_asof_timestamp_t vers_asof_timestamp; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 12b6ea96182..f56bd5d8875 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -6693,6 +6693,12 @@ static Sys_var_uint Sys_in_subquery_conversion_threshold( SESSION_VAR(in_subquery_conversion_threshold), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, UINT_MAX), DEFAULT(IN_SUBQUERY_CONVERSION_THRESHOLD), BLOCK_SIZE(1)); +static Sys_var_ulong Sys_optimizer_max_sel_arg_weight( + "optimizer_max_sel_arg_weight", + "The maximum weight of the SEL_ARG graph. Set to 0 for no limit", + SESSION_VAR(optimizer_max_sel_arg_weight), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, ULONG_MAX), DEFAULT(SEL_ARG::MAX_WEIGHT), BLOCK_SIZE(1)); + static Sys_var_enum Sys_secure_timestamp( "secure_timestamp", "Restricts direct setting of a session " "timestamp. Possible levels are: YES - timestamp cannot deviate from "