[Commits] c58811a: An attempt to fix MDEV-21633
revision-id: c58811a5b05a26428c8bdb8dfd984f97caa8f4c1 (mariadb-10.4.10-7-gc58811a) parent(s): 0308de94ee806c21b6776ecab73396da75282596 author: Igor Babaev committer: Igor Babaev timestamp: 2022-06-28 17:40:08 -0700 message: An attempt to fix MDEV-21633 --- mysql-test/main/group_min_max.result | 8 +- mysql-test/main/innodb_ext_key.result | 4 +- mysql-test/main/opt_trace.result | 188 +-------- mysql-test/main/opt_trace_index_merge.result | 5 - .../main/opt_trace_index_merge_innodb.result | 14 - mysql-test/main/opt_tvc.result | 4 +- mysql-test/main/rowid_filter_innodb.result | 118 +++--- mysql-test/main/selectivity.result | 2 +- mysql-test/main/selectivity.test | 72 ++++ mysql-test/main/subselect.result | 4 +- mysql-test/main/subselect_no_exists_to_in.result | 4 +- mysql-test/main/subselect_no_mat.result | 4 +- mysql-test/main/subselect_no_opts.result | 4 +- mysql-test/main/subselect_no_scache.result | 4 +- mysql-test/main/subselect_no_semijoin.result | 2 +- mysql-test/main/subselect_sj2.result | 4 +- mysql-test/main/subselect_sj2_jcl6.result | 4 +- mysql-test/main/subselect_sj2_mat.result | 4 +- sql/field.h | 2 + sql/multi_range_read.cc | 3 + sql/opt_range.cc | 315 ++++++++------- sql/opt_range.h | 3 +- sql/opt_range_mrr.cc | 1 + sql/sql_select.cc | 438 +++++++++------------ sql/sql_statistics.cc | 2 +- sql/structs.h | 1 + sql/table.h | 1 + 27 files changed, 527 insertions(+), 688 deletions(-) diff --git a/mysql-test/main/group_min_max.result b/mysql-test/main/group_min_max.result index a28cc41..b9b4a9b 100644 --- a/mysql-test/main/group_min_max.result +++ b/mysql-test/main/group_min_max.result @@ -2078,19 +2078,19 @@ id select_type table type possible_keys key key_len ref rows Extra explain extended select a1,a2,min(b),max(b) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 77 99.22 Using where; Using index +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 77 85.71 Using where; Using index Warnings: Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,min(`test`.`t1`.`b`) AS `min(b)`,max(`test`.`t1`.`b`) AS `max(b)` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`c` > 'a111' group by `test`.`t1`.`a1`,`test`.`t1`.`a2` explain extended select a1,a2,b,min(c),max(c) from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL idx_t1_0,idx_t1_1,idx_t1_2 NULL NULL NULL 128 45.12 Using where; Using temporary; Using filesort +1 SIMPLE t1 ALL idx_t1_0,idx_t1_1,idx_t1_2 NULL NULL NULL 128 74.41 Using where; Using temporary; Using filesort Warnings: Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,min(`test`.`t1`.`c`) AS `min(c)`,max(`test`.`t1`.`c`) AS `max(c)` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`d` > 'xy2' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b` explain extended select a1,a2,b,c from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (d > 'xy2') group by a1,a2,b,c; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL idx_t1_0,idx_t1_1,idx_t1_2 NULL NULL NULL 128 45.12 Using where; Using temporary; Using filesort +1 SIMPLE t1 ALL idx_t1_0,idx_t1_1,idx_t1_2 NULL NULL NULL 128 74.41 Using where; Using temporary; Using filesort Warnings: Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`d` > 'xy2' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`,`test`.`t1`.`c` explain select a1,a2,b,max(c),min(c) from t2 where (a2 = 'a') and (b = 'b') or (b < 'b') group by a1; @@ -2098,7 +2098,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 Using where; Using index explain extended select a1,a2,b from t1 where (a1 = 'b' or a1 = 'd' or a1 = 'a' or a1 = 'c') and (a2 > 'a') and (c > 'a111') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 77 99.22 Using where; Using index +1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 130 NULL 77 85.71 Using where; Using index Warnings: Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b` from `test`.`t1` where (`test`.`t1`.`a1` = 'b' or `test`.`t1`.`a1` = 'd' or `test`.`t1`.`a1` = 'a' or `test`.`t1`.`a1` = 'c') and `test`.`t1`.`a2` > 'a' and `test`.`t1`.`c` > 'a111' group by `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b` explain select a1,a2,min(b),c from t2 where (a2 = 'a') and (c = 'a111') group by a1; diff --git a/mysql-test/main/innodb_ext_key.result b/mysql-test/main/innodb_ext_key.result index e0fa557..a484a98 100644 --- a/mysql-test/main/innodb_ext_key.result +++ b/mysql-test/main/innodb_ext_key.result @@ -1186,7 +1186,7 @@ EXPLAIN "key_length": "3070", "used_key_parts": ["f2", "pk1"], "rows": 1, - "filtered": 50, + "filtered": 100, "index_condition": "t1.pk1 <= 5 and t1.pk2 <= 5 and t1.f2 = 'abc'", "attached_condition": "t1.f1 <= '3'" } @@ -1216,7 +1216,7 @@ EXPLAIN "key_length": "3011", "used_key_parts": ["pk1", "f2", "pk2"], "rows": 1, - "filtered": 50, + "filtered": 100, "index_condition": "t1.f2 <= 5 and t1.pk2 <= 5 and t1.pk1 = 'abc'", "attached_condition": "t1.f1 <= '3'" } diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 72cee2a..07bff70 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -104,16 +104,6 @@ select * from v1 { { "rows_estimation": [ { - "selectivity_for_indexes": [], - "selectivity_for_columns": [ - { - "column_name": "a", - "selectivity_from_histogram": 0.5 - } - ], - "cond_selectivity": 0.5 - }, - { "table": "t1", "table_scan": { "rows": 2, @@ -250,16 +240,6 @@ select * from (select * from t1 where t1.a=1)q { { "rows_estimation": [ { - "selectivity_for_indexes": [], - "selectivity_for_columns": [ - { - "column_name": "a", - "selectivity_from_histogram": 0.5 - } - ], - "cond_selectivity": 0.5 - }, - { "table": "t1", "table_scan": { "rows": 2, @@ -401,16 +381,6 @@ select * from v2 { { "rows_estimation": [ { - "selectivity_for_indexes": [], - "selectivity_for_columns": [ - { - "column_name": "a", - "selectivity_from_histogram": 0.5 - } - ], - "cond_selectivity": 0.5 - }, - { "table": "t1", "table_scan": { "rows": 2, @@ -1420,20 +1390,6 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a { "analyzing_index_merge_union": [] } } - }, - { - "selectivity_for_indexes": [], - "selectivity_for_columns": [ - { - "column_name": "b", - "selectivity_from_histogram": 0.2891 - }, - { - "column_name": "c", - "selectivity_from_histogram": 0.2891 - } - ], - "cond_selectivity": 0.0836 } ] }, @@ -1631,11 +1587,6 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id { "analyzing_index_merge_union": [] } } - }, - { - "selectivity_for_indexes": [], - "selectivity_for_columns": [], - "cond_selectivity": 1 } ] }, @@ -1822,11 +1773,6 @@ EXPLAIN SELECT * FROM t1 WHERE a = 20010104e0 GROUP BY id { "analyzing_index_merge_union": [] } } - }, - { - "selectivity_for_indexes": [], - "selectivity_for_columns": [], - "cond_selectivity": 1 } ] }, @@ -1933,7 +1879,7 @@ test.t1 analyze status OK set optimizer_trace='enabled=on'; explain select * from t1 where a=1 and b=2 order by c limit 1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range a_c,a_b a_c 5 NULL 180 Using where +1 SIMPLE t1 ref a_c,a_b a_b 10 const,const 21 Using where; Using filesort select * from INFORMATION_SCHEMA.OPTIMIZER_TRACE; QUERY TRACE MISSING_BYTES_BEYOND_MAX_MEM_SIZE INSUFFICIENT_PRIVILEGES explain select * from t1 where a=1 and b=2 order by c limit 1 { @@ -2075,25 +2021,6 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 { "chosen": true } } - }, - { - "selectivity_for_indexes": [ - { - "index_name": "a_b", - "selectivity_from_index": 0.021 - } - ], - "selectivity_for_columns": [ - { - "column_name": "a", - "selectivity_from_histogram": 0.1797 - }, - { - "column_name": "b", - "selectivity_from_histogram": 0.0156 - } - ], - "cond_selectivity": 0.021 } ] }, @@ -2166,19 +2093,19 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 { { "index": "a_a", "can_resolve_order": true, - "updated_limit": 47, - "index_scan_time": 47, + "updated_limit": 500, + "index_scan_time": 500, "usable": false, "cause": "cost" }, { "index": "a_c", "can_resolve_order": true, - "updated_limit": 47, - "range_scan_time": 4.324, - "index_scan_time": 4.324, - "records": 180, - "chosen": true + "updated_limit": 500, + "range_scan_time": 46, + "index_scan_time": 46, + "usable": false, + "cause": "cost" }, { "index": "a_b", @@ -2187,66 +2114,6 @@ explain select * from t1 where a=1 and b=2 order by c limit 1 { } ] } - }, - { - "table": "t1", - "range_analysis": { - "table_scan": { - "rows": 1000, - "cost": 2e308 - }, - "potential_range_indexes": [ - { - "index": "a_a", - "usable": false, - "cause": "not applicable" - }, - { - "index": "a_c", - "usable": true, - "key_parts": ["a", "c"] - }, - { - "index": "a_b", - "usable": false, - "cause": "not applicable" - } - ], - "setup_range_conditions": [], - "group_index_range": { - "chosen": false, - "cause": "no group by or distinct" - }, - "analyzing_range_alternatives": { - "range_scan_alternatives": [ - { - "index": "a_c", - "ranges": ["(1) <= (a) <= (1)"], - "rowid_ordered": false, - "using_mrr": false, - "index_only": false, - "rows": 180, - "cost": 231.72, - "chosen": true - } - ], - "analyzing_roworder_intersect": { - "cause": "too few roworder scans" - }, - "analyzing_index_merge_union": [] - }, - "chosen_range_access_summary": { - "range_access_plan": { - "type": "range_scan", - "index": "a_c", - "rows": 180, - "ranges": ["(1) <= (a) <= (1)"] - }, - "rows_for_plan": 180, - "cost_for_plan": 231.72, - "chosen": true - } - } } ] } @@ -3283,25 +3150,6 @@ explain select * from t1 where pk = 2 and a=5 and b=1 { "chosen": true } } - }, - { - "selectivity_for_indexes": [ - { - "index_name": "pk_a_b", - "selectivity_from_index": 0.1 - } - ], - "selectivity_for_columns": [ - { - "column_name": "a", - "selectivity_from_histogram": 0.1 - }, - { - "column_name": "b", - "selectivity_from_histogram": 0.1 - } - ], - "cond_selectivity": 0.1 } ] }, @@ -3860,16 +3708,6 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { } }, { - "selectivity_for_indexes": [ - { - "index_name": "a", - "selectivity_from_index": 0.3 - } - ], - "selectivity_for_columns": [], - "cond_selectivity": 0.3 - }, - { "table": "t1", "range_analysis": { "table_scan": { @@ -3923,16 +3761,6 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { "chosen": true } } - }, - { - "selectivity_for_indexes": [ - { - "index_name": "a", - "selectivity_from_index": 0.3 - } - ], - "selectivity_for_columns": [], - "cond_selectivity": 0.3 } ] }, diff --git a/mysql-test/main/opt_trace_index_merge.result b/mysql-test/main/opt_trace_index_merge.result index 7f1bd51..ed1a5a7 100644 --- a/mysql-test/main/opt_trace_index_merge.result +++ b/mysql-test/main/opt_trace_index_merge.result @@ -191,11 +191,6 @@ explain select * from t1 where a=1 or b=1 { "chosen": true } } - }, - { - "selectivity_for_indexes": [], - "selectivity_for_columns": [], - "cond_selectivity": 0.002 } ] }, diff --git a/mysql-test/main/opt_trace_index_merge_innodb.result b/mysql-test/main/opt_trace_index_merge_innodb.result index afcf0b0..952d828 100644 --- a/mysql-test/main/opt_trace_index_merge_innodb.result +++ b/mysql-test/main/opt_trace_index_merge_innodb.result @@ -171,20 +171,6 @@ explain select * from t1 where pk1 != 0 and key1 = 1 { "chosen": true } } - }, - { - "selectivity_for_indexes": [ - { - "index_name": "PRIMARY", - "selectivity_from_index": 1 - }, - { - "index_name": "key1", - "selectivity_from_index": 0.001 - } - ], - "selectivity_for_columns": [], - "cond_selectivity": 0.001 } ] }, diff --git a/mysql-test/main/opt_tvc.result b/mysql-test/main/opt_tvc.result index 5329a9f..7886a99 100644 --- a/mysql-test/main/opt_tvc.result +++ b/mysql-test/main/opt_tvc.result @@ -444,7 +444,7 @@ where b in (3,5) group by b ) as dr_table; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY <derived2> ALL NULL NULL NULL NULL 12 100.00 +1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 100.00 2 DERIVED t1 ALL NULL NULL NULL NULL 6 100.00 Using temporary; Using filesort 2 DERIVED <subquery3> eq_ref distinct_key distinct_key 4 func 1 100.00 3 MATERIALIZED <derived4> ALL NULL NULL NULL NULL 2 100.00 @@ -464,7 +464,7 @@ as tvc_0 group by b ) as dr_table; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY <derived2> ALL NULL NULL NULL NULL 12 100.00 +1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 100.00 2 DERIVED t1 ALL NULL NULL NULL NULL 6 100.00 Using temporary; Using filesort 2 DERIVED <subquery3> eq_ref distinct_key distinct_key 4 func 1 100.00 3 MATERIALIZED <derived4> ALL NULL NULL NULL NULL 2 100.00 diff --git a/mysql-test/main/rowid_filter_innodb.result b/mysql-test/main/rowid_filter_innodb.result index 390c783..9d7775f 100644 --- a/mysql-test/main/rowid_filter_innodb.result +++ b/mysql-test/main/rowid_filter_innodb.result @@ -561,8 +561,8 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND l_quantity > 45 AND o_totalprice between 180000 and 230000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 144 Using where; Using index -1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where +1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 510 (10%) Using index condition; Using where; Using rowid filter +1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND @@ -573,20 +573,8 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "orders", - "access_type": "range", - "possible_keys": ["PRIMARY", "i_o_totalprice"], - "key": "i_o_totalprice", - "key_length": "9", - "used_key_parts": ["o_totalprice"], - "rows": 144, - "filtered": 100, - "attached_condition": "orders.o_totalprice between 180000 and 230000", - "using_index": true - }, - "table": { "table_name": "lineitem", - "access_type": "ref", + "access_type": "range", "possible_keys": [ "PRIMARY", "i_l_shipdate", @@ -594,13 +582,33 @@ EXPLAIN "i_l_orderkey_quantity", "i_l_quantity" ], + "key": "i_l_shipdate", + "key_length": "4", + "used_key_parts": ["l_shipDATE"], + "rowid_filter": { + "range": { + "key": "i_l_quantity", + "used_key_parts": ["l_quantity"] + }, + "rows": 605, + "selectivity_pct": 10.075 + }, + "rows": 510, + "filtered": 10.075, + "index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'", + "attached_condition": "lineitem.l_quantity > 45" + }, + "table": { + "table_name": "orders", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY", "i_o_totalprice"], "key": "PRIMARY", "key_length": "4", - "used_key_parts": ["l_orderkey"], - "ref": ["dbt3_s001.orders.o_orderkey"], - "rows": 4, - "filtered": 0.8557, - "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45" + "used_key_parts": ["o_orderkey"], + "ref": ["dbt3_s001.lineitem.l_orderkey"], + "rows": 1, + "filtered": 9.6, + "attached_condition": "orders.o_totalprice between 180000 and 230000" } } } @@ -610,8 +618,8 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND l_quantity > 45 AND o_totalprice between 180000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 144 144.00 100.00 100.00 Using where; Using index -1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.62 0.86 1.68 Using where +1 SIMPLE lineitem range|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_shipdate|i_l_quantity 4|9 NULL 510 (10%) 60.00 (11%) 10.07 100.00 Using index condition; Using where; Using rowid filter +1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 9.60 26.67 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND @@ -624,24 +632,8 @@ ANALYZE "r_loops": 1, "r_total_time_ms": "REPLACED", "table": { - "table_name": "orders", - "access_type": "range", - "possible_keys": ["PRIMARY", "i_o_totalprice"], - "key": "i_o_totalprice", - "key_length": "9", - "used_key_parts": ["o_totalprice"], - "r_loops": 1, - "rows": 144, - "r_rows": 144, - "r_total_time_ms": "REPLACED", - "filtered": 100, - "r_filtered": 100, - "attached_condition": "orders.o_totalprice between 180000 and 230000", - "using_index": true - }, - "table": { "table_name": "lineitem", - "access_type": "ref", + "access_type": "range", "possible_keys": [ "PRIMARY", "i_l_shipdate", @@ -649,17 +641,45 @@ ANALYZE "i_l_orderkey_quantity", "i_l_quantity" ], + "key": "i_l_shipdate", + "key_length": "4", + "used_key_parts": ["l_shipDATE"], + "rowid_filter": { + "range": { + "key": "i_l_quantity", + "used_key_parts": ["l_quantity"] + }, + "rows": 605, + "selectivity_pct": 10.075, + "r_rows": 605, + "r_selectivity_pct": 11.765, + "r_buffer_size": "REPLACED", + "r_filling_time_ms": "REPLACED" + }, + "r_loops": 1, + "rows": 510, + "r_rows": 60, + "r_total_time_ms": "REPLACED", + "filtered": 10.075, + "r_filtered": 100, + "index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'", + "attached_condition": "lineitem.l_quantity > 45" + }, + "table": { + "table_name": "orders", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY", "i_o_totalprice"], "key": "PRIMARY", "key_length": "4", - "used_key_parts": ["l_orderkey"], - "ref": ["dbt3_s001.orders.o_orderkey"], - "r_loops": 144, - "rows": 4, - "r_rows": 6.625, + "used_key_parts": ["o_orderkey"], + "ref": ["dbt3_s001.lineitem.l_orderkey"], + "r_loops": 60, + "rows": 1, + "r_rows": 1, "r_total_time_ms": "REPLACED", - "filtered": 0.8557, - "r_filtered": 1.6771, - "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45" + "filtered": 9.6, + "r_filtered": 26.667, + "attached_condition": "orders.o_totalprice between 180000 and 230000" } } } @@ -2105,7 +2125,7 @@ EXPLAIN } }, "rows": 1, - "filtered": 1.5873, + "filtered": 100, "attached_condition": "t1.f1 is null and t1.f2 is null and (t1.f2 between 'a' and 'z' or t1.f1 = 'a')" } } @@ -2132,7 +2152,7 @@ EXPLAIN } }, "rows": 1, - "filtered": 1.5873, + "filtered": 100, "attached_condition": "t1.f1 is null and t1.f2 is null and (t1.f2 between 'a' and 'z' or t1.f1 = 'a')" } } diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index c548748..1ee927a 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -1790,7 +1790,7 @@ set optimizer_use_condition_selectivity=2; explain extended select t1.b,t2.a,t3.a,t3.b from t1,t2,t3 where t1.c = t2.a AND t1.d = t3.a and t1.a = 50 and t1.b <= 100; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 range a a 10 NULL 9 9.00 Using index condition; Using where +1 SIMPLE t1 range a a 10 NULL 9 10.00 Using index condition; Using where 1 SIMPLE t3 eq_ref PRIMARY PRIMARY 4 test.t1.d 1 100.00 1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.c 1 100.00 Using index Warnings: diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test index 3cf4f32..48e4a21 100644 --- a/mysql-test/main/selectivity.test +++ b/mysql-test/main/selectivity.test @@ -1235,6 +1235,78 @@ set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; drop table t1,t2,t3; + +--echo # +--echo # MDEV-20743: selectivity of equality conditions +--echo # + +create table ta (a int); +insert into ta select seq from seq_1_to_1000 order by rand(3); +alter table ta add pk int primary key auto_increment; +create table tb (b int); +insert into tb select seq % 10 from seq_1_to_1000 order by rand(1009); +alter table tb add pk int primary key auto_increment; +create table tc (c int); +insert into tc select seq % 10 from seq_1_to_1000 order by rand(2003); +alter table tc add pk int primary key auto_increment; +create table td (d int); +insert into td select seq % 15 from seq_1_to_1000 order by rand(3019); +alter table td add pk int primary key auto_increment; +create table t1 (pk int primary key, a int, b int, c int, d int); +insert into t1 +select ta.pk, a, b, c, d from ta, tb, tc, td where + ta.pk=tb.pk and ta.pk=tc.pk and ta.pk=td.pk; + +create table te (e int); +insert into te select seq from seq_1_to_500 order by rand(4001); +alter table te add pk int primary key auto_increment; +create table tf (f int); +insert into tf select seq % 10 from seq_1_to_500 order by rand(4507); +alter table tf add pk int primary key auto_increment; +create table tg (g int); +insert into tg select seq % 20 from seq_1_to_500 order by rand(5003); +alter table tg add pk int primary key auto_increment; +create table t2 (pk int primary key, e int, f int, g int); +insert into t2 +select te.pk, e, f, g from te, tf, tg where + te.pk=tf.pk and te.pk=tg.pk; + +select count(distinct a) from t1; +select count(distinct b) from t1; +select count(distinct c) from t1; +select count(distinct d) from t1; +select count(distinct e) from t2; +select count(distinct f) from t2; +select count(distinct g) from t2; + +set @@use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; + +analyze table t1 persistent for all; + +select * from t1 where t1.b=t1.c; +explain extended +select * from t1 where t1.b=t1.c; + +select * from t1 where t1.b=t1.d; +explain extended +select * from t1 where t1.b=t1.d; + +select * from t1 where t1.b=t1.c and t1.c=t1.d; +explain extended +select * from t1 where t1.b=t1.c and t1.c=t1.d; + +create index idx on t1(b); + +select * from t1,t2 where t1.b=t1.c and t1.c=t1.d and t2.f=t2.g and t2.g=t1.b; +explain extended +select * from t1,t2 where t1.b=t1.c and t1.c=t1.d and t2.f=t2.g and t2.g=t1.b; + +set optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; + +drop table ta,tb,tc,td,t1; + --echo # End of 10.1 tests # diff --git a/mysql-test/main/subselect.result b/mysql-test/main/subselect.result index 349e7dc..224847f 100644 --- a/mysql-test/main/subselect.result +++ b/mysql-test/main/subselect.result @@ -1886,13 +1886,13 @@ id text explain extended select * from t1 where id not in (select id from t1 where id < 8); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 100.00 Using index; Using where +2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 58.33 Using index; Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where !<expr_cache><`test`.`t1`.`id`>(<in_optimizer>(`test`.`t1`.`id`,<exists>(<primary_index_lookup>(<cache>(`test`.`t1`.`id`) in t1 on PRIMARY where `test`.`t1`.`id` < 8 and <cache>(`test`.`t1`.`id`) = `test`.`t1`.`id`)))) explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 58.33 Using where; Using index Warnings: Note 1276 Field or reference 'test.tt.id' of SELECT #2 was resolved in SELECT #1 Note 1003 /* select#1 */ select `test`.`tt`.`id` AS `id`,`test`.`tt`.`text` AS `text` from `test`.`t1` `tt` where !<in_optimizer>(1,<expr_cache><`test`.`tt`.`id`>(exists(/* select#2 */ select `test`.`t1`.`id` from `test`.`t1` where `test`.`t1`.`id` < 8 and `test`.`t1`.`id` = `test`.`tt`.`id` having `test`.`t1`.`id` is not null limit 1))) diff --git a/mysql-test/main/subselect_no_exists_to_in.result b/mysql-test/main/subselect_no_exists_to_in.result index 84c415d..b3ba535 100644 --- a/mysql-test/main/subselect_no_exists_to_in.result +++ b/mysql-test/main/subselect_no_exists_to_in.result @@ -1890,13 +1890,13 @@ id text explain extended select * from t1 where id not in (select id from t1 where id < 8); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 100.00 Using index; Using where +2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 58.33 Using index; Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where !<expr_cache><`test`.`t1`.`id`>(<in_optimizer>(`test`.`t1`.`id`,<exists>(<primary_index_lookup>(<cache>(`test`.`t1`.`id`) in t1 on PRIMARY where `test`.`t1`.`id` < 8 and <cache>(`test`.`t1`.`id`) = `test`.`t1`.`id`)))) explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 58.33 Using where; Using index Warnings: Note 1276 Field or reference 'test.tt.id' of SELECT #2 was resolved in SELECT #1 Note 1003 /* select#1 */ select `test`.`tt`.`id` AS `id`,`test`.`tt`.`text` AS `text` from `test`.`t1` `tt` where !<expr_cache><`test`.`tt`.`id`>(exists(/* select#2 */ select `test`.`t1`.`id` from `test`.`t1` where `test`.`t1`.`id` < 8 and `test`.`t1`.`id` = `test`.`tt`.`id` having `test`.`t1`.`id` is not null limit 1)) diff --git a/mysql-test/main/subselect_no_mat.result b/mysql-test/main/subselect_no_mat.result index 93035e2..6c874bc 100644 --- a/mysql-test/main/subselect_no_mat.result +++ b/mysql-test/main/subselect_no_mat.result @@ -1893,13 +1893,13 @@ id text explain extended select * from t1 where id not in (select id from t1 where id < 8); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 100.00 Using index; Using where +2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 58.33 Using index; Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where !<expr_cache><`test`.`t1`.`id`>(<in_optimizer>(`test`.`t1`.`id`,<exists>(<primary_index_lookup>(<cache>(`test`.`t1`.`id`) in t1 on PRIMARY where `test`.`t1`.`id` < 8 and <cache>(`test`.`t1`.`id`) = `test`.`t1`.`id`)))) explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 58.33 Using where; Using index Warnings: Note 1276 Field or reference 'test.tt.id' of SELECT #2 was resolved in SELECT #1 Note 1003 /* select#1 */ select `test`.`tt`.`id` AS `id`,`test`.`tt`.`text` AS `text` from `test`.`t1` `tt` where !<in_optimizer>(1,<expr_cache><`test`.`tt`.`id`>(exists(/* select#2 */ select `test`.`t1`.`id` from `test`.`t1` where `test`.`t1`.`id` < 8 and `test`.`t1`.`id` = `test`.`tt`.`id` having `test`.`t1`.`id` is not null limit 1))) diff --git a/mysql-test/main/subselect_no_opts.result b/mysql-test/main/subselect_no_opts.result index 09f664d..dbc4eb5 100644 --- a/mysql-test/main/subselect_no_opts.result +++ b/mysql-test/main/subselect_no_opts.result @@ -1889,13 +1889,13 @@ id text explain extended select * from t1 where id not in (select id from t1 where id < 8); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 100.00 Using index; Using where +2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 58.33 Using index; Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where !<in_optimizer>(`test`.`t1`.`id`,<exists>(<primary_index_lookup>(<cache>(`test`.`t1`.`id`) in t1 on PRIMARY where `test`.`t1`.`id` < 8 and <cache>(`test`.`t1`.`id`) = `test`.`t1`.`id`))) explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 58.33 Using where; Using index Warnings: Note 1276 Field or reference 'test.tt.id' of SELECT #2 was resolved in SELECT #1 Note 1003 /* select#1 */ select `test`.`tt`.`id` AS `id`,`test`.`tt`.`text` AS `text` from `test`.`t1` `tt` where !<in_optimizer>(1,exists(/* select#2 */ select `test`.`t1`.`id` from `test`.`t1` where `test`.`t1`.`id` < 8 and `test`.`t1`.`id` = `test`.`tt`.`id` having `test`.`t1`.`id` is not null limit 1)) diff --git a/mysql-test/main/subselect_no_scache.result b/mysql-test/main/subselect_no_scache.result index 765bb15..7e37b88 100644 --- a/mysql-test/main/subselect_no_scache.result +++ b/mysql-test/main/subselect_no_scache.result @@ -1892,13 +1892,13 @@ id text explain extended select * from t1 where id not in (select id from t1 where id < 8); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 100.00 Using index; Using where +2 DEPENDENT SUBQUERY t1 unique_subquery PRIMARY PRIMARY 4 func 1 58.33 Using index; Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS `text` from `test`.`t1` where !<in_optimizer>(`test`.`t1`.`id`,<exists>(<primary_index_lookup>(<cache>(`test`.`t1`.`id`) in t1 on PRIMARY where `test`.`t1`.`id` < 8 and <cache>(`test`.`t1`.`id`) = `test`.`t1`.`id`))) explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 58.33 Using where; Using index Warnings: Note 1276 Field or reference 'test.tt.id' of SELECT #2 was resolved in SELECT #1 Note 1003 /* select#1 */ select `test`.`tt`.`id` AS `id`,`test`.`tt`.`text` AS `text` from `test`.`t1` `tt` where !<in_optimizer>(1,exists(/* select#2 */ select `test`.`t1`.`id` from `test`.`t1` where `test`.`t1`.`id` < 8 and `test`.`t1`.`id` = `test`.`tt`.`id` having `test`.`t1`.`id` is not null limit 1)) diff --git a/mysql-test/main/subselect_no_semijoin.result b/mysql-test/main/subselect_no_semijoin.result index 97d2f3b..77e6306 100644 --- a/mysql-test/main/subselect_no_semijoin.result +++ b/mysql-test/main/subselect_no_semijoin.result @@ -1895,7 +1895,7 @@ Note 1003 /* select#1 */ select `test`.`t1`.`id` AS `id`,`test`.`t1`.`text` AS ` explain extended select * from t1 as tt where not exists (select id from t1 where id < 8 and (id = tt.id or id is null) having id is not null); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY tt ALL NULL NULL NULL NULL 12 100.00 Using where -2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 100.00 Using where; Using index +2 DEPENDENT SUBQUERY t1 eq_ref PRIMARY PRIMARY 4 test.tt.id 1 58.33 Using where; Using index Warnings: Note 1276 Field or reference 'test.tt.id' of SELECT #2 was resolved in SELECT #1 Note 1003 /* select#1 */ select `test`.`tt`.`id` AS `id`,`test`.`tt`.`text` AS `text` from `test`.`t1` `tt` where !<in_optimizer>(1,<expr_cache><`test`.`tt`.`id`>(exists(/* select#2 */ select `test`.`t1`.`id` from `test`.`t1` where `test`.`t1`.`id` < 8 and `test`.`t1`.`id` = `test`.`tt`.`id` having `test`.`t1`.`id` is not null limit 1))) diff --git a/mysql-test/main/subselect_sj2.result b/mysql-test/main/subselect_sj2.result index 8045a81..31e94cc 100644 --- a/mysql-test/main/subselect_sj2.result +++ b/mysql-test/main/subselect_sj2.result @@ -1032,8 +1032,8 @@ alias2.b = alias1.a AND id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL a NULL NULL NULL 38 1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 8 func,func 1 -2 MATERIALIZED alias1 ALL a NULL NULL NULL 19 Using where -2 MATERIALIZED alias2 ref a a 4 test.alias1.a 1 Using where +2 MATERIALIZED alias2 ALL a NULL NULL NULL 19 Using where +2 MATERIALIZED alias1 ref a a 4 test.alias2.a 1 Using where SELECT * FROM t2 WHERE (a, a) IN (SELECT alias2.b, alias2.a FROM t1 AS alias1, t1 AS alias2 WHERE diff --git a/mysql-test/main/subselect_sj2_jcl6.result b/mysql-test/main/subselect_sj2_jcl6.result index 22310ab..3a8f4a6 100644 --- a/mysql-test/main/subselect_sj2_jcl6.result +++ b/mysql-test/main/subselect_sj2_jcl6.result @@ -1045,8 +1045,8 @@ alias2.b = alias1.a AND id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL a NULL NULL NULL 38 1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 8 func,func 1 -2 MATERIALIZED alias1 ALL a NULL NULL NULL 19 Using where -2 MATERIALIZED alias2 ref a a 4 test.alias1.a 1 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan +2 MATERIALIZED alias2 ALL a NULL NULL NULL 19 Using where +2 MATERIALIZED alias1 ref a a 4 test.alias2.a 1 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan SELECT * FROM t2 WHERE (a, a) IN (SELECT alias2.b, alias2.a FROM t1 AS alias1, t1 AS alias2 WHERE diff --git a/mysql-test/main/subselect_sj2_mat.result b/mysql-test/main/subselect_sj2_mat.result index 7ccaed5..bbdd1bd 100644 --- a/mysql-test/main/subselect_sj2_mat.result +++ b/mysql-test/main/subselect_sj2_mat.result @@ -1034,8 +1034,8 @@ alias2.b = alias1.a AND id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL a NULL NULL NULL 38 1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 8 func,func 1 -2 MATERIALIZED alias1 ALL a NULL NULL NULL 19 Using where -2 MATERIALIZED alias2 ref a a 4 test.alias1.a 1 Using where +2 MATERIALIZED alias2 ALL a NULL NULL NULL 19 Using where +2 MATERIALIZED alias1 ref a a 4 test.alias2.a 1 Using where SELECT * FROM t2 WHERE (a, a) IN (SELECT alias2.b, alias2.a FROM t1 AS alias1, t1 AS alias2 WHERE diff --git a/sql/field.h b/sql/field.h index f07eab3..0abd582 100644 --- a/sql/field.h +++ b/sql/field.h @@ -718,6 +718,8 @@ class Field: public Value_source */ double cond_selectivity; + double ndv; + /* The next field in the class of equal fields at the top AND level of the WHERE clause diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 8c6b0de..78ead1f0 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -198,6 +198,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, } total_rows += rows; total_touched_blocks+= new_touched_blocks; + KEY_PART_INFO *kp= table->key_info[keyno]->key_part + + table->curr_key_tree_part; + kp->quick_cnt+= rows; } if (total_rows != HA_POS_ERROR) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index c47da28..3202e34 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3218,17 +3218,6 @@ double records_in_column_ranges(PARAM *param, uint idx, on the rows of 'table' in the processed query. The calculated selectivity is assigned to the field table->cond_selectivity. - Selectivity is calculated as a product of selectivities imposed by: - - 1. possible range accesses. (if multiple range accesses use the same - restrictions on the same field, we make adjustments for that) - 2. Sargable conditions on fields for which we have column statistics (if - a field is used in a possible range access, we assume that selectivity - is already provided by the range access' estimates) - 3. Reading a few records from the table pages and checking the condition - selectivity (this is used for conditions like "column LIKE '%val%'" - where approaches #1 and #2 do not provide selectivity data). - NOTE Currently the selectivities of range conditions over different columns are considered independent. @@ -3238,7 +3227,8 @@ double records_in_column_ranges(PARAM *param, uint idx, TRUE otherwise */ -bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, + COND_EQUAL *cond_equal, Item **cond) { uint keynr; uint max_quick_key_parts= 0; @@ -3265,131 +3255,17 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) if (table->pos_in_table_list->schema_table) DBUG_RETURN(FALSE); - MY_BITMAP handled_columns; - my_bitmap_map* buf; - if (!(buf= (my_bitmap_map*)thd->alloc(table->s->column_bitmap_size))) - DBUG_RETURN(TRUE); - my_bitmap_init(&handled_columns, buf, table->s->fields, FALSE); - - /* - Calculate the selectivity of the range conditions supported by indexes. - - First, take into account possible range accesses. - range access estimates are the most precise, we prefer them to any other - estimate sources. - */ - - Json_writer_object trace_wrapper(thd); - Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes"); - - for (keynr= 0; keynr < table->s->keys; keynr++) - { - if (table->quick_keys.is_set(keynr)) - set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]); - } - - /* - Walk through all indexes, indexes where range access uses more keyparts - go first. - */ - for (uint quick_key_parts= max_quick_key_parts; - quick_key_parts; quick_key_parts--) - { - for (keynr= 0; keynr < table->s->keys; keynr++) - { - if (table->quick_keys.is_set(keynr) && - table->quick_key_parts[keynr] == quick_key_parts) - { - uint i; - uint used_key_parts= table->quick_key_parts[keynr]; - double quick_cond_selectivity= table->quick_rows[keynr] / - table_records; - KEY *key_info= table->key_info + keynr; - KEY_PART_INFO* key_part= key_info->key_part; - /* - Suppose, there are range conditions on two keys - KEY1 (col1, col2) - KEY2 (col3, col2) - - we don't want to count selectivity of condition on col2 twice. - - First, find the longest key prefix that's made of columns whose - selectivity wasn't already accounted for. - */ - for (i= 0; i < used_key_parts; i++, key_part++) - { - if (bitmap_is_set(&handled_columns, key_part->fieldnr-1)) - break; - bitmap_set_bit(&handled_columns, key_part->fieldnr-1); - } - if (i) - { - double UNINIT_VAR(selectivity_mult); - - /* - There is at least 1-column prefix of columns whose selectivity has - not yet been accounted for. - */ - table->cond_selectivity*= quick_cond_selectivity; - Json_writer_object selectivity_for_index(thd); - selectivity_for_index.add("index_name", key_info->name) - .add("selectivity_from_index", - quick_cond_selectivity); - if (i != used_key_parts) - { - /* - Range access got us estimate for #used_key_parts. - We need estimate for #(i-1) key parts. - */ - double f1= key_info->actual_rec_per_key(i-1); - double f2= key_info->actual_rec_per_key(i); - if (f1 > 0 && f2 > 0) - selectivity_mult= f1 / f2; - else - { - /* - No statistics available, assume the selectivity is proportional - to the number of key parts. - (i=0 means 1 keypart, i=1 means 2 keyparts, so use i+1) - */ - selectivity_mult= ((double)(i+1)) / i; - } - table->cond_selectivity*= selectivity_mult; - selectivity_for_index.add("selectivity_multiplier", - selectivity_mult); - } - /* - We need to set selectivity for fields supported by indexes. - For single-component indexes and for some first components - of other indexes we do it here. For the remaining fields - we do it later in this function, in the same way as for the - fields not used in any indexes. - */ - if (i == 1) - { - uint fieldnr= key_info->key_part[0].fieldnr; - table->field[fieldnr-1]->cond_selectivity= quick_cond_selectivity; - if (i != used_key_parts) - table->field[fieldnr-1]->cond_selectivity*= selectivity_mult; - bitmap_clear_bit(used_fields, fieldnr-1); - } - } - } - } - } - selectivity_for_indexes.end(); - - /* - Second step: calculate the selectivity of the range conditions not - supported by any index and selectivity of the range condition - over the fields whose selectivity has not been set yet. - */ - Json_writer_array selectivity_for_columns(thd, "selectivity_for_columns"); + if (thd->variables.use_stat_tables == 0 || !table->stats_is_read) + bitmap_clear_all(used_fields); if (thd->variables.optimizer_use_condition_selectivity > 2 && - !bitmap_is_clear_all(used_fields) && - thd->variables.use_stat_tables > 0 && table->stats_is_read) + !bitmap_is_clear_all(used_fields)) { + /* + Calculate the selectivity of the range conditions not supported + by any index + */ + PARAM param; MEM_ROOT alloc; SEL_TREE *tree; @@ -3402,7 +3278,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) param.mem_root= &alloc; param.old_root= thd->mem_root; param.table= table; - param.remove_false_where_parts= true; if (create_key_parts_for_pseudo_indexes(¶m, used_fields)) goto free_alloc; @@ -3412,6 +3287,7 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) param.using_real_indexes= FALSE; param.real_keynr[0]= 0; param.alloced_sel_args= 0; + param.remove_false_where_parts= true; thd->no_errors=1; @@ -3443,14 +3319,10 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) SEL_ARG *key= tree->keys[idx]; if (key) { - Json_writer_object selectivity_for_column(thd); - selectivity_for_column.add("column_name", key->field->field_name); if (key->type == SEL_ARG::IMPOSSIBLE) { rows= 0; table->reginfo.impossible_range= 1; - selectivity_for_column.add("selectivity_from_histogram", rows); - selectivity_for_column.add("cause", "impossible range"); goto free_alloc; } else @@ -3459,8 +3331,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) if (rows != DBL_MAX) { key->field->cond_selectivity= rows/table_records; - selectivity_for_column.add("selectivity_from_histogram", - key->field->cond_selectivity); } } } @@ -3469,12 +3339,9 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) for (Field **field_ptr= table->field; *field_ptr; field_ptr++) { Field *table_field= *field_ptr; - if (bitmap_is_set(used_fields, table_field->field_index) && + if (bitmap_is_set(table->read_set, table_field->field_index) && table_field->cond_selectivity < 1.0) - { - if (!bitmap_is_set(&handled_columns, table_field->field_index)) - table->cond_selectivity*= table_field->cond_selectivity; - } + table->cond_selectivity*= table_field->cond_selectivity; } free_alloc: @@ -3483,7 +3350,104 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) free_root(&alloc, MYF(0)); } - selectivity_for_columns.end(); + + for (keynr= 0; keynr < table->s->keys; keynr++) + { + if (table->quick_keys.is_set(keynr)) + set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]); + } + + bool is_sel_added; + do + { + is_sel_added= false; + for (uint quick_key_parts= 1; + quick_key_parts <= max_quick_key_parts; + quick_key_parts++) + { + for (keynr= 0; keynr < table->s->keys; keynr++) + { + uint used_key_parts= table->quick_key_parts[keynr]; + if (!(table->quick_keys.is_set(keynr) && + used_key_parts == quick_key_parts)) + continue; + KEY *key_info= table->key_info + keynr; + KEY_PART_INFO* key_part= key_info->key_part; + if (bitmap_is_set(used_fields, key_part->fieldnr-1)) + continue; + Field *first_kp_field= table->field[key_part->fieldnr-1]; + double quick_cond_selectivity= table->quick_rows[keynr] / + table_records; + double selectivity_mult= 1.0; + uint i= 1; + for (key_part++; i < used_key_parts; i++, key_part++) + { + if (!bitmap_is_set(used_fields, key_part->fieldnr-1)) + break; + selectivity_mult/= + table->field[key_part->fieldnr-1]->cond_selectivity; + } + if (i != used_key_parts) + continue; + first_kp_field->cond_selectivity= quick_cond_selectivity * + selectivity_mult; + set_if_smaller(first_kp_field->cond_selectivity, 1.0); + table->cond_selectivity*= first_kp_field->cond_selectivity; + bitmap_set_bit(used_fields, first_kp_field->field_index); + is_sel_added= true; + } + } + } while (is_sel_added); + + for (uint quick_key_parts= 1; + quick_key_parts <= max_quick_key_parts; + quick_key_parts++) + { + for (keynr= 0; keynr < table->s->keys; keynr++) + { + uint used_key_parts= table->quick_key_parts[keynr]; + if (!(table->quick_keys.is_set(keynr) && + used_key_parts == quick_key_parts)) + continue; + KEY *key_info= table->key_info + keynr; + KEY_PART_INFO* key_part= key_info->key_part; + if (bitmap_is_set(used_fields, key_part->fieldnr-1)) + continue; + Field *first_kp_field= table->field[key_part->fieldnr-1]; + double quick_cond_selectivity= table->quick_rows[keynr] / + table_records; + key_part+= used_key_parts - 1; + double selectivity_mult= 1.0; + for (uint i= used_key_parts - 1; i; i--, key_part--) + { + if (!bitmap_is_set(used_fields, key_part->fieldnr-1)) + { + double field_selectivity; + double f1= key_info->actual_rec_per_key(i-1); + double f2= key_info->actual_rec_per_key(i); + if (f1 > 0 && f2 > 0) + field_selectivity= f2 / f1; + else + { + /* + No statistics available, assume the field selectivity + is equal to 1 + */ + field_selectivity= 1; + } + table->cond_selectivity*= + table->field[i]->cond_selectivity= field_selectivity; + bitmap_set_bit(used_fields, table->field[i]->field_index); + } + selectivity_mult/= table->field[key_part->fieldnr-1]->cond_selectivity; + } + first_kp_field->cond_selectivity= quick_cond_selectivity * + selectivity_mult; + set_if_smaller(first_kp_field->cond_selectivity, 1.0); + table->cond_selectivity*= first_kp_field->cond_selectivity; + bitmap_set_bit(used_fields, first_kp_field->field_index); + } + } if (quick && (quick->get_type() == QUICK_SELECT_I::QS_TYPE_ROR_UNION || quick->get_type() == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)) @@ -3491,8 +3455,6 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) table->cond_selectivity*= (quick->records/table_records); } - bitmap_union(used_fields, &handled_columns); - /* Check if we can improve selectivity estimates by using sampling */ ulong check_rows= MY_MIN(thd->variables.optimizer_selectivity_sampling_limit, @@ -3552,13 +3514,64 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) } } } - } /* This list and its elements put to mem_root so should not be freed */ table->cond_selectivity_sampling_explain= &dt->list; } } - trace_wrapper.add("cond_selectivity", table->cond_selectivity); + + /* + Take into account selectivity of the equalities between columns + of the table s and discount selectivities of range conditions + induced by these equalities + */ + + if (cond_equal && cond_equal->current_level.elements) + { + Item_equal *item_equal; + List_iterator_fast<Item_equal> it(cond_equal->current_level); + table_map table_bit= table->map; + while ((item_equal= it++)) + { + if (!(item_equal->used_tables() & table_bit)) + continue; + uint eq_cnt= 0; + double max_ndv= 0.0; + Item_equal_fields_iterator fi(*item_equal); + bool found= false; + while (fi++) + { + Field *fld= fi.get_curr_field(); + if (fld->table->map != table_bit) + continue; + set_if_bigger(max_ndv, fld->ndv); + if (!found) + found= true; + else + { + /* + The field fld is equal to some other field of the table whose range + condition selectivity has been already taken into accout. Discount + selectivit of the range condition for the field fld. + */ + table->cond_selectivity/= fld->cond_selectivity; + eq_cnt++; + } + } + if (eq_cnt) + { + /* + There are equality predicates over fields of the table s. Their + selectivity has to be taken into account. + */ + for ( ; eq_cnt; eq_cnt--) + table->cond_selectivity/= max_ndv; + } + } + } + + set_if_smaller(table->cond_selectivity,1.0); + DBUG_RETURN(FALSE); } diff --git a/sql/opt_range.h b/sql/opt_range.h index 73def7b..d49c6a3 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1729,7 +1729,8 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, SORT_INFO* filesort, bool allow_null_cond, int *error); -bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, + COND_EQUAL *cond_equl, Item **cond); bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param, uint limit); diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc index 4afa06a..75c12ef 100644 --- a/sql/opt_range_mrr.cc +++ b/sql/opt_range_mrr.cc @@ -340,6 +340,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) } seq->param->range_count++; seq->param->max_key_parts= MY_MAX(seq->param->max_key_parts, max_key_parts); + seq->param->table->curr_key_tree_part= key_tree->part; return 0; } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 896135d..e1c27cb 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5337,6 +5337,16 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->table->cond_selectivity= 1.0; + /* Calculate expected number of distinct values for fields */ + double table_records= (double)table->stat_records(); + for (Field **field_ptr= s->table->field; *field_ptr; field_ptr++) + { + Field *table_field= *field_ptr; + if (bitmap_is_set(s->table->read_set, table_field->field_index)) + table_field->ndv= table_records / + get_column_avg_frequency(table_field); + } + /* Perform range analysis if there are keys it could use (1). Don't do range analysis for materialized subqueries (2). @@ -5384,7 +5394,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, if (!sargable_cond) sargable_cond= get_sargable_cond(join, s->table); if (join->thd->variables.optimizer_use_condition_selectivity > 1) - calculate_cond_selectivity_for_table(join->thd, s->table, + calculate_cond_selectivity_for_table(join->thd, s->table, join->cond_equal, sargable_cond); if (s->table->reginfo.impossible_range) { @@ -5433,7 +5443,14 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, add_table_scan_values_to_trace(thd, s); } else + { + Item **sargable_cond= NULL; + sargable_cond= get_sargable_cond(join, s->table); + if (join->thd->variables.optimizer_use_condition_selectivity > 1) + calculate_cond_selectivity_for_table(join->thd, s->table, join->cond_equal, + sargable_cond); add_table_scan_values_to_trace(thd, s); + } } } @@ -5494,13 +5511,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, if (!(join->select_options & SELECT_DESCRIBE) && unit->derived && unit->derived->is_materialized_derived()) { - /* - Calculate estimated number of rows for materialized derived - table/view. - */ - for (i= 0; i < join->table_count ; i++) - if (double rr= join->best_positions[i].records_read) - records= COST_MULT(records, rr); + records= join->join_record_count; ha_rows rows= records > HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records; set_if_smaller(rows, unit->select_limit_cnt); join->select_lex->increase_derived_records(rows); @@ -5872,7 +5883,9 @@ add_key_field(JOIN *join, if (is_const) { stat[0].const_keys.merge(possible_keys); - bitmap_set_bit(&field->table->cond_set, field->field_index); + if (possible_keys.is_clear_all() && + join->thd->variables.optimizer_use_condition_selectivity > 2) + bitmap_set_bit(&field->table->cond_set, field->field_index); } else if (!eq_func) { @@ -7109,7 +7122,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) inline double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, - uint use_cond_selectivity) + uint use_cond_selectivity) { ha_rows records; double dbl_records; @@ -7117,9 +7130,14 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint, if (use_cond_selectivity > 1) { TABLE *table= s->table; - double sel= table->cond_selectivity; - double table_records= (double)table->stat_records(); - dbl_records= table_records * sel; + if (s->quick) + dbl_records= table->quick_condition_rows; + else + { + double sel= table->cond_selectivity; + double table_records= (double)table->stat_records(); + dbl_records= table_records * sel; + } return dbl_records; } @@ -8862,115 +8880,88 @@ double JOIN::get_examined_rows() */ static -double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, - table_map rem_tables, uint keyparts, - uint16 *ref_keyuse_steps) -{ +double table_multi_eq_cond_selectivity(JOIN *join, JOIN_TAB *s, + table_map rem_tables) +{ double sel= 1.0; +#if 1 +#else COND_EQUAL *cond_equal= join->cond_equal; - if (!cond_equal || !cond_equal->current_level.elements) return sel; - if (!s->keyuse) - return sel; - Item_equal *item_equal; List_iterator_fast<Item_equal> it(cond_equal->current_level); TABLE *table= s->table; table_map table_bit= table->map; - POSITION *pos= &join->positions[idx]; - + while ((item_equal= it++)) { - /* - Check whether we need to take into account the selectivity of - multiple equality item_equal. If this is the case multiply - the current value of sel by this selectivity - */ table_map used_tables= item_equal->used_tables(); if (!(used_tables & table_bit)) + { + /* Colums of table s are not used in this item_equal */ continue; + } if (item_equal->get_const()) + { + /* + Selectivity of the equalities from item_equal have been already + taken into account as selectivity of range condititions + */ continue; - - bool adjust_sel= FALSE; + } + + double max_ndv= 0.0; + bool used_for_ref= false; + uint n= 0; Item_equal_fields_iterator fi(*item_equal); - while((fi++) && !adjust_sel) + while (fi++) { Field *fld= fi.get_curr_field(); if (fld->table->map != table_bit) continue; - if (pos->key == 0) - adjust_sel= TRUE; + if (bitmap_is_set(&table->tmp_set, fld->field_index)) + used_for_ref= true; else { - uint i; - KEYUSE *keyuse= pos->key; - uint key= keyuse->key; - for (i= 0; i < keyparts; i++) - { - if (i > 0) - keyuse+= ref_keyuse_steps[i-1]; - uint fldno; - if (is_hash_join_key_no(key)) - fldno= keyuse->keypart; - else - fldno= table->key_info[key].key_part[i].fieldnr - 1; - if (fld->field_index == fldno) - break; - } - keyuse= pos->key; - - if (i == keyparts) - { - /* - Field fld is included in multiple equality item_equal - and is not a part of the ref key. - The selectivity of the multiple equality must be taken - into account unless one of the ref arguments is - equal to fld. - */ - adjust_sel= TRUE; - for (uint j= 0; j < keyparts && adjust_sel; j++) - { - if (j > 0) - keyuse+= ref_keyuse_steps[j-1]; - Item *ref_item= keyuse->val; - if (ref_item->real_item()->type() == Item::FIELD_ITEM) - { - Item_field *field_item= (Item_field *) (ref_item->real_item()); - if (item_equal->contains(field_item->field)) - adjust_sel= FALSE; - } - } - } + set_if_bigger(max_ndv, fld->ndv); + n++; } } - if (adjust_sel) + /* + n == number of fields of the multiple equality belonging to s that + are not accessed by ref in the currently evaluated partial plan + */ + if (!used_for_ref) + { + /* no field of the multiple equality belonging to s is accessed by ref */ + n--; + } + else { - /* - If ref == 0 and there are no fields in the multiple equality - item_equal that belong to the tables joined prior to s - then the selectivity of multiple equality will be set to 1.0. - */ - double eq_fld_sel= 1.0; fi.rewind(); - while ((fi++)) + bool found= false; + while(fi++) { - double curr_eq_fld_sel; - Field *fld= fi.get_curr_field(); - if (!(fld->table->map & ~(table_bit | rem_tables))) + Field *fld= fi.get_curr_field(); + if (fld->table->map & (rem_tables | table_bit)) continue; - curr_eq_fld_sel= get_column_avg_frequency(fld) / - fld->table->stat_records(); - if (curr_eq_fld_sel < 1.0) - set_if_bigger(eq_fld_sel, curr_eq_fld_sel); + found= true; + set_if_bigger(max_ndv, fld->ndv); } - sel*= eq_fld_sel; + if (!found) + n--; } - } - return sel; + if (max_ndv != 0.0) // max_ndv may be 0.0 if the table is empty + { + for ( ; n; n--) + sel/= max_ndv; + } + } +#endif + + return sel; } @@ -8983,15 +8974,6 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, @param rem_tables The bitmap of tables to be joined later @detail - Get selectivity of conditions that can be applied when joining this table - with previous tables. - - For quick selects and full table scans, selectivity of COND(this_table) - is accounted for in matching_candidates_in_table(). Here, we only count - selectivity of COND(this_table, previous_tables). - - For other access methods, we need to calculate selectivity of the whole - condition, "COND(this_table) AND COND(this_table, previous_tables)". @retval selectivity of the conditions imposed on the rows of s @@ -9001,105 +8983,34 @@ static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, table_map rem_tables) { - uint16 ref_keyuse_steps[MAX_REF_PARTS - 1]; - Field *field; TABLE *table= s->table; - MY_BITMAP *read_set= table->read_set; double sel= s->table->cond_selectivity; - POSITION *pos= &join->positions[idx]; + double table_records= table->stat_records(); uint keyparts= 0; uint found_part_ref_or_null= 0; + POSITION *pos= &join->positions[idx]; - if (pos->key != 0) + /* Discount the selectivity of the access method used to join table s */ + if (s->quick && s->quick->index != MAX_KEY && pos->key == 0) { /* - A ref access or hash join is used for this table. ref access is created - from - - tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ... - - and it will only return rows for which this condition is satisified. - Suppose, certain expr{i} is a constant. Since ref access only returns - rows that satisfy - - tbl.keypart{i}=const (*) - - then selectivity of this equality should not be counted in return value - of this function. This function uses the value of - - table->cond_selectivity=selectivity(COND(tbl)) (**) - - as a starting point. This value includes selectivity of equality (*). We - should somehow discount it. - - Looking at calculate_cond_selectivity_for_table(), one can see that that - the value is not necessarily a direct multiplicand in - table->cond_selectivity - - There are three possible ways to discount - 1. There is a potential range access on t.keypart{i}=const. - (an important special case: the used ref access has a const prefix for - which a range estimate is available) - - 2. The field has a histogram. field[x]->cond_selectivity has the data. - - 3. Use index stats on this index: - rec_per_key[key_part+1]/rec_per_key[key_part] - - (TODO: more details about the "t.key=othertable.col" case) - */ - KEYUSE *keyuse= pos->key; - KEYUSE *prev_ref_keyuse= keyuse; - uint key= keyuse->key; - bool used_range_selectivity= false; - - /* - Check if we have a prefix of key=const that matches a quick select. + A range access to the table s is used when joining s. + Discount the selectivity of the range from sel. */ - if (!is_hash_join_key_no(key)) + if (table_records > 0) { - key_part_map quick_key_map= (key_part_map(1) << table->quick_key_parts[key]) - 1; - if (table->quick_rows[key] && - !(quick_key_map & ~table->const_key_parts[key])) - { - /* - Ok, there is an equality for each of the key parts used by the - quick select. This means, quick select's estimate can be reused to - discount the selectivity of a prefix of a ref access. - */ - for (; quick_key_map & 1 ; quick_key_map>>= 1) - { - while (keyuse->table == table && keyuse->key == key && - keyuse->keypart == keyparts) - { - keyuse++; - } - keyparts++; - } - /* - Here we discount selectivity of the constant range CR. To calculate - this selectivity we use elements from the quick_rows[] array. - If we have indexes i1,...,ik with the same prefix compatible - with CR any of the estimate quick_rows[i1], ... quick_rows[ik] could - be used for this calculation but here we don't know which one was - actually used. So sel could be greater than 1 and we have to cap it. - However if sel becomes greater than 2 then with high probability - something went wrong. - */ - sel /= (double)table->quick_rows[key] / (double) table->stat_records(); - // MDEV-20595 FIXME: DBUG_ASSERT(sel > 0 && sel <= 2.0); - set_if_smaller(sel, 1.0); - used_range_selectivity= true; - } + if (sel < 1.0) + sel/= s->quick->records/table_records; + set_if_smaller(sel, 1.0); } - - /* - Go through the "keypart{N}=..." equalities and find those that were - already taken into account in table->cond_selectivity. - */ - keyuse= pos->key; - keyparts=0; - while (keyuse->table == table && keyuse->key == key) + } + else if (pos->key != 0) + { + /* A ref access or hash join is used to join table */ + bitmap_clear_all(&table->tmp_set); + KEYUSE *keyuse= pos->key; + uint key= keyuse->key; + do { if (!(keyuse->used_tables & (rem_tables | table->map))) { @@ -9116,97 +9027,102 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, !((keyuse->val->used_tables()) & ~pos->ref_depend_map) && !(found_part_ref_or_null & keyuse->optimize)) { - /* Found a KEYUSE object that will be used by ref access */ - keyparts++; found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; } + keyparts++; } - if (keyparts > keyuse->keypart) + if (keyparts - 1 == keyuse->keypart) { - /* Ok this is the keyuse that will be used for ref access */ - if (!used_range_selectivity && keyuse->val->const_item()) - { - uint fldno; - if (is_hash_join_key_no(key)) - fldno= keyuse->keypart; - else - fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; - - if (table->field[fldno]->cond_selectivity > 0) - { - sel /= table->field[fldno]->cond_selectivity; - // MDEV-20595 FIXME: DBUG_ASSERT(sel > 0 && sel <= 2.0); - set_if_smaller(sel, 1.0); + uint fldno; + if (is_hash_join_key_no(key)) + fldno= keyuse->keypart; + else + fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; + if (keyuse->val->const_item()) + { + DBUG_ASSERT(!is_hash_join_key_no(key)); + if (table->field[fldno]->cond_selectivity < 1.0) + { + /* + The value of the field fld with number fldno of the table s + is calculated using a constant item: fld=c. + We need to discount the selectivity of this conditition. + Unfortunately this selectivity is not equal to the precomputed + table->field[fldno]->cond_selectivity unless fld=c is the only + condition imposed on fld. So we have to calculate the + selectivity of fld=c somehow and discount it from sel. + Currently we use rec_per_key values of index with number key + to calculate this selectivity. + */ + double discount_sel; + KEY *key_info= table->key_info + key; + uint keypart= keyuse->keypart; + if (keypart == 0) + { + double rec_per_key= key_info->actual_rec_per_key(0); + if (rec_per_key == 0) + rec_per_key= 1; + discount_sel= rec_per_key / table_records; + } + else + { + discount_sel= key_info->actual_rec_per_key(keypart) / + key_info->actual_rec_per_key(keypart - 1); + } + sel/= discount_sel; } - /* - TODO: we could do better here: - 1. cond_selectivity might be =1 (the default) because quick - select on some index prevented us from analyzing - histogram for this column. - 2. we could get an estimate through this? - rec_per_key[key_part-1] / rec_per_key[key_part] - */ } - if (keyparts > 1) + else if (keyuse->val->real_item()->type() == Item::FIELD_ITEM) { - ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse); - prev_ref_keyuse= keyuse; + Field *start_field= table->field[fldno]; + bitmap_set_bit(&table->tmp_set, fldno); + if (start_field->next_equal_field) + { + Field *next_field; + bool need_discount= false; + Field *value_field= + ((Item_field *) (keyuse->val->real_item()))->field; + for (next_field= start_field->next_equal_field; + next_field != start_field; + next_field= next_field->next_equal_field) + { + if (next_field == value_field) + { + need_discount= true; + break; + } + } + /* + To join the table s ref access is used that employs + the equality + */ + if (need_discount) + { + next_field= start_field; + do + { + if (next_field->table == start_field->table) + sel/= next_field->cond_selectivity; + next_field= next_field->next_equal_field; + } while (next_field != start_field); + } + } } } } } keyuse++; - } + } while (keyuse->table == table && keyuse->key == key); } - else + else { - /* - The table is accessed with full table scan, or quick select. - Selectivity of COND(table) is already accounted for in - matching_candidates_in_table(). - */ sel= 1; } - /* - If the field f from the table is equal to a field from one the - earlier joined tables then the selectivity of the range conditions - over the field f must be discounted. - - We need to discount selectivity only if we're using ref-based - access method (and have sel!=1). - If we use ALL/range/index_merge, then sel==1, and no need to discount. - */ - if (pos->key != NULL) - { - for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) - { - if (!bitmap_is_set(read_set, field->field_index) || - !field->next_equal_field) - continue; - for (Field *next_field= field->next_equal_field; - next_field != field; - next_field= next_field->next_equal_field) - { - if (!(next_field->table->map & rem_tables) && next_field->table != table) - { - if (field->cond_selectivity > 0) - { - sel/= field->cond_selectivity; - // MDEV-20595 FIXME: DBUG_ASSERT(sel > 0 && sel <= 2.0); - set_if_smaller(sel, 1.0); - } - break; - } - } - } - } - - sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, - keyparts, ref_keyuse_steps); + sel*= table_multi_eq_cond_selectivity(join, s, rem_tables); - DBUG_ASSERT(0.0 < sel && sel <= 1.0); + DBUG_ASSERT(0.0 < sel && sel <= 1.0 + DBL_EPSILON); return sel; } diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index a94fb11..ebf5d2d 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3824,7 +3824,7 @@ double get_column_avg_frequency(Field * field) Column_statistics *col_stats= field->read_stats; - if (!col_stats) + if (!col_stats || col_stats->get_avg_frequency() == 0.0) res= (double)table->stat_records(); else res= col_stats->get_avg_frequency(); diff --git a/sql/structs.h b/sql/structs.h index 0c00aee..6064b66 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -72,6 +72,7 @@ typedef struct st_key_part_info { /* Info about a key part */ NOT necessarily the original Field */ uint offset; /* Offset in record (from 0) */ uint null_offset; /* Offset to null_bit in record */ + uint quick_cnt; /* Length of key part in bytes, excluding NULL flag and length bytes */ uint16 length; /* diff --git a/sql/table.h b/sql/table.h index 8297774..a7f48f9 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1244,6 +1244,7 @@ struct TABLE */ ha_rows quick_rows[MAX_KEY]; uint quick_key_parts[MAX_KEY]; + uint16 curr_quick_cnt; double quick_costs[MAX_KEY]; /*
participants (1)
-
IgorBabaev