[Commits] fb230256ae3: MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity
revision-id: fb230256ae3473e4b4191abfea645bd0faa58a82 (mariadb-10.1.37-47-gfb230256ae3) parent(s): 9ad1663f785d28a731ab8212ea62b8125282a27f author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2018-12-23 23:50:33 +0300 message: MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity Make the "ORDER BY ... LIMIT n" optimizer take into account condition selectivity data from EITS (not just from potential range accesses). (Commit into 10.1) --- mysql-test/r/order_by.result | 37 +++++++++++++++++++++++++++++++++++++ mysql-test/t/order_by.test | 37 +++++++++++++++++++++++++++++++++++++ sql/sql_select.cc | 22 ++++++++++++++++++++++ 3 files changed, 96 insertions(+) diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result index 4cd9aebdf49..0772322dde3 100644 --- a/mysql-test/r/order_by.result +++ b/mysql-test/r/order_by.result @@ -3207,3 +3207,40 @@ pk 2 3 DROP TABLE t1; +# +# MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity +# +create table t1(a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2(a int); +insert into t2 select A.a + B.a* 10 + C.a * 100 from t1 A, t1 B, t1 C; +create table t3(a int); +insert into t3 select A.a + 1000 *B.a from t2 A, t1 B; +create table t4 ( +a int, +b int, +c int, +filler1 char(255), +filler2 char(255), +key(a) +); +insert into t4 select a,a,a, a,a from t3; +set @tmp_h=@@histogram_size, @tmp_u=@@use_stat_tables, +@tmp_o=@@optimizer_use_condition_selectivity; +set histogram_size=100; +set use_stat_tables=preferably; +set optimizer_use_condition_selectivity=4; +analyze table t4 persistent for columns(b) indexes (); +Table Op Msg_type Msg_text +test.t4 analyze status Engine-independent statistics collected +test.t4 analyze status Table is already up to date +# rows must be around 1200, not 600: +explain extended +select * from t4 where b < 5000 order by a limit 600; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t4 index NULL a 5 NULL 1188 100.00 Using where +Warnings: +Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` AS `c`,`test`.`t4`.`filler1` AS `filler1`,`test`.`t4`.`filler2` AS `filler2` from `test`.`t4` where (`test`.`t4`.`b` < 5000) order by `test`.`t4`.`a` limit 600 +set histogram_size=@tmp_h, use_stat_tables=@tmp_u, +optimizer_use_condition_selectivity=@tmp_o; +drop table t1,t2,t3,t4; diff --git a/mysql-test/t/order_by.test b/mysql-test/t/order_by.test index 1ca258d1d48..2696fd2a6c2 100644 --- a/mysql-test/t/order_by.test +++ b/mysql-test/t/order_by.test @@ -2141,3 +2141,40 @@ INSERT INTO t1 VALUES (1),(2),(3); SELECT DISTINCT pk FROM t1 GROUP BY 'foo'; SELECT DISTINCT pk FROM t1; DROP TABLE t1; + +--echo # +--echo # MDEV-17761: Odd optimizer choice with ORDER BY LIMIT and condition selectivity +--echo # +create table t1(a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2(a int); +insert into t2 select A.a + B.a* 10 + C.a * 100 from t1 A, t1 B, t1 C; +create table t3(a int); +insert into t3 select A.a + 1000 *B.a from t2 A, t1 B; + +create table t4 ( + a int, + b int, + c int, + filler1 char(255), + filler2 char(255), + key(a) +); +insert into t4 select a,a,a, a,a from t3; + +set @tmp_h=@@histogram_size, @tmp_u=@@use_stat_tables, + @tmp_o=@@optimizer_use_condition_selectivity; +set histogram_size=100; +set use_stat_tables=preferably; +set optimizer_use_condition_selectivity=4; +analyze table t4 persistent for columns(b) indexes (); + +--echo # rows must be around 1200, not 600: +explain extended +select * from t4 where b < 5000 order by a limit 600; + +set histogram_size=@tmp_h, use_stat_tables=@tmp_u, + optimizer_use_condition_selectivity=@tmp_o; + +drop table t1,t2,t3,t4; + diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 62f40eeb99c..2682c06f0cf 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -25929,7 +25929,11 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, uint tablenr= tab - join->join_tab; read_time= join->best_positions[tablenr].read_time; for (uint i= tablenr+1; i < join->table_count; i++) + { fanout*= join->best_positions[i].records_read; // fanout is always >= 1 + // But selectivity is =< 1 : + fanout*= join->best_positions[i].cond_selectivity; + } } else read_time= table->file->scan_time(); @@ -26067,6 +26071,23 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, */ select_limit= (ha_rows) (select_limit < fanout ? 1 : select_limit/fanout); + + /* + refkey_rows_estimate is E(#rows) produced by the table access + strategy that was picked without regard to ORDER BY ... LIMIT. + + It will be used as the source of selectivity data. + Use table->cond_selectivity as a better estimate which includes + condition selectivity too. + */ + { + // we use MIN(...), because "Using LooseScan" queries have + // cond_selectivity=1 while refkey_rows_estimate has a better + // estimate. + refkey_rows_estimate= MY_MIN(refkey_rows_estimate, + table_records * table->cond_selectivity); + } + /* We assume that each of the tested indexes is not correlated with ref_key. Thus, to select first N records we have to scan @@ -26077,6 +26098,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, N/(refkey_rows_estimate/table_records) > table_records <=> N > refkey_rows_estimate. */ + if (select_limit > refkey_rows_estimate) select_limit= table_records; else
participants (1)
-
Sergei Petrunia