#At lp:maria based on revid:knielsen@knielsen-hq.org-20090522175325-xpwm83ilnhqoqjz0 2706 Sergey Petrunia 2009-06-03 MWL#17: Table elimination - First code. Elimination works for simple cases, passes the testsuite. - Known issues: = No elimination is done for aggregate functions. = EXPLAIN EXTENDED shows eliminated tables (I think it better not) = No benchmark yet = The code needs some polishing. added: mysql-test/r/table_elim.result mysql-test/t/table_elim.test modified: sql/sql_select.cc sql/sql_select.h sql/table.h per-file messages: mysql-test/r/table_elim.result MWL#17: Table elimination - Testcases mysql-test/t/table_elim.test MWL#17: Table elimination - Testcases sql/sql_select.cc MWL#17: Table elimination sql/sql_select.h MWL#17: Table elimination - Added JOIN_TAB::eliminated (is JOIN_TAB the best place to store this flag?) sql/table.h MWL#17: Table elimination - ADded NESTED_JOIN::n_tables. We need to have the number of real tables remaining in an outer join nest. === added file 'mysql-test/r/table_elim.result' --- a/mysql-test/r/table_elim.result 1970-01-01 00:00:00 +0000 +++ b/mysql-test/r/table_elim.result 2009-06-03 13:10:45 +0000 @@ -0,0 +1,56 @@ +drop table if exists t0, t1, t2, t3; +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); +create table t0 as select * from t1; +create table t2 (a int primary key, b int) +as select a, a as b from t1 where a in (1,2); +create table t3 (a int primary key, b int) +as select a, a as b from t1 where a in (1,3); +# This will be eliminated: +explain select t1.a from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +select t1.a from t1 left join t2 on t2.a=t1.a; +a +0 +1 +2 +3 +# This will not be eliminated as t2.b is in in select list: +explain select * from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 +# This will not be eliminated as t2.b is in in order list: +explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 +# This will not be eliminated as t2.b is in group list: +explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 +# This will not be eliminated as t2.b is in the WHERE +explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 Using where +# Elimination of multiple tables: +explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +# Elimination of multiple tables (2): +explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +# Elimination when done within an outer join nest: +explain +select t0.* +from +t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and +t3.a=t1.a) on t0.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 4 +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +drop table t0, t1, t2, t3; === added file 'mysql-test/t/table_elim.test' --- a/mysql-test/t/table_elim.test 1970-01-01 00:00:00 +0000 +++ b/mysql-test/t/table_elim.test 2009-06-03 13:10:45 +0000 @@ -0,0 +1,52 @@ +# +# Table elimination (MWL#17) tests +# +--disable_warnings +drop table if exists t0, t1, t2, t3; +--enable_warnings + +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); +create table t0 as select * from t1; + +create table t2 (a int primary key, b int) + as select a, a as b from t1 where a in (1,2); + +create table t3 (a int primary key, b int) + as select a, a as b from t1 where a in (1,3); + +--echo # This will be eliminated: +explain select t1.a from t1 left join t2 on t2.a=t1.a; + +select t1.a from t1 left join t2 on t2.a=t1.a; + +--echo # This will not be eliminated as t2.b is in in select list: +explain select * from t1 left join t2 on t2.a=t1.a; + +--echo # This will not be eliminated as t2.b is in in order list: +explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b; + +--echo # This will not be eliminated as t2.b is in group list: +explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b; + +## TODO: Aggregate functions prevent table elimination ATM. + +--echo # This will not be eliminated as t2.b is in the WHERE +explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null; + +--echo # Elimination of multiple tables: +explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a; + +--echo # Elimination of multiple tables (2): +explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; + +--echo # Elimination when done within an outer join nest: +explain +select t0.* +from + t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and + t3.a=t1.a) on t0.a=t1.a; + + +drop table t0, t1, t2, t3; + === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2009-05-19 09:28:05 +0000 +++ b/sql/sql_select.cc 2009-06-03 13:10:45 +0000 @@ -42,6 +42,11 @@ #define TMP_ENGINE_HTON myisam_hton #endif +#define FT_KEYPART (MAX_REF_PARTS+10) +/* Values in optimize */ +#define KEY_OPTIMIZE_EXISTS 1 +#define KEY_OPTIMIZE_REF_OR_NULL 2 + const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", "MAYBE_REF","ALL","range","index","fulltext", "ref_or_null","unique_subquery","index_subquery", @@ -2468,6 +2473,304 @@ static ha_rows get_quick_record_count(TH DBUG_RETURN(HA_POS_ERROR); /* This shouldn't happend */ } + +bool has_eq_ref_access_candidate(TABLE *table, table_map can_refer_to_these) +{ + KEYUSE *keyuse= table->reginfo.join_tab->keyuse; + if (keyuse) + { + /* + walk through all of the KEYUSE elements and + - locate unique keys + - check if we have eq_ref access for them + TODO any other reqs? + loops are constructed like in best_access_path + */ + while (keyuse->table == table) + { + uint key= keyuse->key; + key_part_map bound_parts=0; + bool ft_key= test(keyuse->keypart == FT_KEYPART); + + do /* For each keypart and each way to read it */ + { + if (!(keyuse->used_tables & ~can_refer_to_these) && + !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) + { + bound_parts |= keyuse->keypart_map; + } + keyuse++; + } while (keyuse->table && keyuse->key == key); + + KEY *keyinfo= table->key_info + key; + if (!ft_key && + ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) && + bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts)) + { + return TRUE; + } + } + } + return FALSE; +} + + +static void mark_table_as_eliminated(JOIN *join, TABLE *table, uint *const_tbl_count, + table_map *const_tables) +{ + JOIN_TAB *tab= table->reginfo.join_tab; + if (!(*const_tables & tab->table->map)) + { + DBUG_PRINT("info", ("Eliminated table %s", table->alias)); + tab->type= JT_CONST; + tab->eliminated= TRUE; + *const_tables |= table->map; + join->const_table_map|= table->map; + set_position(join, (*const_tbl_count)++, tab, (KEYUSE*)0); + } +} + + +/* + Now on to traversal. There can be a situation like this: + + FROM t1 + LEFT JOIN t2 ON cond(t1,t2) + LEFT JOIN t3 ON cond(..., possibly-t2) // <--(*) + LEFT JOIN t4 ON cond(..., possibly-t2) + + Besides that, simplify_joins() may have created back references, so when + we're e.g. looking at outer join (*) we need to look both forward and + backward to check if there are any references in preceding/following + outer joins' + + TODO would it create only following-sibling references or + preceding-sibling as well? + And if not, should we rely on that? + +*/ + +int +eliminate_tables_for_join_list(JOIN *join, List<TABLE_LIST> *join_list, + table_map used_tables_elsewhere, + uint *const_tbl_count, table_map *const_tables) +{ + List_iterator<TABLE_LIST> it(*join_list); + table_map used_tables_on_right[MAX_TABLES]; // todo change to alloca + table_map used_tables_on_left; + TABLE_LIST *tbl; + int i, n_tables; + int eliminated=0; + + /* Collect the reverse-bitmap-array */ + for (i=0; (tbl= it++); i++) + { + used_tables_on_right[i]= 0; + if (tbl->on_expr) + used_tables_on_right[i]= tbl->on_expr->used_tables(); + if (tbl->nested_join) + used_tables_on_right[i]= tbl->nested_join->used_tables; + } + n_tables= i; + + for (i= n_tables - 2; i > 0; i--) + used_tables_on_right[i] |= used_tables_on_right[i+1]; + + it.rewind(); + + /* Walk through tables and join nests and see if we can eliminate them */ + used_tables_on_left= 0; + i= 1; + while ((tbl= it++)) + { + table_map tables_used_outside= used_tables_on_left | + used_tables_on_right[i] | + used_tables_elsewhere; + table_map cur_tables; + + if (tbl->nested_join) + { + DBUG_ASSERT(tbl->on_expr); + /* + There can be cases where table removal is applicable for tables + within the outer join but not for the outer join itself. Ask to + remove the children first. + + TODO: NoHopelessEliminationAttempts: the below call can return + information about whether it would make any sense to try removing + this entire outer join nest. + */ + int eliminated_in_children= + eliminate_tables_for_join_list(join, &tbl->nested_join->join_list, + tables_used_outside, + const_tbl_count, const_tables); + tbl->nested_join->n_tables -=eliminated_in_children; + cur_tables= tbl->nested_join->used_tables; + if (!(cur_tables & tables_used_outside)) + { + /* + Check if all embedded tables together can produce at most one + record combination. This is true when + - each of them has one_match(outer-tables) property + (this is a stronger condition than all of them together having + this property but that's irrelevant here) + - there are no outer joins among them + (except for the case of outer join which has all inner tables + to be constant and is guaranteed to produce only one record. + that record will be null-complemented) + */ + bool one_match= TRUE; + List_iterator<TABLE_LIST> it2(tbl->nested_join->join_list); + TABLE_LIST *inner; + while ((inner= it2++)) + { + /* + Bail out if we see an outer join (TODO: handle the above + null-complemntated-rows-only case) + */ + if (inner->on_expr) + { + one_match= FALSE; + break; + } + + if (inner->table && // <-- to be removed after NoHopelessEliminationAttempts + !has_eq_ref_access_candidate(inner->table, + ~tbl->nested_join->used_tables)) + { + one_match= FALSE; + break; + } + } + if (one_match) + { + it2.rewind(); + while ((inner= it2++)) + { + mark_table_as_eliminated(join, inner->table, const_tbl_count, + const_tables); + } + eliminated += tbl->nested_join->join_list.elements; + //psergey-todo: do we need to do anything about removing the join + //nest? + } + else + { + eliminated += eliminated_in_children; + } + } + } + else if (tbl->on_expr) + { + cur_tables= tbl->on_expr->used_tables(); + /* Check and remove */ + if (!(tbl->table->map & tables_used_outside) && + has_eq_ref_access_candidate(tbl->table, (table_map)-1)) + { + mark_table_as_eliminated(join, tbl->table, const_tbl_count, + const_tables); + eliminated += 1; + } + } + + /* Update bitmap of tables we've seen on the left */ + i++; + used_tables_on_left |= cur_tables; + } + return eliminated; +} + + +/* + Perform table elimination based on outer join + + SELECT * FROM t1 LEFT JOIN + (t2 JOIN t3) ON t3.primary_key=t1.col AND + t4.primary_key= t2.col + + CRITERIA FOR REMOVING ONE OJ NEST + we can't rely on sole presense of eq_refs. Because if we do, we'll miss + things like this: + + SELECT * FROM flights LEFT JOIN + (pax as S1 JOIN pax as S2 ON S2.id=S1.spouse AND s1.id=s2.spouse) + + (no-polygamy schema/query but there can be many couples on the flight) + .. + + REMOVAL PROCESS + We can remove an inner side of an outer join if it there is a warranty + that it will produce not more than one record: + + ... t1 LEFT JOIN t2 ON (t2.unique_key = expr) ... + + For nested outer joins: + - The process naturally occurs bottom-up (in order to remove an + outer-join we need to analyze its contents) + - If we failed to remove an outer join nest, it makes no sense to + try removing its ancestors, as the + ot LEFT JOIN it ON cond + pair may possibly produce two records (one record via match and + another one as access-method record). + + Q: If we haven't removed an OUTER JOIN, does it make sense to attempt + removing its ancestors? + A: No as the innermost outer join will produce two records => no ancestor + outer join nest will be able to provide the max_fanout==1 guarantee. + + psergey-todo: . +*/ + +static void eliminate_tables(JOIN *join, uint *const_tbl_count, table_map *const_tables) +{ + Item *item; + table_map used_tables; + DBUG_ENTER("eliminate_tables"); + if (!join->outer_join) + DBUG_VOID_RETURN; + + /* Find the tables that are referred to from WHERE/HAVING */ + used_tables= (join->conds? join->conds->used_tables() : 0) | + (join->having? join->having->used_tables() : 0); + + /* Add tables referred to from the select list */ + List_iterator<Item> it(join->fields_list); + while ((item= it++)) + used_tables |= item->used_tables(); + + /* Add tables referred to from ORDER BY and GROUP BY lists */ + ORDER *all_lists[]= { join->order, join->group_list}; + for (int i=0; i < 2; i++) + { + for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next) + used_tables |= (*(cur_list->item))->used_tables(); + } + + THD* thd= join->thd; + if (join->select_lex == &thd->lex->select_lex) + { + /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */ + used_tables |= thd->table_map_for_update; + + /* Multi-table UPDATE: don't eliminate tables referred from SET statement */ + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI) + { + List_iterator<Item> it2(thd->lex->value_list); + while ((item= it2++)) + used_tables |= item->used_tables(); + } + } + + if (((1 << join->tables) - 1) & ~used_tables) + { + /* There are some time tables that we probably could eliminate */ + eliminate_tables_for_join_list(join, join->join_list, used_tables, + const_tbl_count, const_tables); + } + DBUG_VOID_RETURN; +} + + /* This structure is used to collect info on potentially sargable predicates in order to check whether they become sargable after @@ -2823,6 +3126,10 @@ make_join_statistics(JOIN *join, TABLE_L } } + //psergey-todo: table elimination + eliminate_tables(join, &const_count, &found_const_table_map); + //:psergey-todo + /* Calc how many (possible) matched records in each table */ for (s=stat ; s < stat_end ; s++) @@ -2956,9 +3263,6 @@ typedef struct key_field_t { bool *cond_guard; /* See KEYUSE::cond_guard */ } KEY_FIELD; -/* Values in optimize */ -#define KEY_OPTIMIZE_EXISTS 1 -#define KEY_OPTIMIZE_REF_OR_NULL 2 /** Merge new key definitions to old ones, remove those not used in both. @@ -3563,7 +3867,6 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array } -#define FT_KEYPART (MAX_REF_PARTS+10) static void add_ft_keys(DYNAMIC_ARRAY *keyuse_array, @@ -6021,7 +6324,7 @@ make_outerjoin_info(JOIN *join) } if (!tab->first_inner) tab->first_inner= nested_join->first_nested; - if (++nested_join->counter < nested_join->join_list.elements) + if (++nested_join->counter < nested_join->n_tables) break; /* Table tab is the last inner table for nested join. */ nested_join->first_nested->last_inner= tab; @@ -8575,6 +8878,8 @@ simplify_joins(JOIN *join, List<TABLE_LI conds= simplify_joins(join, &nested_join->join_list, conds, top); used_tables= nested_join->used_tables; not_null_tables= nested_join->not_null_tables; + /* The following two might become unequal after table elimination: */ + nested_join->n_tables= nested_join->join_list.elements; } else { @@ -8733,7 +9038,7 @@ static uint build_bitmap_for_nested_join with anything) 2. we could run out bits in nested_join_map otherwise. */ - if (nested_join->join_list.elements != 1) + if (nested_join->n_tables != 1) { nested_join->nj_map= (nested_join_map) 1 << first_unused++; first_unused= build_bitmap_for_nested_joins(&nested_join->join_list, @@ -8894,7 +9199,7 @@ static bool check_interleaving_with_nj(J join->cur_embedding_map |= next_emb->nested_join->nj_map; } - if (next_emb->nested_join->join_list.elements != + if (next_emb->nested_join->n_tables != next_emb->nested_join->counter) break; @@ -8928,7 +9233,7 @@ static void restore_prev_nj_state(JOIN_T { if (!(--last_emb->nested_join->counter)) join->cur_embedding_map&= ~last_emb->nested_join->nj_map; - else if (last_emb->nested_join->join_list.elements-1 == + else if (last_emb->nested_join->n_tables-1 == last_emb->nested_join->counter) join->cur_embedding_map|= last_emb->nested_join->nj_map; else @@ -16202,6 +16507,14 @@ static void select_describe(JOIN *join, tmp3.length(0); quick_type= -1; + + //psergey-todo: + if (tab->eliminated) + { + used_tables|=table->map; + continue; + } + item_list.empty(); /* id */ item_list.push_back(new Item_uint((uint32) === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2009-04-25 10:05:32 +0000 +++ b/sql/sql_select.h 2009-06-03 13:10:45 +0000 @@ -210,6 +210,9 @@ typedef struct st_join_table { JOIN *join; /** Bitmap of nested joins this table is part of */ nested_join_map embedding_map; + + //psergey-todo: more justified place + bool eliminated; void cleanup(); inline bool is_using_loose_index_scan() === modified file 'sql/table.h' --- a/sql/table.h 2009-02-19 09:01:25 +0000 +++ b/sql/table.h 2009-06-03 13:10:45 +0000 @@ -1626,6 +1626,8 @@ typedef struct st_nested_join Before each use the counters are zeroed by reset_nj_counters. */ uint counter; + /* Tables left after elimination */ + uint n_tables; nested_join_map nj_map; /* Bit used to identify this nested join*/ } NESTED_JOIN;