[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (psergey:2718)
#At lp:maria based on revid:psergey@askmonty.org-20090617052739-37i1r8lip0m4ft9r 2718 Sergey Petrunia 2009-06-22 MWL#17: Table elimination - Make elimination check to be able detect cases like t.primary_key_col1=othertbl.col AND t.primary_key_col2=func(t.primary_key_col1). These are needed to handle e.g. the case of func() being a correlated subquery that selects the latest value. - If we've removed a condition with subquery predicate, EXPLAIN [EXTENDED] won't show the subquery anymore modified: sql/item.cc sql/item.h sql/item_subselect.cc sql/item_subselect.h sql/item_sum.cc sql/sql_lex.cc sql/sql_lex.h sql/sql_select.cc sql/sql_select.h per-file messages: sql/item.cc MWL#17: Table elimination - Add tem_field::check_column_usage_processor(). it allows to check which key parts a condition depends on. sql/item.h MWL#17: Table elimination - Add tem_field::check_column_usage_processor(). it allows to check which key parts a condition depends on. sql/item_subselect.cc MWL#17: Table elimination - Item_subselect got 'eliminated' attribute. It is used only to determine if the subselect should be printed by EXPLAIN. - Item_subselect got List<Item> refers_to - a list of item in the current select that are referred to from within the subselect. - Added Item_*::check_column_usage_processor(). it allows to check which key parts a condition depends on. - Added a comment about possible problem in Item_subselect::walk sql/item_subselect.h MWL#17: Table elimination - Item_subselect got 'eliminated' attribute. It is used only to determine if the subselect should be printed by EXPLAIN. - Item_subselect got List<Item> refers_to - a list of item in the current select that are referred to from within the subselect. - Added Item_*::check_column_usage_processor(). it allows to check which key parts a condition depends on. sql/item_sum.cc MWL#17: Table elimination sql/sql_lex.cc MWL#17: Table elimination sql/sql_lex.h MWL#17: Table elimination sql/sql_select.cc MWL#17: Table elimination - Make elimination check to be able detect cases like t.primary_key_col1=othertbl.col AND t.primary_key_col2=func(t.primary_key_col1). These are needed to handle e.g. the case of func() being a correlated subquery that selects the latest value. - If we've removed a condition with subquery predicate, EXPLAIN [EXTENDED] won't show the subquery anymore sql/sql_select.h MWL#17: Table elimination === modified file 'sql/item.cc' --- a/sql/item.cc 2009-04-25 10:05:32 +0000 +++ b/sql/item.cc 2009-06-22 11:46:31 +0000 @@ -1915,6 +1915,30 @@ void Item_field::reset_field(Field *f) name= (char*) f->field_name; } +bool Item_field::check_column_usage_processor(uchar *arg) +{ + Field_processor_info* info=(Field_processor_info*)arg; + if (used_tables() & ~info->allowed_tables) + return FALSE; + + if (field->table == info->table) + { + if (!(field->part_of_key.is_set(info->keyno))) + return TRUE; + + KEY *key= &field->table->key_info[info->keyno]; + for (uint part= 0; part < key->key_parts; part++) + { + if (field->field_index == key->key_part[part].field->field_index) + { + info->needed_key_parts |= key_part_map(1) << part; + break; + } + } + } + return FALSE; +} + const char *Item_ident::full_name() const { char *tmp; @@ -3380,7 +3404,7 @@ static void mark_as_dependent(THD *thd, /* store pointer on SELECT_LEX from which item is dependent */ if (mark_item) mark_item->depended_from= last; - current->mark_as_dependent(last); + current->mark_as_dependent(last, resolved_item); if (thd->lex->describe & DESCRIBE_EXTENDED) { char warn_buff[MYSQL_ERRMSG_SIZE]; === modified file 'sql/item.h' --- a/sql/item.h 2009-06-09 21:11:33 +0000 +++ b/sql/item.h 2009-06-22 11:46:31 +0000 @@ -888,6 +888,8 @@ public: virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; } virtual bool is_expensive_processor(uchar *arg) { return 0; } virtual bool register_field_in_read_map(uchar *arg) { return 0; } + virtual bool check_column_usage_processor(uchar *arg) { return 0; } + virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; } /* Check if a partition function is allowed SYNOPSIS @@ -1012,6 +1014,14 @@ public: }; +typedef struct +{ + table_map allowed_tables; + TABLE *table; + uint keyno; + uint needed_key_parts; +} Field_processor_info; + class sp_head; @@ -1477,6 +1487,7 @@ public: bool find_item_in_field_list_processor(uchar *arg); bool register_field_in_read_map(uchar *arg); bool check_partition_func_processor(uchar *int_arg) {return FALSE;} + bool check_column_usage_processor(uchar *arg); void cleanup(); bool result_as_longlong() { === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2009-01-31 21:22:44 +0000 +++ b/sql/item_subselect.cc 2009-06-22 11:46:31 +0000 @@ -39,7 +39,7 @@ inline Item * and_items(Item* cond, Item Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), thd(0), substitution(0), engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0), - const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE) + const_item_cache(1), in_fix_fields(0), engine_changed(0), changed(0), is_correlated(FALSE) { with_subselect= 1; reset(); @@ -151,10 +151,14 @@ bool Item_subselect::fix_fields(THD *thd DBUG_ASSERT(fixed == 0); engine->set_thd((thd= thd_param)); + if (!in_fix_fields) + refers_to.empty(); + eliminated= FALSE; if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res)) return TRUE; - + + in_fix_fields++; res= engine->prepare(); // all transformation is done (used by prepared statements) @@ -181,12 +185,14 @@ bool Item_subselect::fix_fields(THD *thd if (!(*ref)->fixed) ret= (*ref)->fix_fields(thd, ref); thd->where= save_where; + in_fix_fields--; return ret; } // Is it one field subselect? if (engine->cols() > max_columns) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + in_fix_fields--; return TRUE; } fix_length_and_dec(); @@ -203,11 +209,30 @@ bool Item_subselect::fix_fields(THD *thd fixed= 1; err: + in_fix_fields--; thd->where= save_where; return res; } +bool Item_subselect::check_column_usage_processor(uchar *arg) +{ + List_iterator<Item> it(refers_to); + Item *item; + while ((item= it++)) + { + if (item->walk(&Item::check_column_usage_processor,FALSE, arg)) + return TRUE; + } + return FALSE; +} + +bool Item_subselect::mark_as_eliminated_processor(uchar *arg) +{ + eliminated= TRUE; + return FALSE; +} + bool Item_subselect::walk(Item_processor processor, bool walk_subquery, uchar *argument) { @@ -225,6 +250,7 @@ bool Item_subselect::walk(Item_processor if (lex->having && (lex->having)->walk(processor, walk_subquery, argument)) return 1; + /* TODO: why doesn't this walk the OUTER JOINs' ON expressions */ while ((item=li++)) { === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2008-02-22 10:30:33 +0000 +++ b/sql/item_subselect.h 2009-06-22 11:46:31 +0000 @@ -52,8 +52,16 @@ protected: bool have_to_be_excluded; /* cache of constant state */ bool const_item_cache; - + public: + /* + References from inside the subquery to the select that this predicate is + in. References to parent selects not included. + */ + List<Item> refers_to; + int in_fix_fields; + bool eliminated; + /* changed engine indicator */ bool engine_changed; /* subquery is transformed */ @@ -126,6 +134,8 @@ public: virtual void reset_value_registration() {} enum_parsing_place place() { return parsing_place; } bool walk(Item_processor processor, bool walk_subquery, uchar *arg); + bool mark_as_eliminated_processor(uchar *arg); + bool check_column_usage_processor(uchar *arg); /** Get the SELECT_LEX structure associated with this Item. === modified file 'sql/item_sum.cc' --- a/sql/item_sum.cc 2009-06-09 21:11:33 +0000 +++ b/sql/item_sum.cc 2009-06-22 11:46:31 +0000 @@ -350,7 +350,7 @@ bool Item_sum::register_sum_func(THD *th sl= sl->master_unit()->outer_select() ) sl->master_unit()->item->with_sum_func= 1; } - thd->lex->current_select->mark_as_dependent(aggr_sel); + thd->lex->current_select->mark_as_dependent(aggr_sel, NULL); return FALSE; } === modified file 'sql/sql_lex.cc' --- a/sql/sql_lex.cc 2009-04-25 10:05:32 +0000 +++ b/sql/sql_lex.cc 2009-06-22 11:46:31 +0000 @@ -1778,7 +1778,7 @@ void st_select_lex_unit::exclude_tree() 'last' should be reachable from this st_select_lex_node */ -void st_select_lex::mark_as_dependent(st_select_lex *last) +void st_select_lex::mark_as_dependent(st_select_lex *last, Item *dependency) { /* Mark all selects from resolved to 1 before select where was @@ -1804,6 +1804,8 @@ void st_select_lex::mark_as_dependent(st } is_correlated= TRUE; this->master_unit()->item->is_correlated= TRUE; + if (dependency) + this->master_unit()->item->refers_to.push_back(dependency); } bool st_select_lex_node::set_braces(bool value) { return 1; } === modified file 'sql/sql_lex.h' --- a/sql/sql_lex.h 2009-03-17 20:29:24 +0000 +++ b/sql/sql_lex.h 2009-06-22 11:46:31 +0000 @@ -743,7 +743,7 @@ public: return master_unit()->return_after_parsing(); } - void mark_as_dependent(st_select_lex *last); + void mark_as_dependent(st_select_lex *last, Item *dependency); bool set_braces(bool value); bool inc_in_sum_expr(); === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2009-06-17 05:27:39 +0000 +++ b/sql/sql_select.cc 2009-06-22 11:46:31 +0000 @@ -2478,6 +2478,14 @@ static ha_rows get_quick_record_count(TH } +typedef struct st_keyuse_w_needed_reg +{ + KEYUSE *first; + key_part_map second; + +} Keyuse_w_needed_reg; + +static bool has_eq_ref_access_candidate(TABLE *table, table_map can_refer_to_these) { KEYUSE *keyuse= table->reginfo.join_tab->keyuse; @@ -2494,24 +2502,85 @@ bool has_eq_ref_access_candidate(TABLE * { uint key= keyuse->key; key_part_map bound_parts=0; - bool ft_key= test(keyuse->keypart == FT_KEYPART); - + uint n_unusable=0; + bool ft_key= test(keyuse->keypart == FT_KEYPART); + KEY *keyinfo= table->key_info + key; + KEYUSE *key_start = keyuse; + do /* For each keypart and each way to read it */ { - if (!(keyuse->used_tables & ~can_refer_to_these) && - !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) + if (keyuse->usable) { - bound_parts |= keyuse->keypart_map; + if(!(keyuse->used_tables & ~can_refer_to_these) && + !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) + { + bound_parts |= keyuse->keypart_map; + } } + else + n_unusable++; keyuse++; - } while (keyuse->table && keyuse->key == key); + } while (keyuse->table == table && keyuse->key == key); + + if (ft_key || ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) + != HA_NOSAME)) + { + continue; + } - KEY *keyinfo= table->key_info + key; - if (!ft_key && - ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) && - bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts)) - { + if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts)) return TRUE; + /* + Ok, usable keyuse elements didn't help us. Try making use of + unusable KEYUSEs (psergey-todo: sane comments:) + */ + if (n_unusable && bound_parts) + { + /* + Check if unusable KEYUSE elements cause all parts of key to be + bound. An unusable keyuse element makes a key part bound when it + represents the following: + + keyXpartY=func(bound_columns, preceding_tables) + + . + */ + Keyuse_w_needed_reg *uses; + if (!(uses= (Keyuse_w_needed_reg*)my_alloca(sizeof(Keyuse_w_needed_reg)*n_unusable))) + return FALSE; + uint n_uses=0; + for (KEYUSE *k= key_start; k!=keyuse; k++) + { + if (!k->usable && !(k->used_tables & ~can_refer_to_these)) + { + //Walk k->val and check which key parts it depends on. + Field_processor_info fp= {can_refer_to_these, table, k->key, 0}; + if (!k->val->walk(&Item::check_column_usage_processor, FALSE, + (uchar*)&fp)) + { + uses[n_uses].first= k; + uses[n_uses].second= fp.needed_key_parts; + n_uses++; + } + } + } + /* Now compute transitive closure */ + uint n_bounded; + do + { + n_bounded= 0; + for (uint i=0; i< n_uses; i++) + { + /* needed_parts is covered by what is already bound*/ + if (!(uses[i].second & ~bound_parts)) + { + bound_parts|= key_part_map(1) << uses[i].first->keypart; + n_bounded++; + } + if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts)) + return TRUE; + } + } while (n_bounded != 0); } } } @@ -2657,6 +2726,7 @@ eliminate_tables_for_join_list(JOIN *joi eliminated += tbl->nested_join->join_list.elements; //psergey-todo: do we need to do anything about removing the join //nest? + tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL); } else { @@ -2673,6 +2743,7 @@ eliminate_tables_for_join_list(JOIN *joi { mark_table_as_eliminated(join, tbl->table, const_tbl_count, const_tables); + tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL); eliminated += 1; } } @@ -3065,14 +3136,16 @@ make_join_statistics(JOIN *join, TABLE_L { start_keyuse=keyuse; key=keyuse->key; - s->keys.set_bit(key); // QQ: remove this ? + if (keyuse->usable) + s->keys.set_bit(key); // QQ: remove this ? refs=0; const_ref.clear_all(); eq_part.clear_all(); do { - if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize) + if (keyuse->usable && keyuse->val->type() != Item::NULL_ITEM && + !keyuse->optimize) { if (!((~found_const_table_map) & keyuse->used_tables)) const_ref.set_bit(keyuse->keypart); @@ -3276,6 +3349,7 @@ typedef struct key_field_t { */ bool null_rejecting; bool *cond_guard; /* See KEYUSE::cond_guard */ + bool usable; } KEY_FIELD; @@ -3284,6 +3358,26 @@ typedef struct key_field_t { This is called for OR between different levels. + That is, the function operates on an array of KEY_FIELD elements which has + two parts: + + $LEFT_PART $RIGHT_PART + +-----------------------+-----------------------+ + start new_fields end + + $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two + parts of the OR condition. Our task is to produce an array of KEY_FIELD + elements that would correspond to "$LEFT_PART OR $RIGHT_PART". + + The rules for combining elements are as follows: + (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)= + AND_ij (keyfieldA_i OR keyfieldB_j) + + We discard all (keyfieldA_i OR keyfieldB_j) that refer to different + fields. For those referring to the same field, the logic is as follows: + + t.keycol= + To be able to do 'ref_or_null' we merge a comparison of a column and 'column IS NULL' to one test. This is useful for sub select queries that are internally transformed to something like:. @@ -3348,13 +3442,18 @@ merge_key_fields(KEY_FIELD *start,KEY_FI KEY_OPTIMIZE_REF_OR_NULL)); old->null_rejecting= (old->null_rejecting && new_fields->null_rejecting); + /* + The conditions are the same, hence their usabilities should + be, too (TODO: shouldn't that apply to the above + null_rejecting and optimize attributes?) + */ + DBUG_ASSERT(old->usable == new_fields->usable); } } else if (old->eq_func && new_fields->eq_func && old->val->eq_by_collation(new_fields->val, old->field->binary(), old->field->charset())) - { old->level= and_level; old->optimize= ((old->optimize & new_fields->optimize & @@ -3363,10 +3462,14 @@ merge_key_fields(KEY_FIELD *start,KEY_FI KEY_OPTIMIZE_REF_OR_NULL)); old->null_rejecting= (old->null_rejecting && new_fields->null_rejecting); + // "t.key_col=const" predicates are always usable + DBUG_ASSERT(old->usable && new_fields->usable); } else if (old->eq_func && new_fields->eq_func && - ((old->val->const_item() && old->val->is_null()) || - new_fields->val->is_null())) + ((new_fields->usable && old->val->const_item() && + old->val->is_null()) || + ((old->usable && new_fields->val->is_null())))) + /* TODO ^ why is the above asymmetric, why const_item()? */ { /* field = expression OR field IS NULL */ old->level= and_level; @@ -3437,6 +3540,7 @@ add_key_field(KEY_FIELD **key_fields,uin table_map usable_tables, SARGABLE_PARAM **sargables) { uint exists_optimize= 0; + bool optimizable=0; if (!(field->flags & PART_KEY_FLAG)) { // Don't remove column IS NULL on a LEFT JOIN table @@ -3449,15 +3553,15 @@ add_key_field(KEY_FIELD **key_fields,uin else { table_map used_tables=0; - bool optimizable=0; for (uint i=0; i<num_values; i++) { used_tables|=(value[i])->used_tables(); if (!((value[i])->used_tables() & (field->table->map | RAND_TABLE_BIT))) optimizable=1; } - if (!optimizable) - return; + // psergey-tbl-elim: + // if (!optimizable) + // return; if (!(usable_tables & field->table->map)) { if (!eq_func || (*value)->type() != Item::NULL_ITEM || @@ -3470,7 +3574,8 @@ add_key_field(KEY_FIELD **key_fields,uin JOIN_TAB *stat=field->table->reginfo.join_tab; key_map possible_keys=field->key_start; possible_keys.intersect(field->table->keys_in_use_for_query); - stat[0].keys.merge(possible_keys); // Add possible keys + if (optimizable) + stat[0].keys.merge(possible_keys); // Add possible keys /* Save the following cases: @@ -3563,6 +3668,7 @@ add_key_field(KEY_FIELD **key_fields,uin (*key_fields)->val= *value; (*key_fields)->level= and_level; (*key_fields)->optimize= exists_optimize; + (*key_fields)->usable= optimizable; /* If the condition has form "tbl.keypart = othertbl.field" and othertbl.field can be NULL, there will be no matches if othertbl.field @@ -3874,6 +3980,7 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; keyuse.null_rejecting= key_field->null_rejecting; keyuse.cond_guard= key_field->cond_guard; + keyuse.usable= key_field->usable; VOID(insert_dynamic(keyuse_array,(uchar*) &keyuse)); } } @@ -3954,6 +4061,11 @@ sort_keyuse(KEYUSE *a,KEYUSE *b) return (int) (a->key - b->key); if (a->keypart != b->keypart) return (int) (a->keypart - b->keypart); + + // Usable ones go before the unusable + if (a->usable != b->usable) + return (int)a->usable - (int)b->usable; + // Place const values before other ones if ((res= test((a->used_tables & ~OUTER_REF_TABLE_BIT)) - test((b->used_tables & ~OUTER_REF_TABLE_BIT)))) @@ -4164,7 +4276,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR found_eq_constant=0; for (i=0 ; i < keyuse->elements-1 ; i++,use++) { - if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL) + if (use->usable && !use->used_tables && + use->optimize != KEY_OPTIMIZE_REF_OR_NULL) use->table->const_key_parts[use->key]|= use->keypart_map; if (use->keypart != FT_KEYPART) { @@ -4188,7 +4301,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR /* Save ptr to first use */ if (!use->table->reginfo.join_tab->keyuse) use->table->reginfo.join_tab->keyuse=save_pos; - use->table->reginfo.join_tab->checked_keys.set_bit(use->key); + if (use->usable) + use->table->reginfo.join_tab->checked_keys.set_bit(use->key); save_pos++; } i=(uint) (save_pos-(KEYUSE*) keyuse->buffer); @@ -4218,7 +4332,7 @@ static void optimize_keyuse(JOIN *join, To avoid bad matches, we don't make ref_table_rows less than 100. */ keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref - if (keyuse->used_tables & + if (keyuse->usable && keyuse->used_tables & (map= (keyuse->used_tables & ~join->const_table_map & ~OUTER_REF_TABLE_BIT))) { @@ -4411,7 +4525,8 @@ best_access_path(JOIN *join, if 1. expression doesn't refer to forward tables 2. we won't get two ref-or-null's */ - if (!(remaining_tables & keyuse->used_tables) && + if (keyuse->usable && + !(remaining_tables & keyuse->used_tables) && !(ref_or_null_part && (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))) { @@ -5915,9 +6030,11 @@ static bool create_ref_for_key(JOIN *joi uint i; for (i=0 ; i < keyparts ; keyuse++,i++) { - while (keyuse->keypart != i || - ((~used_tables) & keyuse->used_tables)) + while (keyuse->keypart != i || ((~used_tables) & keyuse->used_tables) || + !keyuse->usable) + { keyuse++; /* Skip other parts */ + } uint maybe_null= test(keyinfo->key_part[i].null_bit); j->ref.items[i]=keyuse->val; // Save for cond removal @@ -16853,8 +16970,11 @@ static void select_describe(JOIN *join, unit; unit= unit->next_unit()) { - if (mysql_explain_union(thd, unit, result)) - DBUG_VOID_RETURN; + if (!(unit->item && unit->item->eliminated)) + { + if (mysql_explain_union(thd, unit, result)) + DBUG_VOID_RETURN; + } } DBUG_VOID_RETURN; } === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2009-06-14 12:35:04 +0000 +++ b/sql/sql_select.h 2009-06-22 11:46:31 +0000 @@ -51,6 +51,7 @@ typedef struct keyuse_t { NULL - Otherwise (the source equality can't be turned off) */ bool *cond_guard; + bool usable; } KEYUSE; class store_key;
participants (1)
-
Sergey Petrunia