Hello Igor, Please find some minor comments below. Ok to push after they are addressed. On Wed, Oct 06, 2010 at 05:14:08AM -0700, Igor Babaev wrote:
At file:///home/igor/maria/maria-5.3-mwl128-bug57024/
------------------------------------------------------------ revno: 2827 revision-id: igor@askmonty.org-20101006121408-ra44i6o7eich5hrf parent: igor@askmonty.org-20101004014546-xirepv04xs7txxhw committer: Igor Babaev <igor@askmonty.org> branch nick: maria-5.3-mwl128-bug57024 timestamp: Wed 2010-10-06 05:14:08 -0700 message: Ported the fix for bug #57024 (a performance issue for outer joins). Employed the same kind of optimization as in the fix for the cases when join buffer is used. The optimization performs early evaluation of the conditions from on expression with table references to only outer tables of an outer join. ... === modified file 'sql/sql_join_cache.cc' @@ -1604,6 +1625,12 @@ uchar *init_pos= pos; CACHE_FIELD *copy= field_descr; CACHE_FIELD *copy_end= copy+flag_fields; + if (with_match_flag) + { + copy->str[0]= test((Match_flag) pos[0] == MATCH_FOUND); + pos+= copy->length; + copy++; + } for ( ; copy < copy_end; copy++) { memcpy(copy->str, pos, copy->length); TODO where does the above copy to?
@@ -1754,30 +1781,68 @@
/* - Skip record from join buffer if its match flag is on: default implementation - - SYNOPSIS - skip_recurrent_match() - - DESCRIPTION - This default implementation of the virtual function skip_record_if_match - skips the next record from the join buffer if its match flag is set on. - If the record is skipped the value of 'pos' is set to points to the position - right after the record. - - RETURN VALUE - TRUE the match flag is on and the record has been skipped - FALSE the match flag is off -*/ - -bool JOIN_CACHE::skip_recurrent_match() -{ - DBUG_ASSERT(with_length); - uint offset= size_of_rec_len; - if (prev_cache) - offset+= prev_cache->get_size_of_rec_offset(); - /* Check whether the match flag is on */ - if (get_match_flag_by_pos(pos+offset)) + Skip record from join buffer if's already matched: default implementation + + SYNOPSIS + skip_as_matched() + + DESCRIPTION + This default implementation of the virtual function skip_as_matched + skips the next record from the join buffer if its match flag is set to 1. + If the record is skipped the value of 'pos' is set to points to the position + right after the record. + + RETURN VALUE + TRUE the match flag is set to 1 and the record has been skipped + FALSE the match flag is not 1 Please s/1/MATCH_FOUND/
+*/ + +bool JOIN_CACHE::skip_as_matched()
Please rename the function to skip_if_matched(), because that what it actually does: checks *if* the records is matched and skips it if it is so.
+{ + DBUG_ASSERT(with_length); + uint offset= size_of_rec_len; + if (prev_cache) + offset+= prev_cache->get_size_of_rec_offset(); + /* Check whether the match flag is MATCH_FOUND */ + if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) + { + pos+= size_of_rec_len + get_rec_length(pos); + return TRUE; + } + return FALSE; +} + + +/* + Skip record from join buffer if the match isn't needed: default implementation + + SYNOPSIS + skip_as_unneeded_match() + + DESCRIPTION + This default implementation of the virtual function skip_as_unneeded_match + skips the next record from the join buffer if its match flag is not + MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the + first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE + and join_tab is the first inner table of an outer join. + If the record is skipped the value of 'pos' is set to points to the position Typo, s/points/point/
+ right after the record. + + RETURN VALUE + TRUE the record has to be been skipped ^^^^^^^^^^^ wrong grammar.
+ FALSE otherwise +*/ + +bool JOIN_CACHE::skip_as_unneeded_match() +{ + DBUG_ASSERT(with_length); + enum Match_flag match_fl; + uint offset= size_of_rec_len; + if (prev_cache) + offset+= prev_cache->get_size_of_rec_offset(); + + if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && + (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) { pos+= size_of_rec_len + get_rec_length(pos); return TRUE; @@ -1990,9 +2055,9 @@ { int error; enum_nested_loop_state rc= NESTED_LOOP_OK; + join_tab->table->null_row= 0; bool check_only_first_match= join_tab->check_only_first_match(); - - join_tab->table->null_row= 0; + bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
/* Return at once if there are no records in the join buffer */ if (!records) @@ -2042,9 +2107,11 @@ /* If only the first match is needed, and, it has been already found for the next record read from the join buffer, then the record is skipped. + Also those records that must be null complemented are not considered + as candidates for matches. */ - if (!(check_only_first_match && - skip_recurrent_candidate_for_match(rec_ptr))) + if ((!check_only_first_match && !outer_join_first_inner) || + !skip_next_candidate_for_match(rec_ptr)) { read_next_candidate_for_match(rec_ptr); rc= generate_full_extensions(rec_ptr); @@ -2268,7 +2335,6 @@ uint cnt; enum_nested_loop_state rc= NESTED_LOOP_OK; bool is_first_inner= join_tab == join_tab->first_unmatched; - bool is_last_inner= join_tab == join_tab->first_unmatched->last_inner;
/* Return at once if there are no records in the join buffer */ if (!records) @@ -2289,7 +2355,7 @@ goto finish; } /* Just skip the whole record if a match for it has been already found */ - if (!is_first_inner || !skip_recurrent_match()) + if (!is_first_inner || !skip_as_matched()) { get_record(); /* The outer row is complemented by nulls for each inner table */ @@ -2527,6 +2593,8 @@ is attached to the key entry. The key value is either placed in the hash element added for the key or, if the use_emb_key flag is set, remains in the record from the partial join. + If the match flag field of a record contains MATCH_IMPOSSIBLE the key is + not created for this record.
RETURN VALUE TRUE if it has been decided that it should be the last record @@ -2550,6 +2618,9 @@ link= prev_cache->get_curr_rec_link(); write_record_data(link, &is_full);
+ if (last_written_is_null_compl) + return is_full; + if (use_emb_key) key= get_curr_emb_key(); else @@ -2634,26 +2705,55 @@
/* - Skip record from a hashed join buffer if its match flag is on - - SYNOPSIS - skip_recurrent_match() - - DESCRIPTION - This implementation of the virtual function skip_recurrent_match does - the same as the default implementation does, but it takes into account - the link element used to connect the records with the same key into a chain. - - RETURN VALUE - TRUE the match flag is on and the record has been skipped + Skip record from a hashed join buffer if its match flag is set to 1 + + SYNOPSIS + skip_as_matched() + + DESCRIPTION + This implementation of the virtual function skip_as_matched does + the same as the default implementation does, but it takes into account + the link element used to connect the records with the same key into a chain. + + RETURN VALUE + TRUE the match flag is MATCH_FOUND and the record has been skipped + FALSE the match flag is not MATCH_FOUND +*/ + +bool JOIN_CACHE_HASHED::skip_as_matched() +{ + uchar *save_pos= pos; + pos+= get_size_of_rec_offset(); + if (!this->JOIN_CACHE::skip_as_matched()) + { + pos= save_pos; + return FALSE; + } + return TRUE; +} + + +/* + Skip record from a hashed join buffer if its match flag dictates to do so + + SYNOPSIS + skip_as_uneeded_match() + + DESCRIPTION + This implementation of the virtual function skip_as_uneeded_match does + the same as the default implementation does, but it takes into account + the link element used to connect the records with the same key into a chain. + + RETURN VALUE + TRUE the match flag dictates to skip the record FALSE the match flag is off */
-bool JOIN_CACHE_HASHED::skip_recurrent_match() +bool JOIN_CACHE_HASHED::skip_as_unneeded_match() { uchar *save_pos= pos; pos+= get_size_of_rec_offset(); - if (!this->JOIN_CACHE::skip_recurrent_match()) + if (!this->JOIN_CACHE::skip_as_unneeded_match()) { pos= save_pos; return FALSE; @@ -2780,7 +2880,7 @@ last element of this chain.
RETURN VALUE - TRUE if each retrieved record has its match flag set on + TRUE if each retrieved record has its match flag set to MATCH_FOUND FALSE otherwise */
@@ -2792,7 +2892,7 @@ { next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; - if (!get_match_flag_by_pos(rec_ptr)) + if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) return FALSE; } while (next_rec_ref_ptr != last_rec_ref_ptr); @@ -3000,25 +3100,28 @@ Check whether the matching record from the BNL cache is to be skipped
SYNOPSIS - skip_recurrent_candidate_for_match + skip_next_candidate_for_match rec_ptr pointer to the position in the join buffer right after the prefix of the current record
DESCRIPTION This implementation of the virtual function just calls the - method skip_recurrent_match to check whether the record referenced by - ref_ptr has its match flag set on and, if so, just skips this record - setting the value of the cursor 'pos' to the position right after it. + method skip_as_unneded_match to check whether the record referenced by + ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the + first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and + join_tab is the first inner table of an outer join. + If so, the function just skips this record setting the value of the + cursor 'pos' to the position right after it.
RETURN VALUE TRUE the record referenced by rec_ptr has been skipped FALSE otherwise */
-bool JOIN_CACHE_BNL::skip_recurrent_candidate_for_match(uchar *rec_ptr) +bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr) { pos= rec_ptr-base_prefix_length; - return skip_recurrent_match(); + return skip_as_unneeded_match(); }
@@ -3162,7 +3265,7 @@ The methods performs the necessary preparations to read the next record from the join buffer into the record buffer by the method read_next_candidate_for_match, or, to skip the next record from the join - buffer by the method skip_recurrent_candidate_for_match. + buffer by the method skip_next_candidate_for_match. This implementation of the virtual method moves to the next record in the chain of all records from the join buffer that are to be equi-joined with the current record from join_tab. @@ -3187,23 +3290,25 @@ Check whether the matching record from the BNLH cache is to be skipped
SYNOPSIS - skip_recurrent_candidate_for_match + skip_next_candidate_for_match rec_ptr pointer to the position in the join buffer right after the previous record
DESCRIPTION This implementation of the virtual function just calls the method get_match_flag_by_pos to check whether the record referenced - by ref_ptr has its match flag set on. + by ref_ptr has its match flag set to MATCH_FOUND.
RETURN VALUE - TRUE the record referenced by rec_ptr has its match flag set on + TRUE the record referenced by rec_ptr has its match flag set to + MATCH_FOUND FALSE otherwise */
-bool JOIN_CACHE_BNLH::skip_recurrent_candidate_for_match(uchar *rec_ptr) +bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) { - return get_match_flag_by_pos(rec_ptr); + return join_tab->check_only_first_match() && + (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); }
@@ -3364,7 +3469,7 @@ int JOIN_TAB_SCAN_MRR::next() { char **ptr= (char **) cache->get_curr_association_ptr(); - uint rc= join_tab->table->file->multi_range_read_next(ptr) ? -1 : 0; + int rc= join_tab->table->file->multi_range_read_next(ptr) ? -1 : 0; if (!rc) { /* @@ -3482,7 +3587,8 @@ { DBUG_ENTER("bka_range_seq_skip_record"); JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; - bool res= cache->get_match_flag_by_pos((uchar *) range_info); + bool res= cache->get_match_flag_by_pos((uchar *) range_info) == + JOIN_CACHE::MATCH_FOUND; DBUG_RETURN(res); }
@@ -3560,7 +3666,7 @@ The method performs the necessary preparations to read the next record from the join buffer into the record buffer by the method read_next_candidate_for_match, or, to skip the next record from the join - buffer by the method skip_recurrent_match. + buffer by the method skip_as_unneeded_match. This implementation of the virtual method get_next_candidate_for_match just decrements the counter of the records that are to be iterated over and returns the value of curr_association as a reference to the position @@ -3584,23 +3690,25 @@ Check whether the matching record from the BKA cache is to be skipped
SYNOPSIS - skip_recurrent_candidate_for_match + skip_next_candidate_for_match rec_ptr pointer to the position in the join buffer right after the previous record
DESCRIPTION This implementation of the virtual function just calls the method get_match_flag_by_pos to check whether the record referenced - by ref_ptr has its match flag set on. + by ref_ptr has its match flag set to MATCH_FOUND.
RETURN VALUE - TRUE the record referenced by rec_ptr has its match flag set on + TRUE the record referenced by rec_ptr has its match flag set to + MATCH_FOUND FALSE otherwise */
-bool JOIN_CACHE_BKA::skip_recurrent_candidate_for_match(uchar *rec_ptr) +bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) { - return get_match_flag_by_pos(rec_ptr); + return join_tab->check_only_first_match() && + (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); }
@@ -3691,7 +3799,9 @@ If is assumed that the functions starts reading at the position of the record length which is provided for each records in a BKA cache. After the key is built the 'pos' value points to the first position after - the current record. + the current record. + The function just skips the records with MATCH_IMPOSSIBLE in the + match flag field if there is any. The function returns 0 if the initial position is after the beginning of the record fields for last record from the join buffer.
@@ -3708,6 +3818,8 @@ uchar *init_pos; JOIN_CACHE *cache;
+start: + /* Any record in a BKA cache is prepended with its length */ DBUG_ASSERT(with_length);
@@ -3727,6 +3839,13 @@
/* Read all flag fields of the record */ read_flag_fields(); + + if (with_match_flag && + (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE ) + { + pos= init_pos+rec_len; + goto start; + }
if (use_emb_key) {
=== modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-10-01 18:05:27 +0000 +++ b/sql/sql_select.cc 2010-10-06 12:14:08 +0000 @@ -7090,9 +7090,13 @@ used_tables2|= current_map; COND *tmp_cond= make_cond_for_table(on_expr, used_tables2, current_map, FALSE); + add_cond_and_fix(&tmp_cond, tab->on_precond); if (tmp_cond) { JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : tab; + Item **sel_cond_ref= tab < first_inner_tab ? + &first_inner_tab->on_precond : + &tab->select_cond; /* First add the guards for match variables of all embedding outer join operations. @@ -7115,15 +7119,15 @@ tmp_cond->quick_fix_field(); /* Add the predicate to other pushed down predicates */ DBUG_PRINT("info", ("Item_cond_and")); - cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond : - new Item_cond_and(cond_tab->select_cond, - tmp_cond); + *sel_cond_ref= !(*sel_cond_ref) ? + tmp_cond : + new Item_cond_and(*sel_cond_ref, tmp_cond); DBUG_PRINT("info", ("Item_cond_and 0x%lx", - (ulong)cond_tab->select_cond)); - if (!cond_tab->select_cond) - DBUG_RETURN(1); - cond_tab->select_cond->update_used_tables(); - cond_tab->select_cond->quick_fix_field(); + (ulong)(*sel_cond_ref))); + if (!(*sel_cond_ref)) + DBUG_RETURN(1); + (*sel_cond_ref)->quick_fix_field(); + (*sel_cond_ref)->update_used_tables(); if (cond_tab->select) cond_tab->select->cond= cond_tab->select_cond; } @@ -13246,7 +13250,7 @@ DBUG_RETURN(nls); } int error; - enum_nested_loop_state rc; + enum_nested_loop_state rc= NESTED_LOOP_OK; READ_RECORD *info= &join_tab->read_record;
if (join_tab->flush_weedout_table) @@ -13279,18 +13283,21 @@
/* Set first_unmatched for the last inner table of this group */ join_tab->last_inner->first_unmatched= join_tab; - } + if (join_tab->on_precond && !join_tab->on_precond->val_int()) + rc= NESTED_LOOP_NO_MORE_ROWS; + } join->thd->row_count= 0;
if (join_tab->loosescan_match_tab) join_tab->loosescan_match_tab->found_match= FALSE;
- error= (*join_tab->read_first_record)(join_tab); - - if (join_tab->keep_current_rowid) - join_tab->table->file->position(join_tab->table->record[0]); - - rc= evaluate_join_record(join, join_tab, error); + if (rc != NESTED_LOOP_NO_MORE_ROWS) + { + error= (*join_tab->read_first_record)(join_tab); + if (join_tab->keep_current_rowid) + join_tab->table->file->position(join_tab->table->record[0]); + rc= evaluate_join_record(join, join_tab, error); + } }
/*
=== modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2010-10-01 18:05:27 +0000 +++ b/sql/sql_select.h 2010-10-06 12:14:08 +0000 @@ -161,6 +161,8 @@ KEYUSE *keyuse; /**< pointer to first used key */ SQL_SELECT *select; COND *select_cond; + COND *on_precond; /**< part of on condition to check before + accessing the first inner table */ QUICK_SELECT_I *quick; /* The value of select_cond before we've attempted to do Index Condition @@ -678,6 +680,14 @@ */ uchar *curr_rec_link;
+ /* + This flag is set to TRUE if join_tab is the first inner table of an outer + join and the latest record written to the join buffer is detected to be + null complemented after checking on conditions over the outer tables for + this outer join operation + */ + bool last_written_is_null_compl; + /* The number of fields put in the join buffer of the join cache that are used in building keys to access the table join_tab @@ -792,8 +802,11 @@ /* Read a referenced field from the join buffer */ bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len);
- /* Shall skip record from the join buffer if its match flag is on */ - virtual bool skip_recurrent_match(); + /* Shall skip record from the join buffer if its match flag is set to 1 */ + virtual bool skip_as_matched(); Please s/1/MATCH_FOUND/;
+ + /* Shall skip record from the join buffer if its match flag is not-zero */ + virtual bool skip_as_unneeded_match();
/* True if rec_ptr points to the record whose blob data stay in @@ -836,11 +849,11 @@ virtual uchar *get_next_candidate_for_match()= 0; /* Shall check whether the given record from the join buffer has its match - flag set on and, if so, skip the record in the buffer. + flag settings commands to skip the record in the buffer. */ - virtual bool skip_recurrent_candidate_for_match(uchar *rec_ptr)= 0; + virtual bool skip_next_candidate_for_match(uchar *rec_ptr)= 0; /* - Shall read the given record from the join buffer into the + Shall read the given record from the join buffer intTCo the
Is the above change a typo ^^
the corresponding record buffer */ virtual void read_next_candidate_for_match(uchar *rec_ptr)= 0; @@ -868,6 +881,21 @@
public:
+ /* + The enumeration type Match_flag describes possible states of the match flag + field stored for the records of the first inner tables of outer joins and + semi-joins in the cases when the first match strategy is used for them. + When a record with match flag field is written into the join buffer the + state of the field usually is MATCH_NOT_FOUND unless this is a record of the + first inner table of the outer join for which the on precondition (the + condition from on expression over outer tables) has turned out not to be + true. In the last case the state of the match flag is MATCH_IMPOSSIBLE. + The state of the match flag field is changed to MATCH_FOUND as soon as + the first full matching combination of inner tables of the outer join or + the semi-join is discovered. + */ + enum Match_flag { MATCH_NOT_FOUND, MATCH_FOUND, MATCH_IMPOSSIBLE }; + /* Table to be joined with the partial join records from the cache */ JOIN_TAB *join_tab;
@@ -920,7 +948,7 @@ virtual void get_record_by_pos(uchar *rec_ptr);
/* Shall return the value of the match flag for the positioned record */ - virtual bool get_match_flag_by_pos(uchar *rec_ptr); + virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr);
/* Shall return the position of the current record */ virtual uchar *get_curr_rec() { return curr_rec_pos; }
BR Sergey -- Sergey Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog