Hi! Here is the third and last part of the review.
"Sergey" == Sergey Petrunya <psergey@askmonty.org> writes:
<cut>
@@ -7433,8 +7718,18 @@
table_map used_tables2= (join->const_table_map | OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); - for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) { + if (!tab->table) + { + /* + psergey-todo: this is probably incorrect, fix this when we get + correct processing for outer joins + semi joins + */ + continue; + }
When is the above fix going to happen ? (Which worklog). What is the effect if we execute the continue? a) Wrong result b) Crash 3) Slower code ? Please add a comment about this if it's 3) If it's 1 or 2 we should fix it before pushing to trunk.
@@ -7480,37 +7775,12 @@ (*sel_cond_ref)->update_used_tables(); if (cond_tab->select) cond_tab->select->cond= cond_tab->select_cond; - } + } + if (tab == last_tab) + break;
Wouldn't it be better to fix the 'for' loop, as otherwise you may get a crash if tab->table is not true for the tab == last_tab. How about this for a better for loop for (JOIN_TAB *tab= first_linear_tab(join, TRUE), prev_tab= 0; prev_tab != last_tab; prev_tab= tab, tab = next_linear_tab(join, tab, TRUE)) Note that we don't have to test for tab == 0 as we will always find last_tab first.
@@ -7968,8 +8240,8 @@ Don't use join buffering if we're dictated not to by no_jbuf_after (this ...) */ - if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || - sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) + if ((!tab->bush_root_tab? !(i <= no_jbuf_after) : FALSE) || + tab->loosescan_match_tab || tab->bush_children) goto no_join_cache;
The above was a bit complex if. How about: if (((!tab->bush_root_tab && !(i <= no_jbuf_after)) || tab->loosescan_match_tab || tab->bush_children) goto no_join_cache; Don't really understand how we could use join_cache with SMJ_NEST child (ie, a tab with tab->bush_root_tab set).
for (JOIN_TAB *first_inner= tab->first_inner; first_inner; @@ -8122,16 +8394,24 @@ void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, uint no_jbuf_after) { - JOIN_TAB *first_sjm_table= NULL; - JOIN_TAB *last_sjm_table= NULL; + //JOIN_TAB *first_sjm_table= NULL; + //JOIN_TAB *last_sjm_table= NULL;
Remove comments. (Please remove all these before you request a review as the deleted lines will show up nicely in the diff anyway).
+ JOIN_TAB *tab;
- for (uint i= join->const_tables; i < join->tables; i++) - join->join_tab[i].used_join_cache_level= join->max_allowed_join_cache_level; - - for (uint i= join->const_tables; i < join->tables; i++) - { - JOIN_TAB *tab= join->join_tab+i; + //for (uint i= join->const_tables; i < join->tables; i++)
Remove comment
+ for (tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) + { + tab->used_join_cache_level= join->max_allowed_join_cache_level; + }
+ //for (uint i= join->const_tables; i < join->tables; i++)
Remove comment
+ for (tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) + { +#if 0 if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) { first_sjm_table= tab; @@ -8142,9 +8422,16 @@ } if (!(tab >= first_sjm_table && tab < last_sjm_table)) tab->first_sjm_sibling= NULL; - +#endif
Remove above ifdefed block
+ JOIN_TAB *prev_tab; +restart: tab->icp_other_tables_ok= TRUE; tab->idx_cond_fact_out= TRUE; + + prev_tab= tab - 1;
Isn't it more clear to set prev_tab as part of the for loop? (This will make it independent of how next_linear_tab() is implemented)
@@ -8154,12 +8441,22 @@
+ /* if (join->return_tab) i= join->return_tab-join->join_tab-1; // always >= 0 + */
Remove commented code
@@ -8204,10 +8501,17 @@ setup_semijoin_dups_elimination(join, options, no_jbuf_after)) DBUG_RETURN(TRUE); /* purecov: inspected */
- for (i= 0; i < join->const_tables; i++) - join->join_tab[i].partial_join_cardinality= 1; + for (tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) + { + tab->partial_join_cardinality= 1; + }
Isn't the above loop wrong ? It was supposed to only loop over const tables, not skip const tables.
- for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++)
Remove comment
+ for (tab= first_linear_tab(join, TRUE), i= join->const_tables; + tab; + tab= next_linear_tab(join, tab, TRUE)) { /* The approximation below for partial join cardinality is not good because @@ -8215,24 +8519,45 @@ - it does not differentiate between inner joins, outer joins and semi-joins. Later it should be improved. */ - JOIN_TAB *tab=join->join_tab+i; + JOIN_TAB *prev_tab= tab - 1;
Set prev_tab in the for loop. This allows us to get automaticly the second part of the following if done:
+ if ((tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) || + (tab == join->join_tab + join->const_tables)) + prev_tab= NULL;
In other words use: for (tab= first_linear_tab(join, TRUE), i= join->const_tables, prev_tab=0; tab; prev_tab= tab; tab= next_linear_tab(join, tab, TRUE))
+ DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table); tab->partial_join_cardinality= join->best_positions[i].records_read * - (i ? (tab-1)->partial_join_cardinality : 1); + (prev_tab? prev_tab->partial_join_cardinality : 1); + if (!tab->bush_children) + i++;
Why do you assign and increment i? I can't see it beeing used anymore in the code.
- for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++)
Remove comment.
+ for (tab= first_linear_tab(join, TRUE), i= join->const_tables; + tab; + tab= next_linear_tab(join, tab, TRUE), i++) { - JOIN_TAB *tab=join->join_tab+i; + //JOIN_TAB *tab=join->join_tab+i;
Remove comment.
+ if (tab->bush_children) + { + if (setup_sj_materialization(tab)) + return TRUE; + } + TABLE *table=tab->table; uint jcl= tab->used_join_cache_level; tab->read_record.table= table; tab->read_record.file=table->file; tab->read_record.unlock_row= rr_unlock_row; - tab->next_select=sub_select; /* normal select */ tab->sorted= sorted; sorted= 0; // only first must be sorted + + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + }
The above needs a comment like: /* We should not set tab->next_select for the last table in the SMJ-nest, as this is already set in setup_sj_materialization(). */ (It would be nicer to have next_select set to 0 by default and here only set it if it was not set before).
@@ -8395,13 +8707,16 @@ abort(); /* purecov: deadcode */ } } - join->join_tab[join->tables-1].next_select=0; /* Set by do_select */ + uint n_top_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */
Isn't the above same as (join->join_tab_ranges.head()->end -1).next_select= 0 ?
-/* + /* If a join buffer is used to join a table the ordering by an index for the first non-constant table cannot be employed anymore. */ - for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++) + for (i=join->const_tables ; i < n_top_tables ; i++) {
Wouldn't it be better to also here use the normal for loop for JOIN_TAB's to hide the implementation a bit ? for (tab= first_linear_tab(join, TRUE), tab; tab= next_linear_tab(join, tab, FALSE))
@@ -8481,6 +8799,12 @@ { table->disable_keyread(); table->file->ha_index_or_rnd_end(); + + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + table->pos_in_table_list->jtbm_subselect->cleanup(); + } /* We need to reset this for next select (Tested in part_of_refkey) @@ -8675,7 +8999,7 @@
if (table) { - JOIN_TAB *tab,*end; + JOIN_TAB *tab; /* Only a sorted table may be cached. This sorted table is always the first non const table in join->table @@ -8688,13 +9012,15 @@
if (full) { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE)) tab->cleanup(); + //psergey4: how is the above supposed to work when + //top_jtrange_tables==FALSE? It will crash right away!
Don't understand the comment. If top_jtrange_tables is 0 then tab is 0 and the for loop will never execute. anyway, it would be better to use our standard loop: for (tab= first_linear_tab(join, FALSE), tab; tab= next_linear_tab(join, tab, TRUE)) instead of having yet another way to do the loop over JOIN_TAB's
table= 0; } else { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE))
Same here; Change to a basic for loop
@@ -9982,7 +10343,8 @@
/* Pick the "head" item: the constant one or the first in the join order - that's not inside some SJM nest. + that's not inside some SJM nest. psergey2: out-of-date comment. It is ok + inside SJM, too.
Please update comment to be correct.
*/ if (item_const) head= item_const; @@ -11084,6 +11446,9 @@ for (i= first_tab; i <= last_tab; i++) reopt_remaining_tables |= join->positions[i].table->table->map;
+ table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; + join->cur_sj_inner_tables= 0; +
Add a comment why the above has to be saved, reset & restored. (Didn't se anything in the loop that would obviously need this)
for (i= first_tab; i <= last_tab; i++) { JOIN_TAB *rs= join->positions[i].table; @@ -11109,6 +11474,7 @@ if (!rs->emb_sj_nest) *outer_rec_count *= pos.records_read; } + join->cur_sj_inner_tables= save_cur_sj_inner_tables; *reopt_cost= cost; }
@@ -12391,6 +12757,8 @@ share->keys=1; share->uniques= test(using_unique_constraint); table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -12406,6 +12774,8 @@ bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; + if (cur_group == group) + field->key_start.set_bit(0); key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); @@ -12475,6 +12845,8 @@ keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; @@ -12513,6 +12885,13 @@ { key_part_info->null_bit=0;
Remove above as you are setting it below.
key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + key_part_info->null_bit= (*reg_field)->null_bit; + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); +
You should only set null_offset if the field has a null_ptr. It's strange that the old code worked without setting null_bit.... Found the issue; in ha_heap.cc we corrected a wrongly set null bit value. However we should not count in this behavior (which the above fix ensures)
@@ -14603,13 +14848,24 @@ return (*tab->read_record.read_record)(&tab->read_record); }
-static int +int join_read_record_no_init(JOIN_TAB *tab) { + Copy_field *save_copy, *save_copy_end; +
Add comment: /* init_read_record resets all elements of tab->read_record(). Remember things that we don't want to have reset. */ I think it would be better to move copy_field and copy_field_end to JOIN_TAB as these don't have anything directly to do with the read_record functions.
+ save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select,1,1, FALSE); + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + tab->read_record.read_record= rr_sequential_and_unpack; + return (*tab->read_record.read_record)(&tab->read_record); }
<cut>
@@ -19032,16 +19322,21 @@ { table_map used_tables=0;
- uchar sjm_nests[MAX_TABLES]; - uint sjm_nests_cur=0; - uint sjm_nests_end= 0; - uint end_table= join->tables; bool printing_materialize_nest= FALSE; uint select_id= join->select_lex->select_number;
- for (uint i=0 ; i < end_table ; i++) + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + while ((jt_range= it++))
Please replace with a function that goes over all ranges, so that we don't get an extra indentation level here. (Part of suggestion of review 1)
@@ -19321,12 +19550,16 @@ examined_rows= tab->limit; else { - tab->table->file->info(HA_STATUS_VARIABLE); - examined_rows= tab->table->file->stats.records; + //tab->table->file->info(HA_STATUS_VARIABLE);
Remove comment
+ if (!tab->table->pos_in_table_list || + tab->table->is_filled_at_execution()) // temporary, is_filled_at_execution + examined_rows= tab->records; + else + examined_rows= tab->table->file->stats.records;
Shouldn't you call tab->table->file->info(HA_STATUS_VARIABLE) in the last case ? If not, where is it called now?
@@ -19347,7 +19580,7 @@ */ float f= 0.0; if (examined_rows) - f= (float) (100.0 * join->best_positions[i].records_read / + f= (float) (100.0 * tab->records_read/*join->best_positions[i].records_read*/ /
Removed comment as it's not accurate anymore <cut>
diff -urN --exclude='.*' 5.3-noc/sql/sql_show.cc maria-5.3-subqueries-r36-noc/sql/sql_show.cc
--- 5.3-noc/sql/sql_show.cc 2011-01-27 21:39:33.000000000 +0300
+JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables); +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots);
Please add the prototypes in sql_select.h and include this instead of having prototypes to functions in many places.
@@ -6602,14 +6605,17 @@ bool get_schema_tables_result(JOIN *join, enum enum_schema_table_state executed_place) { - JOIN_TAB *tmp_join_tab= join->join_tab+join->tables; + //JOIN_TAB *tmp_join_tab= join->join_tab+join->tables;
Remove comment
THD *thd= join->thd; LEX *lex= thd->lex; bool result= 0; DBUG_ENTER("get_schema_tables_result");
thd->no_warnings_for_error= 1; - for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++) + //for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++)
Remove comment
+ for (JOIN_TAB *tab= first_linear_tab(join, FALSE); + tab; + tab= next_linear_tab(join, tab, FALSE)) { if (!tab->table || !tab->table->pos_in_table_list) break;
+++ maria-5.3-subqueries-r36-noc/sql/sql_test.cc 2011-02-16 14:42:52.000000000 +0300 @@ -166,58 +166,66 @@ void TEST_join(JOIN *join) { - uint i,ref; + uint ref; + int i; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; DBUG_ENTER("TEST_join");
- /* - Assemble results of all the calls to full_name() first, - in order not to garble the tabular output below. - */ - String ref_key_parts[MAX_TABLES]; - for (i= 0; i < join->tables; i++) - { - JOIN_TAB *tab= join->join_tab + i; - for (ref= 0; ref < tab->ref.key_parts; ref++) - { - ref_key_parts[i].append(tab->ref.items[ref]->full_name()); - ref_key_parts[i].append(" "); - } - } - DBUG_LOCK_FILE; VOID(fputs("\nInfo about JOIN\n",DBUG_FILE)); + + while ((jt_range= it++)) { + /* + Assemble results of all the calls to full_name() first, + in order not to garble the tabular output below. + */ + String ref_key_parts[MAX_TABLES]; + for (i= 0; i < (jt_range->end - jt_range->start); i++)
Store (jt_range->end - jt_range->start) in a variable and use it above and below. <cut>
+ for (i= 0; i < (jt_range->end - jt_range->start); i++)
<cut>
+++ maria-5.3-subqueries-r36-noc/sql/table.cc 2011-02-16 14:42:52.000000000 +0300 @@ -5337,6 +5337,12 @@ (parent && parent->children_attached)); }
Add also a comment here what this functions means (copy from table.h)
+ +bool st_table::is_filled_at_execution() +{ + return test(pos_in_table_list->jtbm_subselect); +} +
Regards, Monty