[Maria-developers] MWL#90 combined patch for review
Hi Monty, Please find attached the complete patch of MWL#90 for review. The tree with the code is on launchpad/buildbot: https://code.launchpad.net/~maria-captains/maria/5.3-subqueries-mwl90 http://buildbot.askmonty.org/buildbot/grid?branch=5.3-subqueries-mwl90 and it has no failures other than those also found in mainline 5.3. The merge from 5.3-main was about a week ago. BR Sergey -- Sergey Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog
On Tue, Feb 22, 2011 at 05:24:37PM +0300, Sergey Petrunya wrote:
Hi Monty,
Please find attached the complete patch of MWL#90 for review. The tree with the code is on launchpad/buildbot: https://code.launchpad.net/~maria-captains/maria/5.3-subqueries-mwl90 http://buildbot.askmonty.org/buildbot/grid?branch=5.3-subqueries-mwl90
and it has no failures other than those also found in mainline 5.3. The merge from 5.3-main was about a week ago.
BR Sergey -- Sergey Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog
On Tue, Feb 22, 2011 at 06:03:38PM +0300, Sergey Petrunya wrote:
On Tue, Feb 22, 2011 at 05:24:37PM +0300, Sergey Petrunya wrote:
Hi Monty,
Please find attached the complete patch of MWL#90 for review. The tree with the code is on launchpad/buildbot: https://code.launchpad.net/~maria-captains/maria/5.3-subqueries-mwl90 http://buildbot.askmonty.org/buildbot/grid?branch=5.3-subqueries-mwl90
Please find below dgcov coverage report for the tree. My summary is that uncovered parts are - error handling - handling of semi-joins together with outer joins (a known bug/problem area) - make_in_exists_conversion() which serves as a fallback in case we've failed to convert the subquery to [merged] semi-join. I think I should be able to come up with a test for this. -- dgcov.out file starts -- File: sql/item_subselect.cc ------------------------------------------------------------------------------- | 505: 3844: uint len= my_snprintf(buf, sizeof(buf), "<subquery%d>", subquery_id); | -: 3845: char *name; | 505: 3846: if (!(name= (char*)thd->alloc(len + 1))) | #####: 3847: DBUG_RETURN(TRUE); | 505: 3848: memcpy(name, buf, len+1); | -: 3849: . 505: 3850: if (!(result= new select_materialize_with_stats)) File: sql/item_subselect.h ------------------------------------------------------------------------------- . -: 536: THD * get_thd() { return thd; } . -: 537: virtual int prepare()= 0; . -: 538: virtual void fix_length_and_dec(Item_cache** row)= 0; | #####: 539: virtual int optimize() { DBUG_ASSERT(0); return 0; } . -: 540: /* . -: 541: Execute the engine . -: 542: File: sql/opt_subselect.cc ------------------------------------------------------------------------------- | -: 527:*/ | -: 528: | -: 529:static | #####: 530:bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) | -: 531:{ | #####: 532: DBUG_ENTER("make_in_exists_conversion"); | #####: 533: JOIN *child_join= item->unit->first_select()->join; | -: 534: Item_subselect::trans_res res; | #####: 535: item->changed= 0; | #####: 536: item->fixed= 0; | -: 537: | #####: 538: SELECT_LEX *save_select_lex= thd->lex->current_select; | #####: 539: thd->lex->current_select= item->unit->first_select(); | -: 540: | #####: 541: res= item->select_transformer(child_join); | -: 542: | #####: 543: thd->lex->current_select= save_select_lex; | -: 544: | #####: 545: if (res == Item_subselect::RES_ERROR) | #####: 546: DBUG_RETURN(TRUE); | -: 547: | #####: 548: item->changed= 1; | #####: 549: item->fixed= 1; | -: 550: | #####: 551: Item *substitute= item->substitution; | #####: 552: bool do_fix_fields= !item->substitution->fixed; | -: 553: /* | -: 554: The Item_subselect has already been wrapped with Item_in_optimizer, so we | -: 555: should search for item->optimizer, not 'item'. | -: 556: */ | #####: 557: Item *replace_me= item->optimizer; | #####: 558: DBUG_ASSERT(replace_me==substitute); | -: 559: | -: 560: Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? | #####: 561: &join->conds : &(item->emb_on_expr_nest->on_expr); | #####: 562: if (replace_where_subcondition(join, tree, replace_me, substitute, | -: 563: do_fix_fields)) | #####: 564: DBUG_RETURN(TRUE); | #####: 565: item->substitution= NULL; | -: 566: | #####: 567: if (!thd->stmt_arena->is_conventional()) | -: 568: { | -: 569: tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? | -: 570: &join->select_lex->prep_where : | #####: 571: &(item->emb_on_expr_nest->prep_on_expr); | -: 572: | #####: 573: if (replace_where_subcondition(join, tree, replace_me, substitute, | -: 574: FALSE)) | #####: 575: DBUG_RETURN(TRUE); | -: 576: } | #####: 577: DBUG_RETURN(FALSE); | -: 578:} | -: 579: | -: 580: | 1451: 695: if ((*in_subq)->is_flattenable_semijoin) | -: 696: { | 1375: 697: if (convert_subq_to_sj(join, *in_subq)) | #####: 698: DBUG_RETURN(TRUE); | -: 699: } | -: 700: else | -: 701: { | 76: 702: if (convert_subq_to_jtbm(join, *in_subq, &remove_item)) | #####: 703: DBUG_RETURN(TRUE); | -: 704: } | 1451: 705: if (remove_item) | -: 706: { | 76: 1236: DBUG_ENTER("convert_subq_to_jtbm"); | -: 1237: | 76: 1238: if (subq_pred->setup_engine(TRUE)) | #####: 1239: DBUG_RETURN(TRUE); | -: 1240: | 76: 1241: if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE) | -: 1242: { | #####: 1243: *remove_item= FALSE; | #####: 1244: make_in_exists_conversion(parent_join->thd, parent_join, subq_pred); | #####: 1245: DBUG_RETURN(FALSE); | -: 1246: } | 76: 1247: *remove_item= TRUE; | -: 1248: | 76: 1252: if (!(tbl_alias= (char*)parent_join->thd->calloc(sizeof(alias_mask)+5)) || | -: 1253: !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest! | -: 1254: { | #####: 1255: DBUG_RETURN(TRUE); | -: 1256: } | -: 1257: | 76: 1258: jtbm->join_list= emb_join_list; | -: 1302: /* Inject sj_on_expr into the parent's WHERE or ON */ | 76: 1303: if (emb_tbl_nest) | -: 1304: { | #####: 1305: DBUG_ASSERT(0); | -: 1306: /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, | -: 1307: sj_nest->sj_on_expr); | -: 1308: emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); | -: 2748: */ | 601: 2749: if (!(tab_ref->cond_guards= (bool**) thd->calloc(sizeof(uint*)*tmp_key_parts))) | -: 2750: { | #####: 2751: DBUG_RETURN(TRUE); | -: 2752: } | -: 2753: . 601: 2754: tab_ref->key_err= 1; | 52: 4010: hash_sj_engine->is_materialized= TRUE; | -: 4011: | 52: 4012: if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error) | #####: 4013: DBUG_RETURN(NESTED_LOOP_ERROR); | -: 4014: } | -: 4015: } | 455621: 4016: else if (tab->bush_children) | 137: 4032: if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 || | -: 4033: (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0) | -: 4034: { | #####: 4035: join->return_tab= save_return_tab; | #####: 4036: DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ | -: 4037: } | 137: 4038: join->return_tab= save_return_tab; | 137: 4039: sjm->materialized= TRUE; File: sql/sql_base.cc ------------------------------------------------------------------------------- | 10: 7784: Item *item= table_list->jtbm_subselect; | 10: 7785: if (item->fix_fields(thd, &item)) | -: 7786: { | #####: 7787: my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); | #####: 7788: DBUG_RETURN(1); | -: 7789: } | 10: 7790: DBUG_ASSERT(item == table_list->jtbm_subselect); | 10: 7791: if (table_list->jtbm_subselect->setup_engine(FALSE)) | #####: 7792: DBUG_RETURN(1); | -: 7793: } . -: 7794: } . -: 7795: File: sql/sql_join_cache.cc ------------------------------------------------------------------------------- . -: 2140: } . -: 2141: | 4620: 2142: if ((rc= join_tab_execution_startup(join_tab)) < 0) | #####: 2143: goto finish2; | -: 2144: . -: 2145: /* Prepare to retrieve all records of the joined table */ | 4620: 2146: if ((error= join_tab_scan->open())) File: sql/sql_select.cc ------------------------------------------------------------------------------- | -: 7556: { | -: 7557: /* Push condition to handler */ | 75098: 7558: if (!tab->table->file->cond_push(push_cond)) | #####: 7559: tab->table->file->pushed_cond= push_cond; | -: 7560: } . -: 7561: } . -: 7562: } | 136998: 8558: if (tab->bush_children) | -: 8559: { | 670: 8560: if (setup_sj_materialization(tab)) | #####: 8561: return TRUE; | -: 8562: } | -: 8563: . 136998: 8564: TABLE *table=tab->table; . -:14093: | 2548539:14094: if (rc != NESTED_LOOP_NO_MORE_ROWS && | -:14095: (rc= join_tab_execution_startup(join_tab)) < 0) | #####:14096: DBUG_RETURN(rc); | -:14097: . 2548539:14098: if (join_tab->loosescan_match_tab) . 4:14099: join_tab->loosescan_match_tab->found_match= FALSE; File: sql/sql_test.cc ------------------------------------------------------------------------------- | 8: 210: if (tab->use_quick == 2) | -: 211: fprintf(DBUG_FILE, | -: 212: " quick select checked for each record (keys: %s)\n", | #####: 213: tab->select->quick_keys.print(buf)); | 8: 214: else if (tab->select->quick) | -: 215: { | #####: 216: fprintf(DBUG_FILE, " quick select used:\n"); | #####: 217: tab->select->quick->dbug_dump(18, FALSE); | -: 218: } | -: 219: else | 8: 220: VOID(fputs(" select used\n",DBUG_FILE)); ------------------------------------------------------------------------------- 1667 line(s) in 16 source file(s) modified in revision(s). 50 line(s) not covered by tests. For documentation, see http://forge.mysql.com/wiki/DGCov_doc BR Sergey -- Sergey Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog
Hi! Here is part 1 of the review; part 2 should come later today.
"Sergey" == Sergey Petrunya <psergey@askmonty.org> writes:
Sergey> On Tue, Feb 22, 2011 at 05:24:37PM +0300, Sergey Petrunya wrote:
Hi Monty,
Please find attached the complete patch of MWL#90 for review. The tree with the code is on launchpad/buildbot: https://code.launchpad.net/~maria-captains/maria/5.3-subqueries-mwl90 http://buildbot.askmonty.org/buildbot/grid?branch=5.3-subqueries-mwl90
<cut>
+++ maria-5.3-subqueries-r36-noc/mysql-test/include/mix1.inc 2011-02-16 14:42:52.000000000 +0300 @@ -1532,6 +1532,12 @@
SELECT 1 FROM (SELECT COUNT(DISTINCT c1) FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x; +--echo # MariaDB note: +--echo # This will show 2 for table which has 5 rows. +--echo # This is because the access method employed is actually range access +--echo # which scans 2 records (yes, EXPLAIN displays it incorrectly). +--echo # our correct printing is an artifact of changing in select_describe() +--echo # from printing table->starts.records() to tab->records.
Please remove comment as this is not an issue anymore.
diff -urN --exclude='.*' 5.3-noc/sql/item_subselect.cc maria-5.3-subqueries-r36-noc/sql/item_subselect.cc
@@ -4067,6 +4078,17 @@ }
+int subselect_hash_sj_engine::optimize() +{ + int res= 0;
Don't reset res
+ SELECT_LEX *save_select= thd->lex->current_select; + thd->lex->current_select= materialize_join->select_lex; + res= materialize_join->optimize(); + thd->lex->current_select= save_select; + + return res; +} +
diff -urN --exclude='.*' 5.3-noc/sql/item_subselect.h maria-5.3-subqueries-r36-noc/sql/item_subselect.h --- 5.3-noc/sql/item_subselect.h 2011-01-27 21:39:33.000000000 +0300 +++ maria-5.3-subqueries-r36-noc/sql/item_subselect.h 2011-02-16 14:42:52.000000000 +0300 @@ -46,16 +46,17 @@ < child_join->prepare < engine->prepare *ref= substitution; + substitution= NULL; < Item_subselect::fix_fields */ - Item *substitution; public: + Item *substitution; /* unit of subquery */ st_select_lex_unit *unit; -protected: Item *expr_cache; /* engine that perform execution of subselect (single select or union) */ subselect_engine *engine; +protected: /* old engine if engine was changed */ subselect_engine *old_engine; /* cache of used external tables */ @@ -148,6 +149,7 @@ bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void recalc_used_tables(st_select_lex *new_parent, bool after_pullout); + virtual int optimize(); virtual bool exec(); virtual void fix_length_and_dec(); table_map used_tables() const; @@ -351,7 +353,9 @@ all JOIN in UNION */ Item *expr; +public: Item_in_optimizer *optimizer; +protected:
Move the above to after Item *substitions; No reason to have many :public :protected blocks
@@ -397,6 +401,16 @@ }; enum_exec_method exec_method;
+ /* + TRUE<=>this is a flattenable semi-join, false overwise. + */ + bool is_flattenable_semijoin;
You should set this to zero in Item_subselect::Item_subselect() !
+ + /* + Cost to populate the temporary table (set on if-needed basis). + */ + //double startup_cost; +
Remove startup_cost as it's not used <cut>
@@ -752,7 +767,7 @@
class subselect_hash_sj_engine : public subselect_engine { -protected: +public: /* The table into which the subquery is materialized. */ TABLE *tmp_table; /* TRUE if the subquery was materialized into a temp table. */ @@ -764,14 +779,16 @@ of subselect_single_select_engine::[prepare | cols]. */ subselect_single_select_engine *materialize_engine; +protected: /* The engine used to compute the IN predicate. */ subselect_engine *lookup_engine; /* QEP to execute the subquery and materialize its result into a temporary table. Created during the first call to exec(). */ +public: JOIN *materialize_join; - +protected: /* Keyparts of the only non-NULL composite index in a rowid merge. */ MY_BITMAP non_null_key_parts; /* Keyparts of the single column indexes with NULL, one keypart per index. */ @@ -784,7 +801,9 @@ IN results because index lookups sometimes match values that are actually not equal to the search key in SQL terms. */ +public: Item_cond_and *semi_join_conds; +protected: /* Possible execution strategies that can be used to compute hash semi-join.*/
Please move all public variables to one block.
+++ maria-5.3-subqueries-r36-noc/sql/opt_subselect.cc 2011-02-16 14:42:52.000000000 +0300 @@ -166,6 +328,7 @@ (void)subquery_types_allow_materialization(in_subs);
in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->is_flattenable_semijoin= TRUE;
/* Register the subquery for further processing in flatten_subqueries() */ select_lex-> @@ -220,10 +383,24 @@ (in_subs->is_top_level_item() || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 + !in_subs->is_correlated) // 5 { + if (in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
You removed //6 comment without updating the comment for the block that describes 6. Please update comment!
+ + /* + If the subquery is an AND-part of WHERE register for being processed + with jtbm strategy + */ + if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && + thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1 && + optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN))
Instead of doing: thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1 please create an inline function with a meningfull name to test test or at least replace (TABLE_LIST*) 0x with a define or both.
+ { + in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
Here you could make things cleared and set in_subs->emb_on_expr_nest to a define. <cut>
@@ -339,6 +516,69 @@
/* + Finalize IN->EXISTS conversion in case we couldn't use materialization. + + DESCRIPTION Invoke the IN->EXISTS converter + Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE. + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static +bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) +{ + DBUG_ENTER("make_in_exists_conversion"); + JOIN *child_join= item->unit->first_select()->join; + Item_subselect::trans_res res; + item->changed= 0;
Please add a comment why you set item->fixed to 0 as this is not obvious
+ item->fixed= 0; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= item->unit->first_select(); + + res= item->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res == Item_subselect::RES_ERROR) + DBUG_RETURN(TRUE); + + item->changed= 1; + item->fixed= 1; + + Item *substitute= item->substitution; + bool do_fix_fields= !item->substitution->fixed;
item->substitution -> substitute
+ /* + The Item_subselect has already been wrapped with Item_in_optimizer, so we + should search for item->optimizer, not 'item'. + */ + Item *replace_me= item->optimizer; + DBUG_ASSERT(replace_me==substitute); + + Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &(item->emb_on_expr_nest->on_expr); + if (replace_where_subcondition(join, tree, replace_me, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + item->substitution= NULL; +
The code belove makes a permanent change to the prepared statement tree. Please add a comment about this! Also please add a test where you do a prepared statement that hits his code and excecute it twice with different parameters.
+ if (!thd->stmt_arena->is_conventional()) + { + tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->select_lex->prep_where : + &(item->emb_on_expr_nest->prep_on_expr); + + if (replace_where_subcondition(join, tree, replace_me, substitute, + FALSE)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* Convert semi-join subquery predicates into semi-join join nests
SYNOPSIS @@ -445,25 +685,41 @@ // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES /* Replace all subqueries to be flattened with Item_int(1) */ arena= thd->activate_stmt_arena_if_needed(&backup); - for (in_subq= join->sj_subselects.front(); - in_subq != in_subq_end && - join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; - in_subq++) - { - Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? - &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1), - FALSE)) - DBUG_RETURN(TRUE); /* purecov: inspected */ - }
for (in_subq= join->sj_subselects.front(); in_subq != in_subq_end && join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; in_subq++) { - if (convert_subq_to_sj(join, *in_subq)) - DBUG_RETURN(TRUE); + bool remove_item= TRUE; + if ((*in_subq)->is_flattenable_semijoin) + { + if (convert_subq_to_sj(join, *in_subq)) + DBUG_RETURN(TRUE); + } + else + { + if (convert_subq_to_jtbm(join, *in_subq, &remove_item)) + DBUG_RETURN(TRUE); + } + if (remove_item) + { + Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->is_flattenable_semijoin) + { + replace_me= (*in_subq)->optimizer; + }
Consider doing a function: Item_in_subselect::original_item() { return is_flattenable_semijoin ? self : item->optimizer; } This would make the above code a bit easier to understand by doing: Item *replace_me= (*in_subq)->original_item() This would thus be a bit like real_item()
+ if (replace_where_subcondition(join, tree, replace_me, new Item_int(1), + FALSE)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + } } skip_conversion: /* @@ -494,7 +750,19 @@ bool do_fix_fields= !(*in_subq)->substitution->fixed; Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->is_flattenable_semijoin) + { + replace_me= (*in_subq)->optimizer; + }
The above code can also be made easier with (*in_subq)->original_item(). The above comment is a good one for original_item()!
+ +/* + Get #output_rows and scan_time estimates for a "delayed" table. + + SYNOPSIS + get_delayed_table_estimates() + table IN Table to get estimates for + out_rows OUT E(#rows in the table) + scan_time OUT E(scan_time). + startup_cost OUT cost to populate the table. + + DESCRIPTION + Get #output_rows and scan_time estimates for a "delayed" table. By + "delayed" here we mean that the table is filled at the start of query + execution. This means that the optimizer can't use table statistics to + get #rows estimate for it, it has to call this function instead. + + This function is expected to make different actions depending on the nature + of the table. At the moment there is only one kind of delayed tables, + non-flattenable semi-joins. +*/ + +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost) +{ + Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; + item->optimize(); + + DBUG_ASSERT(item->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)item->engine); + JOIN *join= hash_sj_engine->materialize_join; + + double rows= 1; + double read_time= 0.0; + + /* Calculate #rows and cost of join execution */ + for (uint i= join->const_tables; i < join->tables; i++) + { + rows *= join->best_positions[i].records_read; + read_time += join->best_positions[i].read_time; + } + *out_rows= (ha_rows)rows; + *startup_cost= read_time;
The above loop is identical to doing: get_partial_join_cost(join, join->table, startup_cost, &rows); *out_rows= (ha_rows) rows;
+ /* Calculate cost of scanning the temptable */ + double data_size= rows * hash_sj_engine->tmp_table->s->reclength; + /* Do like in handler::read_time */ + *scan_time= data_size/IO_SIZE + 2; +}
+ +/* + Convert subquery predicate into non-mergeable semi-join nest. + + TODO: + why does this do IN-EXISTS conversion? Can't we unify it with mergeable + semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless + fatal errors) + + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, + bool *remove_item) +{ + SELECT_LEX *parent_lex= parent_join->select_lex; + List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list; + TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins + TABLE_LIST *tl; + DBUG_ENTER("convert_subq_to_jtbm"); + + if (subq_pred->setup_engine(TRUE)) + DBUG_RETURN(TRUE); +
/* Please ensure that the code below is covered with some test case !*/
+ if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE) + { + *remove_item= FALSE; + make_in_exists_conversion(parent_join->thd, parent_join, subq_pred);
You should return value of make_in_exists_conversion() as this may fail...
+ DBUG_RETURN(FALSE); + }
+ *remove_item= TRUE; + + TABLE_LIST *jtbm; + char *tbl_alias; + const char alias_mask[]="<subquery%d>"; + if (!(tbl_alias= (char*)parent_join->thd->calloc(sizeof(alias_mask)+5)) || + !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest! + { + DBUG_RETURN(TRUE); + } + + jtbm->join_list= emb_join_list; + jtbm->embedding= emb_tbl_nest; + jtbm->jtbm_subselect= subq_pred; + jtbm->nested_join= NULL; + + /* Nests do not participate in those 'chains', so: */ + /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/ + emb_join_list->push_back(jtbm); + + /* + Inject the jtbm table into TABLE_LIST::next_leaf list, so that + make_join_statistics() and co. can find it. + */ + for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ;
Add an explicit {} on an empty row, as otherwise you may get compiler warnings from some compilers
+ tl->next_leaf= jtbm; + + /* + Same as above for TABLE_LIST::next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ;
Add an explicit {} on an empty row...
+ tl->next_local= jtbm;
Do you really have to put the table last ? It would be much esier to put it first...
+ + /* A theory: no need to re-connect the next_global chain */ + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)subq_pred->engine); + jtbm->table= hash_sj_engine->tmp_table; + + jtbm->table->tablenr= parent_join->tables; + jtbm->table->map= table_map(1) << (parent_join->tables); + + parent_join->tables++; +
Where is it checked that this doesn't exceed MAX_TABLES? Add a comment for the function that guarantees this and an DBUG_ASSERT
+ Item *conds= hash_sj_engine->semi_join_conds; + conds->fix_after_pullout(parent_lex, &conds); + + DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY);); + + my_snprintf(tbl_alias, sizeof(alias_mask)+5, alias_mask, + hash_sj_engine->materialize_join->select_lex->select_number);
A fast way to generate the name would be to use the following function: #define subquery_table_name_length= sizeof("<subquery9999>"); void create_temporary_subquery_table_name(char *to, uint number) { DBUG_ASSERT(number < 10000); to= strmov(to, "<subquery"); to= int10_to_str((int) number, to, 10); to[0]= '>'; to[1]= 0; } This is significantly faster than my_snprintf(). This would also simplify the other code you have when you generate the name.
+ jtbm->alias= tbl_alias; + + /* Inject sj_on_expr into the parent's WHERE or ON */ + if (emb_tbl_nest) + { + DBUG_ASSERT(0); + /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, + sj_nest->sj_on_expr); + emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); + */
Please remove the comment and change the above to: DBUG_ASSERT(!emb_tbl_nest);
+enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) +{
<cut>
+ else if (tab->bush_children) + { + /* It's a merged SJM nest */ + enum_nested_loop_state rc; + JOIN *join= tab->join; + SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info; + JOIN_TAB *join_tab= tab->bush_children->start; + JOIN_TAB *save_return_tab= join->return_tab;
move setting of the last 2 variables inside the loop belpw
--- 5.3-noc/sql/sql_base.cc 2011-01-27 21:39:33.000000000 +0300 +++ maria-5.3-subqueries-r36-noc/sql/sql_base.cc 2011-02-21 18:15:06.000000000 +0300 @@ -7778,6 +7778,18 @@ if (res) DBUG_RETURN(1); } + if (table_list->jtbm_subselect) + { + Item *item= table_list->jtbm_subselect; + if (item->fix_fields(thd, &item)) + { + my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); + DBUG_RETURN(1); + } + DBUG_ASSERT(item == table_list->jtbm_subselect); + if (table_list->jtbm_subselect->setup_engine(FALSE)) + DBUG_RETURN(1); + } }
Regards, Monty
Hi! Part 2 (of 3) Sorry got guests for birthday party that I have to make food for, so I have to finnish the last part on Sunday.
"Sergey" == Sergey Petrunya <psergey@askmonty.org> writes:
<cut>
--- 5.3-noc/sql/sql_join_cache.cc 2011-02-17 15:42:51.000000000 +0300
@@ -203,11 +159,34 @@
void JOIN_CACHE::calc_record_fields() { + JOIN_TAB *tab; + + if (prev_cache) + tab= prev_cache->join_tab; + else + { + if (join_tab->bush_root_tab) + { + /* + If the tab we're attached to is inside an SJM-nest, start from the + first tab in that SJM nest + */ + tab= join_tab->bush_root_tab->bush_children->start; + } + else + { + /* + The tab we're attached to is not inside an SJM-nest. Start from the + first non-const table. + */ + tab= join->join_tab + join->const_tables; + } + } + start_tab= tab; + if (start_tab->bush_children) + start_tab= start_tab->bush_children->start;
Is it possible that the new start_tab also have bush_childrens or is this a case for if (prev_cache). If it's the later (conclusion we took on IRC), move the if to the first if part. To make this clear, add at end: DBUG_ASSERT(!start_tab->bush_children);
+ + tab= start_tab;
Above code is a simpler if you use start_tab in all the places and only last set tab= start_tab
@@ -524,7 +507,8 @@
/* Now create local fields that are used to build ref for this key access */ copy= field_descr+flag_fields; - for (tab= join_tab-tables; tab; tab= get_next_table(tab)) + for (tab= start_tab; tab != join_tab; tab= next_linear_tab(join, tab, FALSE)) + //for (tab= join_tab-tables; tab; tab= get_next_table(tab))
Remove comment
@@ -2155,8 +2140,12 @@ }
/* Prepare to retrieve all records of the joined table */ - if ((error= join_tab_scan->open())) + if ((error= join_tab_scan->open())) goto finish; /* psergey-note: if this returns error, we will assert in net_send_statement() */
Remove psgergey-note and do a proper comment before the goto.
+void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) +{ + JOIN_TAB *first= join_tab->bush_root_tab? + join_tab->bush_root_tab->bush_children->start : + join_tab->join->join_tab + join_tab->join->const_tables; + + for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) + { + if (tab->bush_children) + { + for (JOIN_TAB *child= tab->bush_children->start; + child != tab->bush_children->end; + child++) + { + if (save) + child->table->status= child->status;
an 'else' is missing here. Please spend a short time trying to create a test case that catches this bug, as it will ensure that this part of the code is properly tested...
+ { + tab->status= tab->table->status; + tab->table->status= 0; + } + } + }
<cut>
diff -urN --exclude='.*' 5.3-noc/sql/sql_join_cache.h maria-5.3-subqueries-r36-noc/sql/sql_join_cache.h --- 5.3-noc/sql/sql_join_cache.h 2011-02-17 15:42:51.000000000 +0300 +++ maria-5.3-subqueries-r36-noc/sql/sql_join_cache.h 2011-02-21 18:15:06.000000000 +0300
<cut>
@@ -647,7 +650,7 @@ buff= 0; }
- JOIN_TAB *get_next_table(JOIN_TAB *tab); + // JOIN_TAB *get_next_table(JOIN_TAB *tab);
Remove comment
+++ maria-5.3-subqueries-r36-noc/sql/sql_select.cc 2011-02-22 17:19:29.000000000 +0300
@@ -1036,50 +1037,72 @@ /* Perform the optimization on fields evaluation mentioned above for all on expressions. + */ + { + List_iterator<JOIN_TAB_RANGE> it(join_tab_ranges); + JOIN_TAB_RANGE *jt_range;
Add comment: /* Skip the const tables from the upper level JOIN_TAB's. We don't have to do this for SJM nests as the const tables are pulled out from these and added to the upper level JOIN_TAB. */
+ uint first_tab_offs= const_tables;
<cut>
/* Perform the optimization on fields evaliation mentioned above for all used ref items. */ - for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables; tab++) + //for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables; tab++) + //{
Remove comments
{ - for (uint i=0; i < tab->ref.key_parts; i++) + List_iterator<JOIN_TAB_RANGE> it(join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + uint first_tab_offs= const_tables; + while ((jt_range= it++)) { + for (JOIN_TAB *tab= jt_range->start + first_tab_offs; + tab < jt_range->end; tab++) + { + for (uint i=0; i < tab->ref.key_parts; i++) + { +
This is a big change that causes a lot of code to be re-indented which will cause merge conflicts. The above also exposes a little too much how the tables are organized. Please replace loop with either: for (tab= first_linear_tab(...) ; tab ; tab= next_linear_tab(...)) Or add a set of similar functions to loop over join_tab_ranges: This also simplifies the usage of-
@@ -1352,8 +1375,11 @@ */ if (need_tmp || select_distinct || group_list || order) { - for (uint i = const_tables; i < tables; i++) - join_tab[i].table->prepare_for_position();
+ for (uint i= 0; i < tables; i++) + { + if (!(table[i]->map & const_table_map)) + table[i]->prepare_for_position(); + } }
Did not understand the above change as const tables should still be first in join_tab[] and all tables in 'table' should have a correspoding join_tab element. The fact that we added some sjm tables, should not change this, should it? After discussion on IRC: There is no mapping between join->join_tab[] and join->table[] anymore. join->table contains all 'real tables' while join_tab also contains sjm (materialized) tables. This makes me worried that one may accidently use 'join->tables' wrongly when they mean join->top_jtrange_tables. Suggestion. Rename join->tables to 'join->real_tables' Rename join->top_jtrange_tables to 'join->join_tab_tables' or Rename join->tables to 'join->table_count' Rename join->top_jtrange_tables to 'join->join_tab_count'
DBUG_EXECUTE("info",TEST_join(this);); @@ -1604,9 +1630,12 @@ if (conds) conds= conds->transform(&Item::expr_cache_insert_transformer, (uchar*) thd); + for (JOIN_TAB *tab= first_linear_tab(this, TRUE); + tab; + tab= next_linear_tab(this, tab, TRUE)) + //for (JOIN_TAB *tab= join_tab + const_tables; + // tab < join_tab + tables ; + // tab++)
Remove comment
@@ -2799,10 +2828,12 @@ JOIN_TAB *stat_vector[MAX_TABLES+1]; DBUG_ENTER("make_join_statistics");
+ LINT_INIT(table); /* inited in all loops */ table_count=join->tables; - stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count); + + stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count)); stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES); - table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2)); + table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*((table_count)*2));
Don't understand the above change. please revert it. (no need to have () around table_count)
if (!stat || !stat_ref || !table_vector) DBUG_RETURN(1); // Eom /* purecov: inspected */
@@ -2835,9 +2866,10 @@ table->reginfo.join_tab=s; table->reginfo.not_exists_optimize=0; bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); - all_table_map|= table->map; + all_table_map|= s->table->map;
Revert, as we here know that s->table == table (assignment done a couple of lines before)
s->join=join; s->info=0; // For describe + s->bush_root_tab= NULL;
Remove the above two lines as all elements in s are guaranteed to be 0
s->dependent= tables->dep_tables; s->key_dependent= 0;
You can remove the above line too
@@ -2998,6 +3033,9 @@ { table=s->table;
+ if (table->is_filled_at_execution()) + continue; + /* If equi-join condition by a key is null rejecting and after a substitution of a const table the key value happens to be null @@ -3155,15 +3193,30 @@
for (s=stat ; s < stat_end ; s++) { + s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { /* Only one matching row */ - s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0; + s->found_records= s->records= 1; + s->read_time=1.0; + s->worst_seeks=1.0; continue; } /* Approximate found rows and time to read them */ - s->found_records=s->records=s->table->file->stats.records; - s->read_time=(ha_rows) s->table->file->scan_time(); + + if (s->table->is_filled_at_execution()) + { + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records;
Add a comment why the following is set as we don't set it below.
+ table->quick_condition_rows=s->records; + } + else + { + s->found_records= s->records= s->table->file->stats.records; + s->read_time= s->table->file->scan_time(); + } +
+JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables) +{ + JOIN_TAB *first= join->join_tab; + if (after_const_tables) + first+= join->const_tables; + if (first < join->join_tab + join->top_jtrange_tables) + return first;
Add comment: /* All tables where const tables */
+ return NULL; +}
+ + +/* + A helper function to loop over all join's join_tab in sequential fashion + + DESCRIPTION + Depending on include_bush_roots parameter, JOIN_TABS that represent + SJM-scan/lookups are produced or omitted. + + SJM-Bush children are returned right after (or in place of) their container + join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems + to.)
Add comment: If one is scanning with include_bush_roots, subquent calls to next_linear_tab() will return: ot1 ot2 sjm it1 it2 it3 ot3 .... When include_bush_rooots=FALSE we get: ot1 ot2 it1 it2 it3 ot3 .... (note the lack of sjm)
+*/ + +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) +{ + if (include_bush_roots && tab->bush_children) + return tab->bush_children->start; + + DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);
+ if (tab->last_leaf_in_bush) + tab= tab->bush_root_tab; + + if (tab->bush_root_tab) + return ++tab; + + if (++tab == join->join_tab + join->top_jtrange_tables) + return NULL; + + if (!include_bush_roots && tab->bush_children) + tab= tab->bush_children->start; + + return tab; +}
Here is the above function with more comments and I moved the bush_root_tab handling into one place JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) { if (include_bush_roots && tab->bush_children) { /* This JOIN_TAB is a SJM nest; Start from first table in nest */ return tab->bush_children->start; } DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab); if (tab->bush_root_tab) /* Are we inside an SJM nest */ { /* Inside SJM nest */ if (!tab->last_leaf_in_bush) return tab+1; /* Return next in nest */ /* Continue from the sjm on the top level */ tab= tab->bush_root_tab; } /* If no more JOIN_TAB's on the top level */ if (++tab == join->join_tab + join->top_jtrange_tables) return NULL; if (!include_bush_roots && tab->bush_children) { /* This JOIN_TAB is a SJM nest; Start from first table in nest */ tab= tab->bush_children->start; } return tab; } Conclusion from IRC: <spetrunia> if the caller intends to enumerate bush roots, too, then he starts from the first join_tab (that is an sjm) <spetrunia> if the caller intends to skip bush roots, then its his responsibility to start from non-sjm <spetrunia> Hm.. is suppose we could isolate the above logic in first_linear_tab() function <spetrunia> because right now it's all over the place <montywi> yes, that is confusing me a bit <montywi> it would be good to always start with 'first linear_tab()' and then do next_linear_tab() <montywi> and first_linear tab would also have the same include_bush_roots argument <montywi> wouldn't that solve it? <spetrunia> yes Please change then the logic as above.
+ + +/* + A helper function to iterate over all join tables in bush-children-first order + + DESCRIPTION + + For example, for this join plan + + ot1 ot2 sjm ot3 + | +--------+ + | | + it1 it2 it3 + + + the function will return + + ot1-ot2-it1-it2-it3-sjm-ot3 ... + +*/
Good comment. More of these!
+JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) +{ + bool start= FALSE; + if (tab == NULL) + { + /* This means we're starting the enumeration */ + if (join->const_tables == join->top_jtrange_tables) + return NULL; + + tab= join->join_tab + join->const_tables; + start= TRUE; + } + + if (tab->last_leaf_in_bush) + return tab->bush_root_tab; + + /* Move to next tab in the array we're traversing*/ + if (!start) + tab++; + + if (tab == join->join_tab_ranges.head()->end) + return NULL; /* End */
Replace with: if (tab == join->join_tab + join->top_jtrange_tables) Because this is simpler and similar to the earlier test in same function
+ + if (tab->bush_children) + return tab->bush_children->start; + + return tab; +}
I think it's better to have a separate function for the first case as it makes things faster and easier to understand: JOIN_TAB *start_depth_first_tab(JOIN *join) { JOIN_TAB* tab; if (join->const_tables == join->top_jtrange_tables) return NULL; tab= join->join_tab + join->const_tables; return (tab->bush_children) ? tab->bush_children->start : tab; }
@@ -6349,7 +6511,7 @@ static bool get_best_combination(JOIN *join) { - uint i,tablenr; + uint tablenr; table_map used_tables; JOIN_TAB *join_tab,*j; KEYUSE *keyuse;
@@ -6368,10 +6530,74 @@
fix_semijoin_strategies_for_picked_join_order(join);
+ JOIN_TAB_RANGE *root_range= new JOIN_TAB_RANGE;
Please move the code for replacing join to another function to keep get_best_combination() small. Check for out of memory condition here.
+ root_range->start= join->join_tab; + /* root_range->end will be set later */ + join->join_tab_ranges.empty(); + join->join_tab_ranges.push_back(root_range);
Check for out of memory condition here.
+ + JOIN_TAB *sjm_nest_end= NULL; + JOIN_TAB *sjm_saved_tab; /* protected by sjm_nest_end */ + LINT_INIT(sjm_saved_tab); + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) { TABLE *form; + POSITION *cur_pos= &join->best_positions[tablenr]; + if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || + cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + {
Please move this to another function to keep get_best_combination() short.
+ /* + Ok, we've entered an SJ-Materialization semi-join (note that this can't + be done recursively, semi-joins are not allowed to be nested). + */ + /* + 1. Put into main join order a JOIN_TAB that represents a lookup or scan + in the temptable. + // TODO: record this join_tab to be processed by + // setup_semijoin_elimination? + */ + bzero(j, sizeof(JOIN_TAB)); + j->join= join; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->ref.key_parts=0; + j->loosescan_match_tab= NULL; //non-nulls will be set later + j->use_join_cache= FALSE; + j->on_expr_ref= (Item**) &null_ptr; + j->cache= NULL; + j->keys= key_map(1); // The unique index is always in 'possible keys' in EXPLAIN
Above remove all setting of things to 0 or NULL as bzero does this for you!
+ /* + 2. Proceed with processing SJM nest's join tabs, putting them into the + sub-order + */ + SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; + j->records= j->records_read= (ha_rows)(sjm->is_sj_scan? sjm->rows : 1); + JOIN_TAB *jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB) * sjm->tables);
Test for out of memory!
+ JOIN_TAB_RANGE *jt_range= new JOIN_TAB_RANGE;
Test for out of memory!
+ jt_range->start= jt; + jt_range->end= jt + sjm->tables; + //sjm->jt_range= jt_range;
Remove comment
+ join->join_tab_ranges.push_back(jt_range);
Test for out of memory!
+ j->bush_children= jt_range; + j->bush_root_tab= NULL; //note: a lot of code depends on bush nodes not containing one another + j->quick= NULL;
Remove setting of the above NULL's
+ sjm_nest_end= jt + sjm->tables; + sjm_saved_tab= j; + root_range->end= j+1;
Why set root_range->end here (and below) ?
+ + j= jt; + //goto loop_end_not_table;
Remove comment.
+ } + *j= *join->best_positions[tablenr].table; + + if (sjm_nest_end) + j->bush_root_tab= sjm_saved_tab;
You can do this simpler by having sjm_saved_tab set to 0 at start and reset to 0 when sjm_nest_end is set. This allows you to simple do: j->bush_root_tab= sjm_saved_tab;
+ else + root_range->end= j+1;
Why set root_range->end here (and above) ? Isn't it better to set this once end of loop ? Becasue now you set it for every found sjm...
form=join->table[tablenr]=j->table; used_tables|= form->map; form->reginfo.join_tab=j; @@ -6379,14 +6605,14 @@ form->reginfo.not_exists_optimize=0; // Only with LEFT JOIN DBUG_PRINT("info",("type: %d", j->type)); if (j->type == JT_CONST) - continue; // Handled in make_join_stat.. + goto loop_end; // Handled in make_join_stat..
j->loosescan_match_tab= NULL; //non-nulls will be set later j->ref.key = -1; j->ref.key_parts=0;
if (j->type == JT_SYSTEM) - continue; + goto loop_end; if ( !(keyuse= join->best_positions[tablenr].key) || (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { @@ -6397,10 +6623,24 @@ } else if (create_ref_for_key(join, j, keyuse, used_tables)) DBUG_RETURN(TRUE); // Something went wrong
+ loop_end: + j->records_read= (ha_rows)join->best_positions[tablenr].records_read;
Why the above addition? (Was not part of the original code) Add at least a comment about this.
+ join->map2table[j->table->tablenr]= j; + + // If we've reached the end of sjm nest, switch back to main sequence + if (j + 1 == sjm_nest_end) + { + j->last_leaf_in_bush= TRUE; + j= sjm_saved_tab; + sjm_nest_end= NULL;
If you use my suggestion about simpliying setting of bush_root_tab you should set sjm_save_tab to NULL here.
+ } }
+ //for (i=0 ; i < table_count ; i++) + // join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i;
Remove comment
update_depend_map(join); DBUG_RETURN(0); } @@ -6741,6 +6981,8 @@
join_tab= parent->join_tab_reexec; table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table; + top_jtrange_tables= 1; + tables= 1;
You can do: top_jtrange_tables= table = 1; /* We have only one table and one join_tab*/
@@ -6786,6 +7028,9 @@ join_tab->loosescan_match_tab= NULL; join_tab->emb_sj_nest= NULL; join_tab->pre_idx_push_select_cond= NULL; + join_tab->bush_root_tab= NULL; + join_tab->bush_children= NULL; + join_tab->last_leaf_in_bush= FALSE;
To make things safe, change earlier: !(parent->join_tab_reexec= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) to !(parent->join_tab_reexec= (JOIN_TAB*) thd->calloc(sizeof(JOIN_TAB)))) To guarantee that all elements are 0
@@ -6996,58 +7243,72 @@ make_outerjoin_info(JOIN *join) { DBUG_ENTER("make_outerjoin_info"); - for (uint i=join->const_tables ; i < join->tables ; i++) - { - JOIN_TAB *tab=join->join_tab+i; - TABLE *table=tab->table; - TABLE_LIST *tbl= table->pos_in_table_list; - TABLE_LIST *embedding= tbl->embedding; + bool top= TRUE; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range;
- if (tbl->outer_join) + while ((jt_range= it++)) + {
Please see if you can fix this to use first_linear_tab() ... next_linear_tab(join, tab, FALSE)) to not get another indentation level (and simpler code)
+ for (JOIN_TAB *tab=jt_range->start + (top ? join->const_tables : 0); + tab != jt_range->end; tab++) { + TABLE *table=tab->table; /* + psergey: The following is probably incorrect, fix it when we get + semi+outer joins processing to work: */
When is that?
+ if (!table) continue; + TABLE_LIST *tbl= table->pos_in_table_list; + TABLE_LIST *embedding= tbl->embedding; + + if (tbl->outer_join) { /* + Table tab is the only one inner table for outer join. + (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a + is in the query above.) + */ + tab->last_inner= tab->first_inner= tab; + tab->on_expr_ref= &tbl->on_expr; tab->cond_equal= tbl->cond_equal; - if (embedding->embedding) - tab->first_upper= embedding->embedding->nested_join->first_nested;
Why remove the above? (I can see that could be wrong as we don't know if embedding exists, and it should at least be inside the if below, but why remove it?)
+ JOIN_TAB *first_inner_tab= tab->first_inner; + + if (tab->table) + current_map= tab->table->map; + else + current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; +
Please rename 'sj_inner_tables' to sj_inner_table_map' to make the above a bit easier to read. Also, as we spoke on phone that it would be good that after the WL is done, you add some documentation also of the emb_sj_nest into the comment of opt_subselect.c. It would be good to have the whole tab->bush_children structure documented in one place and how and why one should use JOIN_TAB to connect to TABLE_LIST. (The reason is that a goal should be that after we have created JOIN_TAB we should never have to again refer to TABLE_LIST objects and to reach this goal we need to understand how things works now)
bool use_quick_range=0; COND *tmp;
if (tab->type != JT_ALL)
Regards, Monty
Hi! Here is the third and last part of the review.
"Sergey" == Sergey Petrunya <psergey@askmonty.org> writes:
<cut>
@@ -7433,8 +7718,18 @@
table_map used_tables2= (join->const_table_map | OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); - for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) { + if (!tab->table) + { + /* + psergey-todo: this is probably incorrect, fix this when we get + correct processing for outer joins + semi joins + */ + continue; + }
When is the above fix going to happen ? (Which worklog). What is the effect if we execute the continue? a) Wrong result b) Crash 3) Slower code ? Please add a comment about this if it's 3) If it's 1 or 2 we should fix it before pushing to trunk.
@@ -7480,37 +7775,12 @@ (*sel_cond_ref)->update_used_tables(); if (cond_tab->select) cond_tab->select->cond= cond_tab->select_cond; - } + } + if (tab == last_tab) + break;
Wouldn't it be better to fix the 'for' loop, as otherwise you may get a crash if tab->table is not true for the tab == last_tab. How about this for a better for loop for (JOIN_TAB *tab= first_linear_tab(join, TRUE), prev_tab= 0; prev_tab != last_tab; prev_tab= tab, tab = next_linear_tab(join, tab, TRUE)) Note that we don't have to test for tab == 0 as we will always find last_tab first.
@@ -7968,8 +8240,8 @@ Don't use join buffering if we're dictated not to by no_jbuf_after (this ...) */ - if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || - sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) + if ((!tab->bush_root_tab? !(i <= no_jbuf_after) : FALSE) || + tab->loosescan_match_tab || tab->bush_children) goto no_join_cache;
The above was a bit complex if. How about: if (((!tab->bush_root_tab && !(i <= no_jbuf_after)) || tab->loosescan_match_tab || tab->bush_children) goto no_join_cache; Don't really understand how we could use join_cache with SMJ_NEST child (ie, a tab with tab->bush_root_tab set).
for (JOIN_TAB *first_inner= tab->first_inner; first_inner; @@ -8122,16 +8394,24 @@ void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, uint no_jbuf_after) { - JOIN_TAB *first_sjm_table= NULL; - JOIN_TAB *last_sjm_table= NULL; + //JOIN_TAB *first_sjm_table= NULL; + //JOIN_TAB *last_sjm_table= NULL;
Remove comments. (Please remove all these before you request a review as the deleted lines will show up nicely in the diff anyway).
+ JOIN_TAB *tab;
- for (uint i= join->const_tables; i < join->tables; i++) - join->join_tab[i].used_join_cache_level= join->max_allowed_join_cache_level; - - for (uint i= join->const_tables; i < join->tables; i++) - { - JOIN_TAB *tab= join->join_tab+i; + //for (uint i= join->const_tables; i < join->tables; i++)
Remove comment
+ for (tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) + { + tab->used_join_cache_level= join->max_allowed_join_cache_level; + }
+ //for (uint i= join->const_tables; i < join->tables; i++)
Remove comment
+ for (tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) + { +#if 0 if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) { first_sjm_table= tab; @@ -8142,9 +8422,16 @@ } if (!(tab >= first_sjm_table && tab < last_sjm_table)) tab->first_sjm_sibling= NULL; - +#endif
Remove above ifdefed block
+ JOIN_TAB *prev_tab; +restart: tab->icp_other_tables_ok= TRUE; tab->idx_cond_fact_out= TRUE; + + prev_tab= tab - 1;
Isn't it more clear to set prev_tab as part of the for loop? (This will make it independent of how next_linear_tab() is implemented)
@@ -8154,12 +8441,22 @@
+ /* if (join->return_tab) i= join->return_tab-join->join_tab-1; // always >= 0 + */
Remove commented code
@@ -8204,10 +8501,17 @@ setup_semijoin_dups_elimination(join, options, no_jbuf_after)) DBUG_RETURN(TRUE); /* purecov: inspected */
- for (i= 0; i < join->const_tables; i++) - join->join_tab[i].partial_join_cardinality= 1; + for (tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) + { + tab->partial_join_cardinality= 1; + }
Isn't the above loop wrong ? It was supposed to only loop over const tables, not skip const tables.
- for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++)
Remove comment
+ for (tab= first_linear_tab(join, TRUE), i= join->const_tables; + tab; + tab= next_linear_tab(join, tab, TRUE)) { /* The approximation below for partial join cardinality is not good because @@ -8215,24 +8519,45 @@ - it does not differentiate between inner joins, outer joins and semi-joins. Later it should be improved. */ - JOIN_TAB *tab=join->join_tab+i; + JOIN_TAB *prev_tab= tab - 1;
Set prev_tab in the for loop. This allows us to get automaticly the second part of the following if done:
+ if ((tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) || + (tab == join->join_tab + join->const_tables)) + prev_tab= NULL;
In other words use: for (tab= first_linear_tab(join, TRUE), i= join->const_tables, prev_tab=0; tab; prev_tab= tab; tab= next_linear_tab(join, tab, TRUE))
+ DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table); tab->partial_join_cardinality= join->best_positions[i].records_read * - (i ? (tab-1)->partial_join_cardinality : 1); + (prev_tab? prev_tab->partial_join_cardinality : 1); + if (!tab->bush_children) + i++;
Why do you assign and increment i? I can't see it beeing used anymore in the code.
- for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++)
Remove comment.
+ for (tab= first_linear_tab(join, TRUE), i= join->const_tables; + tab; + tab= next_linear_tab(join, tab, TRUE), i++) { - JOIN_TAB *tab=join->join_tab+i; + //JOIN_TAB *tab=join->join_tab+i;
Remove comment.
+ if (tab->bush_children) + { + if (setup_sj_materialization(tab)) + return TRUE; + } + TABLE *table=tab->table; uint jcl= tab->used_join_cache_level; tab->read_record.table= table; tab->read_record.file=table->file; tab->read_record.unlock_row= rr_unlock_row; - tab->next_select=sub_select; /* normal select */ tab->sorted= sorted; sorted= 0; // only first must be sorted + + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + }
The above needs a comment like: /* We should not set tab->next_select for the last table in the SMJ-nest, as this is already set in setup_sj_materialization(). */ (It would be nicer to have next_select set to 0 by default and here only set it if it was not set before).
@@ -8395,13 +8707,16 @@ abort(); /* purecov: deadcode */ } } - join->join_tab[join->tables-1].next_select=0; /* Set by do_select */ + uint n_top_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */
Isn't the above same as (join->join_tab_ranges.head()->end -1).next_select= 0 ?
-/* + /* If a join buffer is used to join a table the ordering by an index for the first non-constant table cannot be employed anymore. */ - for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++) + for (i=join->const_tables ; i < n_top_tables ; i++) {
Wouldn't it be better to also here use the normal for loop for JOIN_TAB's to hide the implementation a bit ? for (tab= first_linear_tab(join, TRUE), tab; tab= next_linear_tab(join, tab, FALSE))
@@ -8481,6 +8799,12 @@ { table->disable_keyread(); table->file->ha_index_or_rnd_end(); + + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + table->pos_in_table_list->jtbm_subselect->cleanup(); + } /* We need to reset this for next select (Tested in part_of_refkey) @@ -8675,7 +8999,7 @@
if (table) { - JOIN_TAB *tab,*end; + JOIN_TAB *tab; /* Only a sorted table may be cached. This sorted table is always the first non const table in join->table @@ -8688,13 +9012,15 @@
if (full) { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE)) tab->cleanup(); + //psergey4: how is the above supposed to work when + //top_jtrange_tables==FALSE? It will crash right away!
Don't understand the comment. If top_jtrange_tables is 0 then tab is 0 and the for loop will never execute. anyway, it would be better to use our standard loop: for (tab= first_linear_tab(join, FALSE), tab; tab= next_linear_tab(join, tab, TRUE)) instead of having yet another way to do the loop over JOIN_TAB's
table= 0; } else { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE))
Same here; Change to a basic for loop
@@ -9982,7 +10343,8 @@
/* Pick the "head" item: the constant one or the first in the join order - that's not inside some SJM nest. + that's not inside some SJM nest. psergey2: out-of-date comment. It is ok + inside SJM, too.
Please update comment to be correct.
*/ if (item_const) head= item_const; @@ -11084,6 +11446,9 @@ for (i= first_tab; i <= last_tab; i++) reopt_remaining_tables |= join->positions[i].table->table->map;
+ table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; + join->cur_sj_inner_tables= 0; +
Add a comment why the above has to be saved, reset & restored. (Didn't se anything in the loop that would obviously need this)
for (i= first_tab; i <= last_tab; i++) { JOIN_TAB *rs= join->positions[i].table; @@ -11109,6 +11474,7 @@ if (!rs->emb_sj_nest) *outer_rec_count *= pos.records_read; } + join->cur_sj_inner_tables= save_cur_sj_inner_tables; *reopt_cost= cost; }
@@ -12391,6 +12757,8 @@ share->keys=1; share->uniques= test(using_unique_constraint); table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -12406,6 +12774,8 @@ bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; + if (cur_group == group) + field->key_start.set_bit(0); key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); @@ -12475,6 +12845,8 @@ keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; @@ -12513,6 +12885,13 @@ { key_part_info->null_bit=0;
Remove above as you are setting it below.
key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + key_part_info->null_bit= (*reg_field)->null_bit; + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); +
You should only set null_offset if the field has a null_ptr. It's strange that the old code worked without setting null_bit.... Found the issue; in ha_heap.cc we corrected a wrongly set null bit value. However we should not count in this behavior (which the above fix ensures)
@@ -14603,13 +14848,24 @@ return (*tab->read_record.read_record)(&tab->read_record); }
-static int +int join_read_record_no_init(JOIN_TAB *tab) { + Copy_field *save_copy, *save_copy_end; +
Add comment: /* init_read_record resets all elements of tab->read_record(). Remember things that we don't want to have reset. */ I think it would be better to move copy_field and copy_field_end to JOIN_TAB as these don't have anything directly to do with the read_record functions.
+ save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select,1,1, FALSE); + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + tab->read_record.read_record= rr_sequential_and_unpack; + return (*tab->read_record.read_record)(&tab->read_record); }
<cut>
@@ -19032,16 +19322,21 @@ { table_map used_tables=0;
- uchar sjm_nests[MAX_TABLES]; - uint sjm_nests_cur=0; - uint sjm_nests_end= 0; - uint end_table= join->tables; bool printing_materialize_nest= FALSE; uint select_id= join->select_lex->select_number;
- for (uint i=0 ; i < end_table ; i++) + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + while ((jt_range= it++))
Please replace with a function that goes over all ranges, so that we don't get an extra indentation level here. (Part of suggestion of review 1)
@@ -19321,12 +19550,16 @@ examined_rows= tab->limit; else { - tab->table->file->info(HA_STATUS_VARIABLE); - examined_rows= tab->table->file->stats.records; + //tab->table->file->info(HA_STATUS_VARIABLE);
Remove comment
+ if (!tab->table->pos_in_table_list || + tab->table->is_filled_at_execution()) // temporary, is_filled_at_execution + examined_rows= tab->records; + else + examined_rows= tab->table->file->stats.records;
Shouldn't you call tab->table->file->info(HA_STATUS_VARIABLE) in the last case ? If not, where is it called now?
@@ -19347,7 +19580,7 @@ */ float f= 0.0; if (examined_rows) - f= (float) (100.0 * join->best_positions[i].records_read / + f= (float) (100.0 * tab->records_read/*join->best_positions[i].records_read*/ /
Removed comment as it's not accurate anymore <cut>
diff -urN --exclude='.*' 5.3-noc/sql/sql_show.cc maria-5.3-subqueries-r36-noc/sql/sql_show.cc
--- 5.3-noc/sql/sql_show.cc 2011-01-27 21:39:33.000000000 +0300
+JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables); +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots);
Please add the prototypes in sql_select.h and include this instead of having prototypes to functions in many places.
@@ -6602,14 +6605,17 @@ bool get_schema_tables_result(JOIN *join, enum enum_schema_table_state executed_place) { - JOIN_TAB *tmp_join_tab= join->join_tab+join->tables; + //JOIN_TAB *tmp_join_tab= join->join_tab+join->tables;
Remove comment
THD *thd= join->thd; LEX *lex= thd->lex; bool result= 0; DBUG_ENTER("get_schema_tables_result");
thd->no_warnings_for_error= 1; - for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++) + //for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++)
Remove comment
+ for (JOIN_TAB *tab= first_linear_tab(join, FALSE); + tab; + tab= next_linear_tab(join, tab, FALSE)) { if (!tab->table || !tab->table->pos_in_table_list) break;
+++ maria-5.3-subqueries-r36-noc/sql/sql_test.cc 2011-02-16 14:42:52.000000000 +0300 @@ -166,58 +166,66 @@ void TEST_join(JOIN *join) { - uint i,ref; + uint ref; + int i; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; DBUG_ENTER("TEST_join");
- /* - Assemble results of all the calls to full_name() first, - in order not to garble the tabular output below. - */ - String ref_key_parts[MAX_TABLES]; - for (i= 0; i < join->tables; i++) - { - JOIN_TAB *tab= join->join_tab + i; - for (ref= 0; ref < tab->ref.key_parts; ref++) - { - ref_key_parts[i].append(tab->ref.items[ref]->full_name()); - ref_key_parts[i].append(" "); - } - } - DBUG_LOCK_FILE; VOID(fputs("\nInfo about JOIN\n",DBUG_FILE)); + + while ((jt_range= it++)) { + /* + Assemble results of all the calls to full_name() first, + in order not to garble the tabular output below. + */ + String ref_key_parts[MAX_TABLES]; + for (i= 0; i < (jt_range->end - jt_range->start); i++)
Store (jt_range->end - jt_range->start) in a variable and use it above and below. <cut>
+ for (i= 0; i < (jt_range->end - jt_range->start); i++)
<cut>
+++ maria-5.3-subqueries-r36-noc/sql/table.cc 2011-02-16 14:42:52.000000000 +0300 @@ -5337,6 +5337,12 @@ (parent && parent->children_attached)); }
Add also a comment here what this functions means (copy from table.h)
+ +bool st_table::is_filled_at_execution() +{ + return test(pos_in_table_list->jtbm_subselect); +} +
Regards, Monty
participants (2)
-
Michael Widenius
-
Sergey Petrunya