[Commits] 7ea4872: MDEV-21104 Wrong result (extra rows and wrong values) with incremental BNLH
revision-id: 7ea487207e4c8d582161c7c4dc413f29f9e61552 (mariadb-10.2.31-769-g7ea4872) parent(s): 259e5243faa88370bbb890342326a324fb648f7d author: Igor Babaev committer: Igor Babaev timestamp: 2021-03-10 17:26:43 -0800 message: MDEV-21104 Wrong result (extra rows and wrong values) with incremental BNLH This bug could affect multi-way join queries with embedded outer joins that contained a conjunctive IS NULL predicate over a non-nullable column from inner table of an outer join. The predicate could occur in WHERE condition or in ON condition. Due to this bug a wrong result set could be returned by the query. The bug manifested itself only when join buffers were employed for join operations. The problem appeared because - a bug in the function JOIN_CACHE::get_match_flag_by_pos that not always returned proper match flags for embedding outer joins stored together with table rows put a join buffer. - bug in the function JOIN_CACHE::join_matching_records that not always correctly determined that a row from the buffer could be skipped due to applied 'not_exists' optimization. Example: SELECT * FROM t1 LEFT JOIN ((t2 LEFT JOIN t3 ON c = d) JOIN t4) ON b = e WHERE e IS NULL; The patch introduces a new function that finds the match flag for a record from join buffer specifying the buffer where this flag has to be found. The function is called JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(). Now this function rather than JOIN_CACHE::get_match_flag_by_pos() is used in JOIN_CACHE::skip_if_matched() to check whether a record from the join buffer must be ignored when extending the record by null complements. Also the code of the function JOIN_CACHE::skip_if_not_needed_match() has been changed. The function checks whether a record from the join buffer still may produce some useful extensions. Also some clarifying comments has been added. Approved by monty@mariadb.com. --- mysql-test/r/join_cache.result | 53 +++++++++++++++++++++++++ mysql-test/t/join_cache.test | 36 +++++++++++++++++ sql/sql_join_cache.cc | 88 +++++++++++++++++++++++++++++++++++++----- sql/sql_join_cache.h | 11 +++++- 4 files changed, 177 insertions(+), 11 deletions(-) diff --git a/mysql-test/r/join_cache.result b/mysql-test/r/join_cache.result index e41c79a..87c4079 100644 --- a/mysql-test/r/join_cache.result +++ b/mysql-test/r/join_cache.result @@ -6054,4 +6054,57 @@ select f2 from t2,t1 where f2 = 0; f2 drop table t1, t2; set join_buffer_size=@save_join_buffer_size; +# +# MDEV-21104: BNLH used for multi-join query with embedded outer join +# and possible 'not exists' optimization +# +set join_cache_level=4; +CREATE TABLE t1 (a int) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b int, c int) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1,2),(2,4); +CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM; +INSERT INTO t3 VALUES (1),(2); +CREATE TABLE t4 (e int primary key) ENGINE=MyISAM; +INSERT INTO t4 VALUES (1),(2); +ANALYZE TABLE t1,t2,t3,t4; +Table Op Msg_type Msg_text +test.t1 analyze status OK +test.t2 analyze status OK +test.t3 analyze status OK +test.t4 analyze status OK +SELECT * FROM t2 LEFT JOIN t3 ON c = d; +b c d +1 2 2 +2 4 NULL +SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4; +b c d e +1 2 2 1 +2 4 NULL 1 +1 2 2 2 +2 4 NULL 2 +EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join) +1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using index; Using join buffer (incremental, BNLH join) +SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e; +a b c d e +1 1 2 2 1 +2 1 2 2 1 +1 2 4 NULL 2 +2 2 4 NULL 2 +EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e +WHERE e IS NULL; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join) +1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using where; Using index; Not exists; Using join buffer (incremental, BNLH join) +SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e +WHERE e IS NULL; +a b c d e +DROP TABLE t1,t2,t3,t4; +set join_cache_level=@save_join_cache_level; set @@optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/t/join_cache.test b/mysql-test/t/join_cache.test index 9576d59..15cd1e9 100644 --- a/mysql-test/t/join_cache.test +++ b/mysql-test/t/join_cache.test @@ -4014,5 +4014,41 @@ select f2 from t2,t1 where f2 = 0; drop table t1, t2; set join_buffer_size=@save_join_buffer_size; + +--echo # +--echo # MDEV-21104: BNLH used for multi-join query with embedded outer join +--echo # and possible 'not exists' optimization +--echo # + +set join_cache_level=4; + +CREATE TABLE t1 (a int) ENGINE=MyISAM; +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b int, c int) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1,2),(2,4); +CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM; +INSERT INTO t3 VALUES (1),(2); +CREATE TABLE t4 (e int primary key) ENGINE=MyISAM; +INSERT INTO t4 VALUES (1),(2); +ANALYZE TABLE t1,t2,t3,t4; + +SELECT * FROM t2 LEFT JOIN t3 ON c = d; +SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4; + +let $q1= +SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e; +eval EXPLAIN $q1; +eval $q1; + +let $q2= +SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e + WHERE e IS NULL; +eval EXPLAIN $q2; +eval $q2; + +DROP TABLE t1,t2,t3,t4; + +set join_cache_level=@save_join_cache_level; + # The following command must be the last one in the file set @@optimizer_switch=@save_optimizer_switch; diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 1dfc938..594094a 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -1649,7 +1649,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) } -/* +/* Get the match flag from the referenced record: the default implementation SYNOPSIS @@ -1661,6 +1661,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) get the match flag for the record pointed by the reference at the position rec_ptr. If the match flag is placed in one of the previous buffers the function first reaches the linked record fields in this buffer. + The function returns the value of the first encountered match flag. RETURN VALUE match flag for the record at the position rec_ptr @@ -1685,6 +1686,39 @@ enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) /* + Get the match flag for the referenced record from specified join buffer + + SYNOPSIS + get_match_flag_by_pos_from_join_buffer() + rec_ptr position of the first field of the record in the join buffer + tab join table with join buffer where to look for the match flag + + DESCRIPTION + This default implementation of the get_match_flag_by_pos_from_join_buffer + method gets the match flag for the record pointed by the reference at the + position rec_ptr from the join buffer attached to the join table tab. + + RETURN VALUE + match flag for the record at the position rec_ptr from the join + buffer attached to the table tab. +*/ + +enum JOIN_CACHE::Match_flag +JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, + JOIN_TAB *tab) +{ + DBUG_ASSERT(tab->cache && tab->cache->with_match_flag); + for (JOIN_CACHE *cache= this; ; ) + { + if (cache->join_tab == tab) + return (enum Match_flag) rec_ptr[0]; + cache= cache->prev_cache; + rec_ptr= cache->get_rec_ref(rec_ptr); + } +} + + +/* Calculate the increment of the auxiliary buffer for a record write SYNOPSIS @@ -1954,6 +1988,10 @@ bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy, If the record is skipped the value of 'pos' is set to point to the position right after the record. + NOTE + Currently this function is called only when generating null complemented + records for outer joins (=> only when join_tab->first_unmatched != NULL). + RETURN VALUE TRUE the match flag is set to MATCH_FOUND and the record has been skipped FALSE otherwise @@ -1966,7 +2004,9 @@ bool JOIN_CACHE::skip_if_matched() if (prev_cache) offset+= prev_cache->get_size_of_rec_offset(); /* Check whether the match flag is MATCH_FOUND */ - if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) + if (get_match_flag_by_pos_from_join_buffer(pos+offset, + join_tab->first_unmatched) == + MATCH_FOUND) { pos+= size_of_rec_len + get_rec_length(pos); return TRUE; @@ -1983,13 +2023,23 @@ bool JOIN_CACHE::skip_if_matched() DESCRIPTION This default implementation of the virtual function skip_if_not_needed_match - skips the next record from the join buffer if its match flag is not - MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the - first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE - and join_tab is the first inner table of an outer join. + skips the next record from the join when generating join extensions + for the records in the join buffer depending on the value of the match flag. + - In the case of a semi-nest the match flag may be in two states + {MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set + to MATCH_FOUND. + - In the case of a outer join nest when not_exists optimization is applied + the match may be in three states {MATCH_NOT_FOUND, MATCH_IMPOSSIBLE, + MATCH_FOUND. The record is skipped if the flag is set to MATCH_FOUND or + to MATCH_IMPOSSIBLE. + If the record is skipped the value of 'pos' is set to point to the position right after the record. + NOTE + Currently the function is called only when generating non-null complemented + extensions for records in the join buffer. + RETURN VALUE TRUE the record has to be skipped FALSE otherwise @@ -2000,11 +2050,19 @@ bool JOIN_CACHE::skip_if_not_needed_match() DBUG_ASSERT(with_length); enum Match_flag match_fl; uint offset= size_of_rec_len; + bool skip= FALSE; if (prev_cache) offset+= prev_cache->get_size_of_rec_offset(); - if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && - (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) + if (!join_tab->check_only_first_match()) + return FALSE; + + match_fl= get_match_flag_by_pos(pos+offset); + skip= join_tab->first_sj_inner_tab ? + match_fl == MATCH_FOUND : // the case of semi-join + match_fl != MATCH_NOT_FOUND; // the case of outer-join + + if (skip) { pos+= size_of_rec_len + get_rec_length(pos); return TRUE; @@ -2104,7 +2162,14 @@ enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last) goto finish; } join_tab->not_null_compl= FALSE; - /* Prepare for generation of null complementing extensions */ + /* + Prepare for generation of null complementing extensions. + For all inner tables of the outer join operation for which + regular matches have been just found the field 'first_unmatched' + is set to point the the first inner table. After all null + complement rows are generated for this outer join this field + is set back to NULL. + */ for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++) tab->first_unmatched= join_tab->first_inner; } @@ -2221,7 +2286,10 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last) int error; enum_nested_loop_state rc= NESTED_LOOP_OK; join_tab->table->null_row= 0; - bool check_only_first_match= join_tab->check_only_first_match(); + bool check_only_first_match= + join_tab->check_only_first_match() && + (!join_tab->first_inner || // semi-join case + join_tab->first_inner == join_tab->first_unmatched); // outer join case bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join(); DBUG_ENTER("JOIN_CACHE::join_matching_records"); diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h index 1cbc6ac..4569910 100644 --- a/sql/sql_join_cache.h +++ b/sql/sql_join_cache.h @@ -206,7 +206,9 @@ class JOIN_CACHE :public Sql_alloc /* This flag indicates that records written into the join buffer contain - a match flag field. The flag must be set by the init method. + a match flag field. The flag must be set by the init method. + Currently any implementation of the virtial init method calls + the function JOIN_CACHE::calc_record_fields() to set this flag. */ bool with_match_flag; /* @@ -646,6 +648,13 @@ class JOIN_CACHE :public Sql_alloc /* Shall return the value of the match flag for the positioned record */ virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr); + /* + Shall return the value of the match flag for the positioned record + from the join buffer attached to the specified table + */ + virtual enum Match_flag + get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, JOIN_TAB *tab); + /* Shall return the position of the current record */ virtual uchar *get_curr_rec() { return curr_rec_pos; }
participants (1)
-
IgorBabaev