On Tue, Dec 29, 2020 at 7:59 PM Sergey Petrunia <sergey@mariadb.com> wrote:
Hi Varun,

Please find input below.

On Mon, Dec 28, 2020 at 02:34:40PM +0530, varun wrote:
> revision-id: cdc305c8dd89a726e09e5fe70ff890d06609cbfb (mariadb-10.3.21-309-gcdc305c8dd8)
> parent(s): 043bd85a574a88856ab9c6d497e682ed06fe45e9
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2020-12-28 14:12:14 +0530
> message:
>
> MDEV-19620: Changing join_buffer_size causes different results
>
> The scenario here is that query refinement phase decides to use a hash join.
> When the join buffers are allocated in the JOIN::init_join_caches, for a table
> the size exceeds the value for join_buffer_space_limit (which is the limit of the
> space available for all join buffers). When this happens then we disallow join
> buffering for the table, this is done in the revise_cache_usage and set_join_cache_denial.
>
> In this issue the hash key is created on an index for which ref access is possible, so
> when we disallow hash join then instead of switching to REF access we switch to a table
> scan. This is a problem because the equijoin conditions for which a lookup can be made
> are not attached to the table(or are not evaluated for the table). This leads to incorrect
> results.
>
> The fix here would be to switch to using a lookup because it was picked by the join planner
> to be more efficient than the table scan.
>
> ---
>  mysql-test/main/join_cache.result | 138 ++++++++++++++++++++++++++++++++++++++
>  mysql-test/main/join_cache.test   | 105 +++++++++++++++++++++++++++++
>  sql/sql_select.cc                 |  69 +++++++++++++++----
>  sql/sql_select.h                  |   6 ++
>  4 files changed, 304 insertions(+), 14 deletions(-)
>
> diff --git a/mysql-test/main/join_cache.result b/mysql-test/main/join_cache.result
> index 3d1d91df997..e58503f422f 100644
> --- a/mysql-test/main/join_cache.result
> +++ b/mysql-test/main/join_cache.result
> @@ -6128,4 +6128,142 @@ EXPLAIN
>    }
>  }
>  drop table t1,t2,t3;
> +#
> +#  MDEV-19620: Changing join_buffer_size causes different results
> +#
> +SET @save_join_cache_level= @@join_cache_level;
> +SET @save_join_buffer_size= @@join_buffer_size;
> +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> +SET join_cache_level = 3;
> +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500)) ENGINE=MyISAM;
> +INSERT INTO t1 VALUES ('1','v'),('7','s');
> +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500)) ENGINE=MyISAM;
> +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','p'),(3,'1','q');
> +INSERT INTO t2 VALUES (4,'7','g'),(5,'4','p'),(6,'1','q');
> +INSERT INTO t2 VALUES (16,'7','g'),(17,'4','p'),(28,'1','q');
> +#
> +# Hash join + table Scan on t2
> +#
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      ALL     NULL    NULL    NULL    NULL    9       Using where
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +i2   c2      pk3     i3      c3
> +1    v       NULL    NULL    NULL
> +7    s       NULL    NULL    NULL
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      hash_ALL        NULL    #hash#$hj       503     test.t1.c2      9       Using where; Using join buffer (flat, BNLH join)
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +i2   c2      pk3     i3      c3
> +1    v       NULL    NULL    NULL
> +7    s       NULL    NULL    NULL
> +#
> +# HASH join + ref access on t2
> +#
> +ALTER TABLE t2 ADD KEY k1(c3);
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      ref     k1      k1      503     test.t1.c2      2       Using where
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +i2   c2      pk3     i3      c3
> +1    v       NULL    NULL    NULL
> +7    s       NULL    NULL    NULL
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      hash_ALL        k1      #hash#k1        503     test.t1.c2      9       Using where; Using join buffer (flat, BNLH join)
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +i2   c2      pk3     i3      c3
> +1    v       NULL    NULL    NULL
> +7    s       NULL    NULL    NULL
> +#
> +# Hash join + index scan on t2
> +#
> +ALTER TABLE t2 DROP KEY k1;
> +ALTER TABLE t2 ADD KEY k1(i3,c3);
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      index   NULL    k1      806     NULL    9       Using where; Using index
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +c3   i3      i2      c2
> +NULL NULL    1       v
> +NULL NULL    7       s
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      hash_index      NULL    #hash#$hj:k1    503:806 test.t1.c2      9       Using where; Using index; Using join buffer (flat, BNLH join)
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +c3   i3      i2      c2
> +NULL NULL    1       v
> +NULL NULL    7       s
> +DROP TABLE t1,t2;
> +#
> +# Hash join + range scan on t2
> +#
> +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> +INSERT INTO t1 VALUES ('1','v'),('7','s');
> +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500), INDEX(i3,c3));
> +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       Using where
> +1    SIMPLE  t2      range   i3      i3      303     NULL    2       Using index condition; Using where
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +i2   c2      pk3     i3      c3
> +7    s       2       4       s
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       Using where
> +1    SIMPLE  t2      hash_range      i3      #hash#$hj:i3    503:303 test.t1.c2      2       Using where; Using join buffer (flat, BNLH join)
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +i2   c2      pk3     i3      c3
> +7    s       2       4       s
> +DROP TABLE t1,t2;
> +#
> +# Hash join + eq ref access on t2
> +#
> +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> +INSERT INTO t1 VALUES ('1','v'),('7','s');
> +CREATE TABLE t2 (pk3 INT, i3 VARCHAR(300), c3 VARCHAR(500) PRIMARY KEY);
> +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      eq_ref  PRIMARY PRIMARY 502     test.t1.c2      1       Using where
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +c3   i3      i2      c2
> +NULL NULL    1       v
> +s    4       7       s
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +id   select_type     table   type    possible_keys   key     key_len ref     rows    Extra
> +1    SIMPLE  t1      ALL     NULL    NULL    NULL    NULL    2       
> +1    SIMPLE  t2      hash_ALL        PRIMARY #hash#PRIMARY   502     test.t1.c2      3       Using where; Using join buffer (flat, BNLH join)
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +c3   i3      i2      c2
> +NULL NULL    1       v
> +s    4       7       s
> +DROP TABLE t1,t2;
> +SET @save_join_cache_level= @@join_cache_level;
> +SET @save_join_buffer_size= @@join_buffer_size;
> +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
>  set @@optimizer_switch=@save_optimizer_switch;
> diff --git a/mysql-test/main/join_cache.test b/mysql-test/main/join_cache.test
> index 91339c2cb21..6670c62516b 100644
> --- a/mysql-test/main/join_cache.test
> +++ b/mysql-test/main/join_cache.test
> @@ -4054,5 +4054,110 @@ where

>  drop table t1,t2,t3;

> +--echo #
> +--echo #  MDEV-19620: Changing join_buffer_size causes different results
> +--echo #
> +
> +SET @save_join_cache_level= @@join_cache_level;
> +SET @save_join_buffer_size= @@join_buffer_size;
> +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> +SET join_cache_level = 3;
> +
> +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500)) ENGINE=MyISAM;
> +INSERT INTO t1 VALUES ('1','v'),('7','s');
> +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500)) ENGINE=MyISAM;
> +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','p'),(3,'1','q');
> +INSERT INTO t2 VALUES (4,'7','g'),(5,'4','p'),(6,'1','q');
> +INSERT INTO t2 VALUES (16,'7','g'),(17,'4','p'),(28,'1','q');
> +
> +--echo #
> +--echo # Hash join + table Scan on t2
> +--echo #
> +
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +
> +--echo #
> +--echo # HASH join + ref access on t2
> +--echo #
> +
> +ALTER TABLE t2 ADD KEY k1(c3);
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3=c2);
> +
> +--echo #
> +--echo # Hash join + index scan on t2
> +--echo #
> +ALTER TABLE t2 DROP KEY k1;
> +ALTER TABLE t2 ADD KEY k1(i3,c3);
> +
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +DROP TABLE t1,t2;
> +
> +--echo #
> +--echo # Hash join + range scan on t2
> +--echo #
> +
> +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> +INSERT INTO t1 VALUES ('1','v'),('7','s');
> +CREATE TABLE t2 (pk3 INT PRIMARY KEY, i3 VARCHAR(300), c3 VARCHAR(500), INDEX(i3,c3));
> +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> +
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +SELECT * FROM t1 LEFT JOIN t2 ON (c3 = c2) WHERE i3 >= '4';
> +
> +DROP TABLE t1,t2;
> +
> +--echo #
> +--echo # Hash join + eq ref access on t2
> +--echo #
> +
> +CREATE TABLE t1 (i2 VARCHAR(500), c2 VARCHAR(500));
> +INSERT INTO t1 VALUES ('1','v'),('7','s');
> +CREATE TABLE t2 (pk3 INT, i3 VARCHAR(300), c3 VARCHAR(500) PRIMARY KEY);
> +INSERT INTO t2 VALUES (1,'7','g'),(2,'4','s'),(3,'1','q');
> +
> +set join_buffer_size=1024;
> +set join_buffer_space_limit=2048;
> +
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +
> +set join_buffer_space_limit=262144;
> +EXPLAIN SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +SELECT c3,i3, i2,c2 FROM t1 LEFT JOIN t2 ON (c3 = c2);
> +DROP TABLE t1,t2;
> +
> +SET @save_join_cache_level= @@join_cache_level;
> +SET @save_join_buffer_size= @@join_buffer_size;
> +SET @save_join_buffer_space_limit= @@join_buffer_space_limit;
> +
>  # The following command must be the last one in the file
>  set @@optimizer_switch=@save_optimizer_switch;
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index 203422f0b43..8b7deb98dc6 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -111,6 +111,7 @@ static bool best_extension_by_limited_search(JOIN *join,
>                                               uint prune_level,
>                                               uint use_cond_selectivity);
>  static uint determine_search_depth(JOIN* join);
> +static void pick_table_access_method(JOIN_TAB *tab);
>  C_MODE_START
>  static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
>  static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
> @@ -10081,6 +10082,7 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
>    j->ref.disable_cache= FALSE;
>    j->ref.null_ref_part= NO_REF_PART;
>    j->ref.const_ref_part_map= 0;
> +  j->ref.not_null_keyparts= 0;
>    keyuse=org_keyuse;

>    store_key **ref_key= j->ref.key_copy;
> @@ -10173,23 +10175,12 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
>      }
>    } /* not ftkey */
>    *ref_key=0;                                // end_marker
> +  j->ref.not_null_keyparts= not_null_keyparts;
>    if (j->type == JT_FT)
>      DBUG_RETURN(0);
> -  ulong key_flags= j->table->actual_key_flags(keyinfo);
>    if (j->type == JT_CONST)
>      j->table->const_table= 1;
> -  else if (!((keyparts == keyinfo->user_defined_key_parts &&
> -              (
> -                (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
> -                /* Unique key and all keyparts are NULL rejecting */
> -                ((key_flags & HA_NOSAME) && keyparts == not_null_keyparts)
> -              )) ||
> -              /* true only for extended keys */
> -              (keyparts > keyinfo->user_defined_key_parts &&
> -               MY_TEST(key_flags & HA_EXT_NOSAME) &&
> -               keyparts == keyinfo->ext_key_parts)
> -            ) ||
> -            null_ref_key)
> +  else if (!j->is_eq_ref_access()|| null_ref_key)
>    {
>      /* Must read with repeat */
>      j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
> @@ -11582,11 +11573,25 @@ void set_join_cache_denial(JOIN_TAB *join_tab)
>        don't do join buffering for the first table in sjm nest.
>      */
>      join_tab[-1].next_select= sub_select;
> -    if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
> +    if ((join_tab->type == JT_REF || join_tab->type ==  JT_HASH) &&
> +         join_tab->is_ref_for_hash_join())
>      {
>        join_tab->type= JT_ALL;
>        join_tab->ref.key_parts= 0;
Do we ever get here if join_tab->type == JT_REF?  I think we don't.
If this is so, please remove it.
Yes we do reach here for a case with BKA join. I had added a DBUG_ASSERT(0) to check that. 

>      }
> +
> +    if (join_tab->type == JT_HASH && !join_tab->is_ref_for_hash_join())
> +    {
> +      join_tab->type= join_tab->is_eq_ref_access() ? JT_EQ_REF : JT_REF;
> +      pick_table_access_method(join_tab);
> +    }
Also please add a comment explaining that the join optimizer's choice was
ref access and we fall back to that.

Got it 
 
> +
> +    if (join_tab->type == JT_HASH_NEXT)
> +    {
> +      join_tab->type = JT_NEXT;
> +      DBUG_ASSERT(join_tab->ref.key_parts ==  0);
> +    }
> +
>      join_tab->join->return_tab= join_tab;
>    }
>  }
> @@ -27954,6 +27959,42 @@ void JOIN_TAB::partial_cleanup()
>  }


> +/*
> +  @brief
> +    Check if the access method for the table is EQ_REF access or not
> +
> +  @retval
> +    TRUE   EQ_REF access
> +    FALSE  Otherwise
> +*/
> +bool JOIN_TAB::is_eq_ref_access()
> +{
> +
> +  KEY *keyinfo;
> +  if (!is_hash_join_key_no(ref.key))
> +    keyinfo= table->key_info + ref.key;
> +  else
> +    keyinfo= hj_key;

This code now looks as if it was possible that hash join is used and also
eq_ref is used.

Is this really possible? I've tried catching an example of this with MTR and I
wasn't able to do so.

Maybe we should just return false if hash join is used and also a comment about
this?

Yes sure, this looks confusing, will think and change this. 
 
> +
> +  uint keyparts= ref.key_parts;
> +  ulong key_flags= table->actual_key_flags(keyinfo);
> +  if ( (keyparts == keyinfo->user_defined_key_parts &&
> +          (
> +            (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME ||
> +            /* Unique key and all keyparts are NULL rejecting */
> +            ((key_flags & HA_NOSAME) && keyparts == ref.not_null_keyparts)
> +          )
> +        ) ||
> +        /* true only for extended keys */
> +        (keyparts > keyinfo->user_defined_key_parts &&
> +         MY_TEST(key_flags & HA_EXT_NOSAME) &&
> +         keyparts == keyinfo->ext_key_parts)
> +      )
> +    return true;
> +  return false;
> +}
> +
> +
>  /**
>    @} (end of group Query_Optimizer)
>  */

BR
 Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://petrunia.net