Hi Oleksandr,
> From 660e9eefd7c5c5a2b083573b8185be8b5748b0de Mon Sep 17 00:00:00 2001
> From: Oleksandr Byelkin <sanja(a)mariadb.com>
> Date: Thu, 19 Jun 2025 14:40:03 +0200
> Subject: [PATCH] MDEV-36866 Oracle outer join syntax (+): query with checking
> for null of non-null column uses wrong query plan and returns wrong result
> Recalculate maybe_null for parts witch was fixed before converting
s/witch/which/
> oracle joins.
> ---
> [... 97 lines elided]
> diff --git a/sql/item.h b/sql/item.h
> index 345d99d43cc..5b4ca045cc9 100644
> --- a/sql/item.h
> +++ b/sql/item.h
> @@ -2292,6 +2292,7 @@ class Item :public Value_source,
> return 0;
> }
> virtual bool ora_join_processor(void *arg) { return 0; }
> + virtual bool recalc_maybe_null_processor(void *arg) { return 0; }
Can you add a comment here? Something like "update maybe_nulls after
converting oracle (+) outer joins"
> virtual bool remove_ora_join_processor(void *arg)
> {
> with_flags&= ~item_with_t::ORA_JOIN;
> @@ -2905,6 +2906,15 @@ class Item_args
> }
> return false;
> }
> + bool arg_check_maybe_null()
> + {
> + for (uint i= 0; i < arg_count; i++)
> + {
> + if (args[i]->maybe_null())
> + return true;
> + }
> + return false;
> + }
I suggest rename this function to some_maybe_null, to mean at least one
of the args is maybe_null (some vs. all)
> bool transform_args(THD *thd, Item_transformer transformer, uchar *arg);
> void propagate_equal_fields(THD *, const Item::Context &, COND_EQUAL *);
> bool excl_dep_on_table(table_map tab_map)
> @@ -3945,6 +3955,11 @@ class Item_field :public Item_ident,
> return 0;
> }
> bool ora_join_processor(void *arg) override;
> + bool recalc_maybe_null_processor(void *arg) override
> + {
> + set_maybe_null(field->maybe_null());
> + return 0;
> + }
> bool check_ora_join(Item **reference, bool outer_ref_fixed);
> void cleanup() override;
> Item_equal *get_item_equal() override { return item_equal; }
> @@ -5887,6 +5902,11 @@ class Item_func_or_sum: public Item_result_field,
> return true;
> return (this->*processor)(arg);
> }
> + bool recalc_maybe_null_processor(void *arg) override
> + {
> + set_maybe_null(arg_check_maybe_null());
> + return 0;
> + }
In the testcase, we come across this method for the item "t2.a is null"
(aka "isnull(t2.a)"). And here it sets its maybe_null to true. But
isnull/is null returns either 0 or 1, so can never have value NULL. Is
it ok to set its maybe_null to true then? How about add an
Item_func_isnull::recalc_maybe_null_processor() that simply does
set_maybe_null(false)?
(rr) p this
$18 = (Item_func_isnull * const) 0x7f9e1801a000
(rr) p arg_check_maybe_null()
$19 = true
(rr) dbp this
$20 = 0x5602a0000700 <dbug_item_print_buf> "t2.a is null"
> /*
> Built-in schema, e.g. mariadb_schema, oracle_schema, maxdb_schema
> */
> @@ -6216,6 +6236,11 @@ class Item_ref :public Item_ident
> return cleanup_processor(arg);
> }
> bool ora_join_processor(void *arg) override;
> + bool recalc_maybe_null_processor(void *arg) override
> + {
> + set_maybe_null((*ref)->maybe_null());
> + return 0;
> + }
> Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) override
> { return (*ref)->field_transformer_for_having_pushdown(thd, arg); }
> };
Why only implement the processor for these four subclasses only
(Item_field, Item_func_or_sum, Item_ref, Item_in_subselect)? Is it
because oracle (+) only affects these types of items?
> diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
> index 86c8807a75a..c65d5253c34 100644
> --- a/sql/item_cmpfunc.h
> +++ b/sql/item_cmpfunc.h
> @@ -3341,7 +3341,7 @@ class Item_cond :public Item_bool_func
> void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
> List<Item> &fields, uint flags) override;
> friend int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
> - COND **conds);
> + COND **conds, List<Item> *all_fields);
> void copy_andor_arguments(THD *thd, Item_cond *item);
> bool walk(Item_processor processor, void *arg,
> item_walk_flags flags) override;
> diff --git a/sql/item_subselect.h b/sql/item_subselect.h
> index cb58c62196c..e2c160b0ae3 100644
> --- a/sql/item_subselect.h
> +++ b/sql/item_subselect.h
> @@ -800,7 +800,12 @@ class Item_in_subselect :public Item_exists_subselect
> }
> return FALSE;
> }
> -
> + bool recalc_maybe_null_processor(void *arg) override
> + {
> + if (left_expr->maybe_null())
> + set_maybe_null(true);
> + return 0;
> + }
Can you add a test that covers the Item_in_subselect method here?
> friend class Item_ref_null_helper;
> friend class Item_is_not_null_test;
> friend class Item_in_optimizer;
> diff --git a/sql/sql_base.cc b/sql/sql_base.cc
> index e4c914f8e7c..7f4cff4e253 100644
> --- a/sql/sql_base.cc
> +++ b/sql/sql_base.cc
> @@ -8913,10 +8913,11 @@ bool setup_on_expr(THD *thd, TABLE_LIST *table, bool is_update)
> [... 53 lines elided]
> #endif /* SQL_BASE_INCLUDED */
> diff --git a/sql/sql_oracle_outer_join.cc b/sql/sql_oracle_outer_join.cc
> index 524dd3391db..84988d0147a 100644
> --- a/sql/sql_oracle_outer_join.cc
> +++ b/sql/sql_oracle_outer_join.cc
> @@ -744,7 +744,8 @@ static bool init_tables_array(TABLE_LIST *tables,
> bool setup_oracle_join(THD *thd, Item **conds,
> TABLE_LIST *tables,
> SQL_I_List<TABLE_LIST> &select_table_list,
> - List<TABLE_LIST> *select_join_list)
> + List<TABLE_LIST> *select_join_list,
> + List<Item> *all_fields)
> {
> DBUG_ENTER("setup_oracle_join");
> uint n_tables= select_table_list.elements;
> @@ -964,6 +965,9 @@ bool setup_oracle_join(THD *thd, Item **conds,
> {
> DBUG_ASSERT(curr->on_conds.elements > 0);
> curr->table->outer_join|=JOIN_TYPE_LEFT;
> + // it is done after setting table map so maybe_null also set
I assume "setting table map" here refers to a call to setup_table_map().
If so, I suggest that the comment be made more readable, such as:
// update maybe_null which was previously set in setup_table_map()
> + if (curr->table->table)
> + curr->table->table->maybe_null= JOIN_TYPE_LEFT;
> if (curr->on_conds.elements == 1)
> {
> curr->table->on_expr= curr->on_conds.head();
> @@ -1028,6 +1032,34 @@ bool setup_oracle_join(THD *thd, Item **conds,
> if (add_conditions_to_where(thd, conds, std::move(return_to_where)))
> DBUG_RETURN(TRUE);
> + // refresh nulability of already fixed parts (WHERE, SELECT list, moved ON)
Thoughts:
- s/nulability/nullability/
- all_fields is not just the SELECT list.
- conds[0] = WHERE + moved ON
So I suggest update the comment to be something more accurate like:
// refresh nullability of already fixed parts: WHERE and moved ON,
// all_fields including the SELECT list, and original ON
> + if (conds[0])
> + {
> + conds[0]->update_used_tables();
> + conds[0]->walk(&Item::recalc_maybe_null_processor, 0, 0);
> + }
> + if (all_fields)
> + {
> + List_iterator<Item> it(*all_fields);
> + Item *item;
> + while((item= it++))
> + {
> + item->update_used_tables();
> + item->walk(&Item::recalc_maybe_null_processor, 0, 0);
> + }
> + }
> + for (i= 0; i < n_tables; i++)
> + {
> + // we have to count becaust this lists are included in other lists
> + List_iterator<Item> it(*all_fields);
> + Item *item;
> + for (uint j= 0; j < tab[i].on_conds.elements && (item= it++); j++)
> + {
> + item->update_used_tables();
> + item->walk(&Item::recalc_maybe_null_processor, 0, 0);
> + }
> + }
> +
> DBUG_RETURN(FALSE);
> }
Are the three calls to Item::update_used_tables() needed? I commented
them out and nothing bad happened to the test.
> diff --git a/sql/sql_select.cc b/sql/sql_select.cc
> index d3bfcb8475c..9473746c150 100644
> --- a/sql/sql_select.cc
> +++ b/sql/sql_select.cc
> @@ -954,7 +954,7 @@ setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
> DBUG_ENTER("setup_without_group");
> thd->lex->allow_sum_func.clear_bit(select->nest_level);
> - res= setup_conds(thd, tables, leaves, conds);
> + res= setup_conds(thd, tables, leaves, conds, &all_fields);
> /* it's not wrong to have non-aggregated columns in a WHERE */
> select->set_non_agg_field_used(saved_non_agg_field_used);
> diff --git a/sql/table.cc b/sql/table.cc
> index 8717c740c4f..1ee7184bdb8 100644
> --- a/sql/table.cc
> +++ b/sql/table.cc
> @@ -10569,7 +10569,7 @@ bool TR_table::query(ulonglong trx_id)
> Item *field= newx Item_field(thd, &slex.context, (*this)[FLD_TRX_ID]);
> Item *value= newx Item_int(thd, trx_id);
> COND *conds= newx Item_func_eq(thd, field, value);
> - if (unlikely((error= setup_conds(thd, this, dummy, &conds))))
> + if (unlikely((error= setup_conds(thd, this, dummy, &conds, NULL))))
> return false;
> select= make_select(table, 0, 0, conds, NULL, 0, &error);
> if (unlikely(error || !select))
> @@ -10608,7 +10608,7 @@ bool TR_table::query(MYSQL_TIME &commit_time, bool backwards)
> conds= newx Item_func_ge(thd, field, value);
> else
> conds= newx Item_func_le(thd, field, value);
> - if (unlikely((error= setup_conds(thd, this, dummy, &conds))))
> + if (unlikely((error= setup_conds(thd, this, dummy, &conds, NULL))))
> return false;
> // FIXME: (performance) force index 'commit_timestamp'
> select= make_select(table, 0, 0, conds, NULL, 0, &error);
> --
I'm not sure what the TR_table is about - from git blame it looks like
it has something to do with versioning. Is the oracle (+) join relevant
here? If not (e.g. these tables are not accessible by users) then I
suppose it's ok to pass NULL here.
> 2.39.2
Best,
Yuchen