Hi, Alexander! On Jan 13, Alexander Barkov wrote:
revision-id: 99e2a49acfc (mariadb-10.5.13-33-g99e2a49acfc) parent(s): 2776635cb98 author: Alexander Barkov committer: Alexander Barkov timestamp: 2022-01-10 18:05:55 +0400 message:
MDEV-27018 IF and COALESCE lose "json" property
Hybrid functions (IF, COALESCE, etc) did not preserve the JSON property from their arguments. The same problem was repeatable for single row subselects.
The problem happened because the method Item::is_json_type() was inconsistently implemented across the Item hierarchy. For example, Item_hybrid_func and Item_singlerow_subselect did not override is_json_type().
Solution:
- Removing Item::is_json_type()
- Implementing specific JSON type handlers: Type_handler_string_json Type_handler_varchar_json Type_handler_tiny_blob_json Type_handler_blob_json Type_handler_medium_blob_json Type_handler_long_blob_json
- Reusing the existing data type infrastructure to pass JSON type handlers across all item types, including classes Item_hybrid_func and Item_singlerow_subselect. Note, these two classes themselves do not need any changes!
- Extending the data type infrastructure so data types can inherit their properties (e.g. aggregation rules) from their base data types. E.g. VARCHAR/JSON acts as VARCHAR, LONGTEXT/JSON acts as LONGTEXT when mixed to a non-JSON data type. This is done by: - adding virtual method Type_handler::type_handler_base() - adding class Recursive_type_pair_iterator - refactoring Type_handler_hybrid_field_type methods aggregate_for_result(), aggregate_for_min_max(), aggregate_for_num_op() to use Recursive_type_pair_iterator.
This change also fixes:
MDEV-27361 Hybrid functions with JSON arguments do not send format metadata
diff --git a/sql/field.cc b/sql/field.cc index 2c768527ced..8fa3bbd538c 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -7277,6 +7277,19 @@ bool Field_longstr::send(Protocol *protocol) }
+const Type_handler *Field_string::type_handler() const +{ + if (is_var_string()) + return &type_handler_var_string;
shouldn't it be after json check?
+ /* + This is a temporary solution and will be fixed soon (in 10.9?). + Type_handler_string_json will provide its own Field_string_json. + */ + if (Type_handler_json_common::has_json_valid_constraint(this)) + return &type_handler_string_json; + return &type_handler_string; +} + /* Copy a string and fill with space */
int Field_string::store(const char *from, size_t length,CHARSET_INFO *cs) diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index ddf5fc32ea4..81003be4656 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1441,6 +1441,32 @@ longlong Item_func_json_contains_path::val_int() }
+/* + This reproduces behavior according to the former + Item_func_conv_charset::is_json_type() which returned args[0]->is_json_type(). + JSON functions with multiple string input with different character sets + wrap some arguments into Item_func_conv_charset. So the former + Item_func_conv_charset::is_json_type() took the JSON propery from args[0], + i.e. from the original argument before the conversion. + This is probably not always correct because an *explicit* + `CONVERT(arg USING charset)` is actually a general purpose string + expression, not a JSON expression. +*/ +static bool is_json_type(const Item *item) +{ + for ( ; ; ) + { + if (Type_handler_json_common::is_json_type_handler(item->type_handler())) + return true; + const Item_func_conv_charset *func; + if (!(func= dynamic_cast<const Item_func_conv_charset*>(item))) + return false; + item= func->arguments()[0];
can you have nested CONVERT()'s ?
+ } + return false; +} + + static int append_json_value(String *str, Item *item, String *tmp_val) { if (item->type_handler()->is_bool_type()) diff --git a/sql/sql_type_json.h b/sql/sql_type_json.h index 6c4ee8cb2eb..4a394809a06 100644 --- a/sql/sql_type_json.h +++ b/sql/sql_type_json.h @@ -21,18 +21,145 @@ ... +template <class BASE, const Named_type_handler<BASE> &thbase> +class Type_handler_general_purpose_string_to_json: + public BASE, + public Type_handler_json_common { ... + bool Item_hybrid_func_fix_attributes(THD *thd, + const char *name, + Type_handler_hybrid_field_type *hybrid, + Type_all_attributes *attr, + Item **items, uint nitems) + const override + { + if (BASE::Item_hybrid_func_fix_attributes(thd, name, hybrid, attr, + items, nitems)) + return true; + /* + The above call can change the type handler on "hybrid", e.g. + choose a proper BLOB type handler according to the calculated max_length. + Convert general purpose string type handler to its JSON counterpart. + This makes hybrid functions preserve JSON data types, e.g.: + COALESCE(json_expr1, json_expr2) -> JSON + */ + hybrid->set_handler(json_type_handler_from_generic(hybrid->type_handler()));
When this line would change hybrid->type_handler() ?
+ return false; + } }; ..... diff --git a/sql/sql_type.cc b/sql/sql_type.cc index c1801c1ae3e..3b2753d80a6 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -1755,19 +1755,110 @@ const Type_handler *Type_handler_typelib::cast_to_int_type_handler() const
/***************************************************************************/
+class Recursive_type_pair_iterator +{ + const Type_handler *m_a; + const Type_handler *m_b; + uint m_switched_to_base_count; +public: + Recursive_type_pair_iterator(const Type_handler *a, + const Type_handler *b, + uint switched_to_base_count= 0) + :m_a(a), m_b(b), m_switched_to_base_count(switched_to_base_count) + { } + const Type_handler *a() const { return m_a; } + const Type_handler *b() const { return m_b; } + Recursive_type_pair_iterator base() const + { + Recursive_type_pair_iterator res(m_a->type_handler_base(), + m_b->type_handler_base()); + res.m_switched_to_base_count= (res.m_a != NULL) + (res.m_b != NULL); + if (res.m_a == NULL) + res.m_a= m_a; + if (res.m_b == NULL) + res.m_b= m_b; + return res;
that's an unusual semantics, not what I would expect from an iterator. It'd expect it to iterare with it++ or it.next() but not it = it.base()
+ } + bool done() const + { + switch (m_switched_to_base_count) + { + case 2:
I don't think case 2 is possible anymore. ...
+ } + } +}; + + bool Type_handler_hybrid_field_type::aggregate_for_result(const Type_handler *other) { - const Type_handler *hres; - const Type_collection *c; - if (!(c= Type_handler::type_collection_for_aggregation(m_type_handler, other)) || - !(hres= c->aggregate_for_result(m_type_handler, other))) - hres= type_handler_data-> - m_type_aggregator_for_result.find_handler(m_type_handler, other); - if (!hres) - return true; - m_type_handler= hres; - return false; + Recursive_type_pair_iterator it(m_type_handler, other); + for ( ; ; )
do you really still need to do recursive? in what cases?
+ { + const Type_handler *hres; + const Type_collection *c; + if (((c= Type_handler::type_collection_for_aggregation(it.a(), it.b())) && + (hres= c->aggregate_for_result(it.a(), it.b()))) || + (hres= type_handler_data-> + m_type_aggregator_for_result.find_handler(it.a(), it.b()))) + { + m_type_handler= hres; + return false; + } + if ((it= it.base()).done()) + break; + } + return true; }
Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org