[Commits] 4f528bb514c: MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
revision-id: 4f528bb514c607d848b30119cea135e37e0d9c69 (mariadb-10.5.0-425-g4f528bb514c) parent(s): 1cd8e8213b6019794ca7f01e8276cfd1ba6408cf author: Varun Gupta committer: Varun Gupta timestamp: 2020-03-24 10:13:28 +0530 message: MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results For JSON_ARRAYAGG with DISTINCT also store the null bytes for the record inside the Unique object --- mysql-test/main/func_json.result | 4 +- sql/item_jsonfunc.cc | 36 ++++++++++++-- sql/item_jsonfunc.h | 9 ++-- sql/item_sum.cc | 101 ++++++++++++++++++++++++++++++++++----- sql/item_sum.h | 13 +++-- 5 files changed, 133 insertions(+), 30 deletions(-) diff --git a/mysql-test/main/func_json.result b/mysql-test/main/func_json.result index be6ea1a7b61..9a60f395d0d 100644 --- a/mysql-test/main/func_json.result +++ b/mysql-test/main/func_json.result @@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a) [1,2,3] SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1; JSON_ARRAYAGG(DISTINCT b) -["Hello","World","This","Will","Work","!",null] +[null,"!","Hello","This","Will","Work","World"] SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1; JSON_ARRAYAGG(DISTINCT a LIMIT 2) [1,2] SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1; JSON_ARRAYAGG(DISTINCT b LIMIT 2) -["Hello","World"] +[null,"!"] # # JSON aggregation # diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index a3b5d3b7fac..ed496df8169 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1452,6 +1452,31 @@ static int append_json_value(String *str, Item *item, String *tmp_val) } +bool append_json_value(String *str, String *res, Item *item, bool null_value) +{ + if (null_value) + return str->append("null", 4); + + if (item->type_handler()->is_bool_type()) + { + return ((*res)[0] == '1') ? + str->append("true", 4) : + str->append("false", 5); + } + + if (item->is_json_type()) + return str->append(res->ptr(), res->length()); + + if (item->result_type() == STRING_RESULT) + { + return str->append("\"", 1) || + st_append_escaped(str, res) || + str->append("\"", 1); + } + return st_append_escaped(str, res); +} + + static int append_json_keyname(String *str, Item *item, String *tmp_val) { String *sv= item->val_str(tmp_val); @@ -3621,12 +3646,13 @@ int Arg_comparator::compare_e_json_str_basic(Item *j, Item *s) } -String* Item_func_json_arrayagg::convert_to_json(Item *item, String *res) +bool Item_func_json_arrayagg::convert_to_json(String *str, String *res, + Item *item, bool null_value) { - String tmp; - res->length(0); - append_json_value(res, item, &tmp); - return res; + str->length(0); + if (append_json_value(str, res, item, null_value)) + return true; + return false; } diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h index 44f9e8146c2..c155ed50396 100644 --- a/sql/item_jsonfunc.h +++ b/sql/item_jsonfunc.h @@ -545,6 +545,7 @@ class Item_func_json_arrayagg : public Item_func_group_concat Item_func_group_concat(thd, context_arg, is_distinct, is_select, is_order, is_separator, limit_clause, row_limit, offset_limit) { + exclude_nulls= FALSE; } Item_func_json_arrayagg(THD *thd, Item_func_json_arrayagg *item); bool is_json_type() { return true; } @@ -552,14 +553,10 @@ class Item_func_json_arrayagg : public Item_func_group_concat const char *func_name() const { return "json_arrayagg("; } enum Sumfunctype sum_func() const {return JSON_ARRAYAGG_FUNC;} - String* convert_to_json(Item *item, String *str); + bool convert_to_json(String *str, String *res, Item *item, bool null_value); String* val_str(String *str); - /* Overrides Item_func_group_concat::add() */ - bool add() - { - return Item_func_group_concat::add(false); - } + Item *get_copy(THD *thd) { return get_item_copy<Item_func_json_arrayagg>(thd, this); } }; diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 5c732095573..a04dbdc19e2 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -3518,7 +3518,7 @@ String *Item_sum_udf_str::val_str(String *str) */ extern "C" -int group_concat_key_cmp_with_distinct(void* arg, const void* key1, +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2) { Item_func_group_concat *item_func= (Item_func_group_concat*)arg; @@ -3552,6 +3552,54 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1, } +int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1_arg, + const void* key2_arg) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + + uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes; + uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes; + + for (uint i= 0; i < item_func->arg_count_field; i++) + { + Item *item= item_func->args[i]; + /* + If item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + continue; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + */ + Field *field= item->get_tmp_table_field(); + + if (!field) + continue; + + if (field->is_null_in_record((uchar*)key1_arg) && + field->is_null_in_record((uchar*)key2_arg)) + continue; + + if (field->is_null_in_record((uchar*)key1_arg)) + return -1; + + if (field->is_null_in_record((uchar*)key2_arg)) + return 1; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp(key1 + offset, key2 + offset); + if (res) + return res; + } + return 0; +} + + /** function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... ) */ @@ -3621,6 +3669,7 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), uint max_length= (uint)table->in_use->variables.group_concat_max_len; String tmp((char *)table->record[1], table->s->reclength, default_charset_info); + const bool exclude_nulls= item->exclude_nulls; String tmp2; uchar *key= (uchar *) key_arg; String *result= &item->result; @@ -3650,9 +3699,11 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), result->append(*item->separator); + bool is_null= false; for (; arg < arg_end; arg++) { String *res; + is_null= false; /* We have to use get_tmp_table_field() instead of real_item()->get_tmp_table_field() because we want the field in @@ -3661,7 +3712,11 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), because it contains both order and arg list fields. */ if ((*arg)->const_item()) + { res= (*arg)->val_str(&tmp); + if ((*arg)->null_value) + is_null= TRUE; + } else { Field *field= (*arg)->get_tmp_table_field(); @@ -3670,7 +3725,11 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), uint offset= (field->offset(field->table->record[0]) - table->s->null_bytes); DBUG_ASSERT(offset < table->s->reclength); - res= field->val_str(&tmp, key + offset); + if (!exclude_nulls && field->is_null_in_record(key)) + is_null= true; + res= field->val_str(&tmp, + key + offset + (exclude_nulls ? 0 : + table->s->null_bytes +offset)); } else res= (*arg)->val_str(&tmp); @@ -3684,10 +3743,12 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), appending it to the result */ Item_func_json_arrayagg *arrayagg= (Item_func_json_arrayagg *) item_arg; - res= arrayagg->convert_to_json(*arg, res); + if (arrayagg->convert_to_json(&tmp2, res, *arg, is_null)) + return 1; + result->append(tmp2); } - - result->append(*res); + else + result->append(*res); } } @@ -3753,7 +3814,7 @@ Item_func_group_concat(THD *thd, Name_resolution_context *context_arg, warning_for_row(FALSE), force_copy_fields(0), row_limit(NULL), offset_limit(NULL), limit_clause(limit_clause), - copy_offset_limit(0), copy_row_limit(0), original(0) + copy_offset_limit(0), copy_row_limit(0), original(0), exclude_nulls(TRUE) { Item *item_select; Item **arg_ptr; @@ -3822,7 +3883,8 @@ Item_func_group_concat::Item_func_group_concat(THD *thd, force_copy_fields(item->force_copy_fields), row_limit(item->row_limit), offset_limit(item->offset_limit), limit_clause(item->limit_clause),copy_offset_limit(item->copy_offset_limit), - copy_row_limit(item->copy_row_limit), original(item) + copy_row_limit(item->copy_row_limit), original(item), + exclude_nulls(item->exclude_nulls) { quick_group= item->quick_group; result.set_charset(collation.collation); @@ -3994,7 +4056,7 @@ bool Item_func_group_concat::repack_tree(THD *thd) */ #define GCONCAT_REPACK_FACTOR (1 << 10) -bool Item_func_group_concat::add(bool exclude_nulls) +bool Item_func_group_concat::add() { if (always_null && exclude_nulls) return 0; @@ -4020,6 +4082,14 @@ bool Item_func_group_concat::add(bool exclude_nulls) if (tree && (res= field->val_str(&buf))) row_str_len+= res->length(); } + else + { + /* + should not reach here, we create temp table for all the arguments of + the group_concat function + */ + DBUG_ASSERT(0); + } } null_value= FALSE; @@ -4029,7 +4099,9 @@ bool Item_func_group_concat::add(bool exclude_nulls) { /* Filter out duplicate rows. */ uint count= unique_filter->elements_in_tree(); - unique_filter->unique_add(table->record[0] + table->s->null_bytes); + unique_filter->unique_add(exclude_nulls ? + table->record[0] + table->s->null_bytes : + table->record[0]); if (count == unique_filter->elements_in_tree()) row_eligible= FALSE; } @@ -4055,7 +4127,9 @@ bool Item_func_group_concat::add(bool exclude_nulls) row to the output buffer here. That will be done in val_str. */ if (row_eligible && !warning_for_row && (!tree && !distinct)) - dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this); + dump_leaf_key(exclude_nulls ? + table->record[0] + table->s->null_bytes : + table->record[0], 1, this); return 0; } @@ -4258,9 +4332,12 @@ bool Item_func_group_concat::setup(THD *thd) } if (distinct) - unique_filter= new Unique(group_concat_key_cmp_with_distinct, + unique_filter= new Unique((exclude_nulls ? + group_concat_key_cmp_with_distinct : + group_concat_key_cmp_with_distinct_with_nulls), (void*)this, - tree_key_length, + tree_key_length + (exclude_nulls ? 0: + table->s->null_bytes), ram_limitation(thd)); if ((row_limit && row_limit->cmp_type() != INT_RESULT) || (offset_limit && offset_limit->cmp_type() != INT_RESULT)) diff --git a/sql/item_sum.h b/sql/item_sum.h index b10d9f5974d..908dbe3cce9 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -1841,6 +1841,8 @@ class Item_sum_udf_str :public Item_sum_double C_MODE_START int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2); +int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1, + const void* key2); int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); int dump_leaf_key(void* key_arg, @@ -1897,14 +1899,19 @@ class Item_func_group_concat : public Item_sum */ Item_func_group_concat *original; + bool exclude_nulls; + /* Used by Item_func_group_concat and Item_func_json_arrayagg. The latter needs null values but the former doesn't. */ - bool add(bool exclude_nulls); + bool add(); friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2); + friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1, + const void* key2); friend int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); friend int dump_leaf_key(void* key_arg, @@ -1940,10 +1947,6 @@ class Item_func_group_concat : public Item_sum return &type_handler_varchar; } void clear(); - bool add() - { - return add(true); - } void reset_field() { DBUG_ASSERT(0); } // not used void update_field() { DBUG_ASSERT(0); } // not used bool fix_fields(THD *,Item **);
participants (1)
-
Varun