Re: [Maria-developers] [Commits] 4f528bb514c: MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results

Hi Varun, I only have input about comments/variable names. Please find it below. On Tue, Mar 24, 2020 at 10:14:38AM +0530, Varun wrote:
revision-id: 4f528bb514c607d848b30119cea135e37e0d9c69 (mariadb-10.5.0-425-g4f528bb514c) parent(s): 1cd8e8213b6019794ca7f01e8276cfd1ba6408cf author: Varun Gupta committer: Varun Gupta timestamp: 2020-03-24 10:13:28 +0530 message:
MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
For JSON_ARRAYAGG with DISTINCT also store the null bytes for the record inside the Unique object
--- mysql-test/main/func_json.result | 4 +- sql/item_jsonfunc.cc | 36 ++++++++++++-- sql/item_jsonfunc.h | 9 ++-- sql/item_sum.cc | 101 ++++++++++++++++++++++++++++++++++----- sql/item_sum.h | 13 +++-- 5 files changed, 133 insertions(+), 30 deletions(-)
diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index a3b5d3b7fac..ed496df8169 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1452,6 +1452,31 @@ static int append_json_value(String *str, Item *item, String *tmp_val) }
+bool append_json_value(String *str, String *res, Item *item, bool null_value)
There is another append_json_value right above this one. That function has no comments but let's not follow that bad pattern. - Please add a function comment describing this function: @brief should describe this function @detail should describe the difference from the one above. It is very counter-intuitive that the parameter named "res" does not hold the result of this function. Please rename it (to value?) (can it be changed to be const String* ?)
+{ + if (null_value) + return str->append("null", 4); + + if (item->type_handler()->is_bool_type()) + { + return ((*res)[0] == '1') ? + str->append("true", 4) : + str->append("false", 5); + } + + if (item->is_json_type()) + return str->append(res->ptr(), res->length()); + + if (item->result_type() == STRING_RESULT) + { + return str->append("\"", 1) || + st_append_escaped(str, res) || + str->append("\"", 1); + } + return st_append_escaped(str, res);
This made me wonder why something is escaped if it is not enclosed in quotes, but now I see this is just copied from above.
+} + + static int append_json_keyname(String *str, Item *item, String *tmp_val) { String *sv= item->val_str(tmp_val);
diff --git a/sql/item_sum.cc b/sql/item_sum.cc index 5c732095573..a04dbdc19e2 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -3552,6 +3552,54 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1, }
+int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1_arg, + const void* key2_arg) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + + uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes; + uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes; + + for (uint i= 0; i < item_func->arg_count_field; i++)
This loop is actually not needed, right? JSON_ARRAYAGG only accepts one argument. Please add a note.
+ { + Item *item= item_func->args[i]; + /* + If item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + continue; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + */ + Field *field= item->get_tmp_table_field(); + + if (!field) + continue; + + if (field->is_null_in_record((uchar*)key1_arg) && + field->is_null_in_record((uchar*)key2_arg)) + continue; + + if (field->is_null_in_record((uchar*)key1_arg)) + return -1; + + if (field->is_null_in_record((uchar*)key2_arg)) + return 1; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp(key1 + offset, key2 + offset); + if (res) + return res; + } + return 0; +} + + /** function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... ) */ @@ -3650,9 +3699,11 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), result->append(*item->separator);
+ bool is_null= false; for (; arg < arg_end; arg++) { String *res; + is_null= false; (1) /* We have to use get_tmp_table_field() instead of real_item()->get_tmp_table_field() because we want the field in @@ -3661,7 +3712,11 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), because it contains both order and arg list fields. */ if ((*arg)->const_item()) + { res= (*arg)->val_str(&tmp); + if ((*arg)->null_value) + is_null= TRUE; (2) Please either use true/false (preferred) or TRUE / FALSE.
+ } else { Field *field= (*arg)->get_tmp_table_field(); ...
diff --git a/sql/item_sum.h b/sql/item_sum.h index b10d9f5974d..908dbe3cce9 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -1897,14 +1899,19 @@ class Item_func_group_concat : public Item_sum */ Item_func_group_concat *original;
+ bool exclude_nulls; + /* Used by Item_func_group_concat and Item_func_json_arrayagg. The latter needs null values but the former doesn't. */ Is this comment relevant here? I think it's better to remove this and instead add a comment for the exclude_nulls above.
- bool add(bool exclude_nulls); + bool add();
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2); + friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1, + const void* key2); friend int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); friend int dump_leaf_key(void* key_arg,
BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
participants (1)
-
Sergey Petrunia