
Review of: MDEV-36213 Doubled memory usage (11.4.4 <-> 11.4.5) Fixing the code adding MySQL _0900_ collations as _uca1400_ aliases not to perform deep initialization of the corresponding _uca1400_ collations. -----
b. it introduced cyclic dependency between /mysys and /strings - /mysys/charset-def.c depended on /strings/ctype-uca.c - /strings/ctype-uca.c depended on /mysys/charset.c
Please remove the above comment as this patch is not removing the dependencies. Cyclic dependencies between my_sys and strings is not a problem. We always had it and will probably always will. There are 10 files in strings that includes my_sys.h You should think that string manipulations and main character set should be in strings and other things mysys. The separation mainly is to make things easier to find.
MY_CHARSET_LOADER::once_alloc call, which normally points to my_once_alloc().
I think it always points to once_alloc in the server code. conf_to_src.c, which uses malloc, is only used in uca-dump. I think you can change the commit comment to say that in the server all collations are allocated through my_once_alloc(). +++ b/mysql-test/main/ctype_utf8mb4_0900_mem.result +# +# I_S queries for initialized collations should not add memory +# +2416 utf8mb4_uca1400_spanish2_ai_ci +2417 utf8mb4_uca1400_spanish2_ai_cs +2418 utf8mb4_uca1400_spanish2_as_ci +2419 utf8mb4_uca1400_spanish2_as_cs +2420 utf8mb4_uca1400_spanish2_nopad_ai_ci +2421 utf8mb4_uca1400_spanish2_nopad_ai_cs +2422 utf8mb4_uca1400_spanish2_nopad_as_ci +2423 utf8mb4_uca1400_spanish2_nopad_as_cs +<1M utf8mb4_uca1400_spanish2% +270 utf8mb4_es_trad_0900_ai_ci +293 utf8mb4_es_trad_0900_as_cs +<1M utf8mb4_%es_trad_0900% The output is confusing to read. I was expecting memory checks for all rows. Please add # Reading collections before executing selects and # checking memory before the memory checks in the output. +--echo # End of 11.4 tests diff --git a/mysys/charset-def.c b/mysys/charset-def.c index e2e5a747eb5..5c2fac91773 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -184,76 +184,10 @@ extern struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci; #endif /* HAVE_UCA_COLLATIONS */ -static my_bool -my_uca1400_collation_definition_add(MY_CHARSET_LOADER *loader, - my_cs_encoding_t charset_id, - uint tailoring_id, - my_bool nopad, - my_bool secondary_level, - my_bool tertiary_level) -{ - struct charset_info_st *tmp; - uint collation_id= my_uca1400_make_builtin_collation_id(charset_id, - tailoring_id, - nopad, - secondary_level, - tertiary_level); - if (!collation_id) - return FALSE; - if (!(tmp= (struct charset_info_st*) - my_once_alloc(sizeof(CHARSET_INFO),MYF(0)))) - return TRUE; - if (my_uca1400_collation_definition_init(loader, tmp, collation_id)) - return TRUE; - add_compiled_collation(tmp); - return FALSE; -} - It would have been VERY helpful, from a review point of view, if you would first have moved functions to different files in a separate commit and then do the changes. Changes that did something simple and mechanical, that could have been reviewed quickly could also have been in different commits, like adding my_loader to several function parameters. diff --git a/sql/field.cc b/sql/field.cc index 7358b142621..19fd3528bd0 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -2719,7 +2719,7 @@ bool Field_null::is_equal(const Column_definition &new_field) const { DBUG_ASSERT(!compression_method()); return (new_field.type_handler() == type_handler() && - !compare_collations(new_field.charset, field_charset()) && + new_field.charset->eq_collation(field_charset()) && new_field.length == max_display_length()); } Why not keep the old compare_collations() function and have it defined to do new_field.charset->eq_collation(param ?) Then you would not have to change the above function and 7 other places? Less not required changes makes merges easier! I think you should consider using compare_colations() instead if eq_collation() in the new code. The performance and code generated should be identical. index 3650eca61b9..84193ba3b97 100644 --- a/strings/ctype-uca.c @@ -39565,137 +39381,4 @@ LEX_CSTRING my_ci_get_collation_name_uca(CHARSET_INFO *cs, return cs->coll_name; } diff --git a/strings/ctype-uca0900.c b/strings/ctype-uca0900.c new file mode 100644 index 00000000000..11577daed44 --- /dev/null +++ b/strings/ctype-uca0900.c @@ -0,0 +1,209 @@ +/* Copyright (c) 2025, MariaDB Should be MariaDB Corporation <cut> +static my_bool +mysql_uca0900_collation_definition_add(MY_CHARSET_LOADER *loader, + const struct mysql_0900_to_mariadb_1400_mapping *map, + uint alias_id) Fix indentation for the above. Parameters should be on same level as ( As this is a static function, you can just call the function collation_definition_add() +{ + char comment_buffer[MY_CS_COLLATION_NAME_SIZE + 15]; + char alias_buffer[MY_CS_COLLATION_NAME_SIZE + 1]; + char name1400_buffer[MY_CS_COLLATION_NAME_SIZE + 1]; + LEX_CSTRING comment= {comment_buffer, 0}; + LEX_CSTRING alias_name= {alias_buffer, 0}; + LEX_CSTRING name1400= {name1400_buffer, 0}; + LEX_CSTRING utf8mb4= {STRING_WITH_LEN("utf8mb4")}; + uint offset_in_map= alias_id - mysql_0900_collation_start; + uint id1400= mysql_0900_mapping[offset_in_map].collation_id; The above two lines are the same as: uint id1400= map->collation_id; return my_uca1400_collation_alloc_and_init(loader, alias_name, +++ b/strings/ctype-uca0900.h @@ -0,0 +1,47 @@ +#ifndef CTYPE_UCA_0900_H +#define CTYPE_UCA_0900_H +/* Copyright (c) 2025, MariaDB MariaDB Corporation <cut> +++ b/strings/ctype-uca1400.c @@ -0,0 +1,434 @@ +/* Copyright (c) 2025, MariaDB MariaDB Corporation <cut> +#include "ctype-uca1400data.h" Remove extra line +extern struct charset_info_st my_charset_utf8mb3_unicode_520_nopad_ci; +extern struct charset_info_st my_charset_utf8mb3_unicode_520_ci; +extern struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci; +extern struct charset_info_st my_charset_utf8mb4_unicode_520_ci; +extern struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci; +extern struct charset_info_st my_charset_ucs2_unicode_520_ci; +extern struct charset_info_st my_charset_utf16_unicode_520_nopad_ci; +extern struct charset_info_st my_charset_utf16_unicode_520_ci; +extern struct charset_info_st my_charset_utf32_unicode_520_nopad_ci; +extern struct charset_info_st my_charset_utf32_unicode_520_ci; +extern struct charset_info_st my_charset_utf8mb4_turkish_uca_ci; Please add all external declarations in include files. This ensures all components sees exactly the same declarations. (We have had in the past extern declarations in source file that did not match with the real one and caused some not wanted effects). Normally a c/c++ source file (not include file) should never have any extern declarations! <cut> +/* + Make an UCA-14.0.0 full collation name using its id, + then allocate and add the collation. +*/ +static +my_bool my_uca1400_collation_definition_add(MY_CHARSET_LOADER *loader, uint id) +{ + char coll_name_buffer[MY_CS_COLLATION_NAME_SIZE + 1]; + LEX_CSTRING coll_name; + LEX_CSTRING comment= {"",0}; + uca_collation_def_param_t param= my_uca1400_collation_param_by_id(id); + CHARSET_INFO *src= my_uca1400_collation_source(param.cs_id, + param.nopad_flags); + const MY_UCA1400_COLLATION_DEFINITION *def= + &my_uca1400_collation_definitions[param.tailoring_id]; + + DBUG_ASSERT(id); Please add DBUG_ASSERT(id) before first usage of the id. <cut> +my_bool my_uca1400_collation_definitions_add(MY_CHARSET_LOADER *loader) <cut> + for (tertiary_level= 0; tertiary_level < 2; tertiary_level++) + { + uint collation_id= my_uca1400_make_builtin_collation_id(charset_id, + tailoring_id, + (my_bool) nopad, + (my_bool) secondary_level, + (my_bool) tertiary_level); Fix indentation for the above. +++ b/strings/ctype.c <cut> + +LEX_CSTRING my_ci_make_comment_for_alias(char *buffer, size_t buffer_size, + const char *srcname) +{ + LEX_CSTRING res= {buffer, 0}; + DBUG_ASSERT(buffer_size > 0); + res.length= strxnmov(buffer, buffer_size - 1, "Alias for ", srcname, NullS) - + buffer; + return res; +} Above is ONLY used in ctype-uca0900.c Please move it there and make it static. diff --git a/strings/strings_def.h b/strings/strings_def.h index 2c226e890c7..b137cc4af4f 100644 --- a/strings/strings_def.h +++ b/strings/strings_def.h @@ -20,6 +20,7 @@ #undef DBUG_ASSERT_AS_PRINTF #include <my_global.h> /* Define standard vars */ #include "m_string.h" /* Exernal definitions of string functions */ +#include "m_ctype.h" /* We can't use the original DBUG_ASSERT() (which includes _db_flush()) @@ -148,6 +149,16 @@ void my_ci_set_level_flags(struct charset_info_st *cs, uint flags); uint my_casefold_multiply_1(CHARSET_INFO *cs); uint my_casefold_multiply_2(CHARSET_INFO *cs); +my_bool my_ci_eq_collation_generic(CHARSET_INFO *self, CHARSET_INFO *other); + +LEX_CSTRING my_ci_make_comment_for_alias(char *buffer, size_t buffer_size, + const char *srcname); Above is ONLY used in ctype-uca0900.c Please move it there and make it static. <cut> Regards, Monty