Re: [Maria-developers] 0fc221a012d: MDEV-8334: Rename utf8 to utf8mb3
Hi, Rucha! Thanks for combining all commits in one and a cleanup. See comments below. On Mar 18, Rucha Deodhar wrote:
revision-id: 0fc221a012d (mariadb-10.5.2-402-g0fc221a012d) parent(s): a1542f8a573 author: Rucha Deodhar <rucha.deodhar@mariadb.com> committer: Rucha Deodhar <rucha.deodhar@mariadb.com> timestamp: 2021-03-16 23:31:12 +0530 message:
MDEV-8334: Rename utf8 to utf8mb3
This patch changes the main name of 3 byte character set from utf8 to utf8mb3. New old_mode UTF8_IS_UTF8MB3 is added and set TRUE by default, so that utf8 would mean utf8mb3. If not set, utf8 would mean utf8mb4.
diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c index fb3103a318d..a8990d8cb6b 100644 --- a/client/mysqlcheck.c +++ b/client/mysqlcheck.c @@ -437,7 +437,7 @@ static int get_options(int *argc, char ***argv) if (!default_charset) { if (opt_fix_db_names || opt_fix_table_names) - default_charset= (char*) "utf8"; + default_charset= (char*) "utf8mb3";
why not keep it utf8?
else default_charset= (char*) MYSQL_AUTODETECT_CHARSET_NAME; } diff --git a/client/mysqldump.c b/client/mysqldump.c index 7c363973da2..900456b31b2 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -3235,7 +3235,7 @@ static uint get_table_structure(const char *table, const char *db, char *table_t { fprintf(sql_file, "/*!40101 SET @saved_cs_client = @@character_set_client */;\n" - "/*!40101 SET character_set_client = utf8 */;\n" + "/*!40101 SET character_set_client = utf8mb3 */;\n"
why not keep it utf8?
"%s%s;\n" "/*!40101 SET character_set_client = @saved_cs_client */;\n", is_log_table ? "CREATE TABLE IF NOT EXISTS " : "", diff --git a/extra/mariabackup/backup_mysql.cc b/extra/mariabackup/backup_mysql.cc index 3083326a7e0..c62252257b9 100644 --- a/extra/mariabackup/backup_mysql.cc +++ b/extra/mariabackup/backup_mysql.cc @@ -117,7 +117,7 @@ xb_mysql_connect() mysql_options(connection, MYSQL_PLUGIN_DIR, xb_plugin_dir); } mysql_options(connection, MYSQL_OPT_PROTOCOL, &opt_protocol); - mysql_options(connection,MYSQL_SET_CHARSET_NAME, "utf8"); + mysql_options(connection,MYSQL_SET_CHARSET_NAME, "utf8mb3");
why not keep it utf8?
msg("Connecting to MySQL server host: %s, user: %s, password: %s, " "port: %s, socket: %s", opt_host ? opt_host : "localhost", @@ -1506,7 +1506,7 @@ write_xtrabackup_info(MYSQL *connection, const char * filename, bool history, "incremental ENUM('Y', 'N') DEFAULT NULL," "format ENUM('file', 'tar', 'xbstream') DEFAULT NULL," "compressed ENUM('Y', 'N') DEFAULT NULL" - ") CHARACTER SET utf8 ENGINE=innodb", false); + ") CHARACTER SET utf8mb3 ENGINE=innodb", false);
why not keep it utf8?
#define ESCAPE_BOOL(expr) ((expr)?"'Y'":"'N'") diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 62fdace654d..0f500c35ff6 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -1716,7 +1716,7 @@ static int create_bootstrap_file() if(!f) return -1;
- fputs("SET NAMES UTF8;\n",f); + fputs("SET NAMES UTF8MB3;\n",f);
why not keep it utf8?
enumerate_ibd_files(append_export_table); for (std::set<std::string>::iterator it = tables_for_export.begin(); it != tables_for_export.end(); it++) diff --git a/mysql-test/main/create-uca.test b/mysql-test/main/create-uca.test index 0acb51f7286..f73f6114962 100644 --- a/mysql-test/main/create-uca.test +++ b/mysql-test/main/create-uca.test @@ -1,5 +1,5 @@ # Prerequisites -let collation=utf8_unicode_ci; +let collation=utf8mb3_unicode_ci;
that's fine, as we generally don't want tests to depend on the old_mode.
--source include/have_collation.inc
# Initial cleanup diff --git a/mysql-test/main/ctype_ldml.result b/mysql-test/main/ctype_ldml.result index 22b7a316111..7c284520733 100644 --- a/mysql-test/main/ctype_ldml.result +++ b/mysql-test/main/ctype_ldml.result @@ -8,7 +8,6 @@ Variable_name Value character_sets_dir MYSQL_TEST_DIR/std_data/ldml/ show collation like 'utf8_phone_ci'; Collation Charset Id Default Compiled Sortlen -utf8_phone_ci utf8 352 8
I suppose the test should show there is a _phone_ci collation. As you've renamed it, you need to adjust the test to do show collation like 'utf8mb3_phone_ci';
CREATE TABLE t1 ( name VARCHAR(64), phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci @@ -37,7 +36,6 @@ Bar +7-912-800-80-01 DROP TABLE t1; show collation like 'utf8_test_ci'; Collation Charset Id Default Compiled Sortlen -utf8_test_ci utf8 353 8
same
create table t1 (c1 char(1) character set utf8 collate utf8_test_ci); insert into t1 values ('a'); select * from t1 where c1='b'; @@ -526,7 +524,6 @@ DROP TABLE t1; SET NAMES utf8 COLLATE utf8_phone_ci; SHOW COLLATION LIKE 'utf8_phone_ci'; Collation Charset Id Default Compiled Sortlen -utf8_phone_ci utf8 352 8
and here
SET NAMES utf8; SELECT hex(weight_string(_utf8mb4'a' collate utf8mb4_test_400_ci)); hex(weight_string(_utf8mb4'a' collate utf8mb4_test_400_ci)) diff --git a/mysql-test/main/show_check.result b/mysql-test/main/show_check.result index d031c792922..1c85ef596c9 100644 --- a/mysql-test/main/show_check.result +++ b/mysql-test/main/show_check.result @@ -874,12 +874,11 @@ set names utf8; ---------------------------------------------------------------- SHOW CHARACTER SET LIKE 'utf8'; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr -def information_schema CHARACTER_SETS CHARACTER_SETS CHARACTER_SET_NAME Charset 253 96 4 N 1 0 33 -def information_schema CHARACTER_SETS CHARACTER_SETS DESCRIPTION Description 253 180 13 N 1 0 33 -def information_schema CHARACTER_SETS CHARACTER_SETS DEFAULT_COLLATE_NAME Default collation 253 96 15 N 1 0 33 -def information_schema CHARACTER_SETS CHARACTER_SETS MAXLEN Maxlen 8 3 1 N 32769 0 63 +def information_schema CHARACTER_SETS CHARACTER_SETS CHARACTER_SET_NAME Charset 253 96 0 N 1 0 33 +def information_schema CHARACTER_SETS CHARACTER_SETS DESCRIPTION Description 253 180 0 N 1 0 33 +def information_schema CHARACTER_SETS CHARACTER_SETS DEFAULT_COLLATE_NAME Default collation 253 96 0 N 1 0 33 +def information_schema CHARACTER_SETS CHARACTER_SETS MAXLEN Maxlen 8 3 0 N 32769 0 63 Charset Description Default collation Maxlen -utf8 UTF-8 Unicode utf8_general_ci 3
again, I suspect this test should now do `SHOW CHARACTER SET LIKE 'utf8mb3';`
---------------------------------------------------------------- SHOW COLLATION LIKE 'latin1_bin'; Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr diff --git a/mysql-test/suite/funcs_1/r/charset_collation.result b/mysql-test/suite/funcs_1/r/charset_collation.result index 31bd30c5acf..6b52e80d6ba 100644 --- a/mysql-test/suite/funcs_1/r/charset_collation.result +++ b/mysql-test/suite/funcs_1/r/charset_collation.result @@ -9,7 +9,6 @@ ORDER BY character_set_name; CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN binary binary Binary pseudo charset 1 latin1 latin1_swedish_ci cp1252 West European 1 -utf8 utf8_general_ci UTF-8 Unicode 3
and here too, changing the test is in order
SELECT * FROM information_schema.collations diff --git a/mysql-test/suite/funcs_1/r/is_column_privileges.result b/mysql-test/suite/funcs_1/r/is_column_privileges.result index b6be9118048..46b2d515041 100644 --- a/mysql-test/suite/funcs_1/r/is_column_privileges.result +++ b/mysql-test/suite/funcs_1/r/is_column_privileges.result @@ -45,7 +45,7 @@ COLUMN_PRIVILEGES CREATE TEMPORARY TABLE `COLUMN_PRIVILEGES` ( `COLUMN_NAME` varchar(64) NOT NULL DEFAULT '', `PRIVILEGE_TYPE` varchar(64) NOT NULL DEFAULT '', `IS_GRANTABLE` varchar(3) NOT NULL DEFAULT '' -) ENGINE=MEMORY DEFAULT CHARSET=utf8 +) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3
Just a thought. Did you also fix all --embedded, all --ps, and all --big --big tests to pass?
SHOW COLUMNS FROM information_schema.COLUMN_PRIVILEGES; Field Type Null Key Default Extra GRANTEE varchar(190) NO diff --git a/mysql-test/suite/innodb/r/innodb_ctype_ldml.result b/mysql-test/suite/innodb/r/innodb_ctype_ldml.result index 502f57156c3..8a96c023134 100644 --- a/mysql-test/suite/innodb/r/innodb_ctype_ldml.result +++ b/mysql-test/suite/innodb/r/innodb_ctype_ldml.result @@ -8,7 +8,6 @@ Variable_name Value character_sets_dir MYSQL_TEST_DIR/std_data/ldml/ show collation like 'utf8_phone_ci'; Collation Charset Id Default Compiled Sortlen -utf8_phone_ci utf8 352 8
deja vu. I remember I've commented on this very thing already :) please, update this test too.
CREATE TABLE t1 ( name VARCHAR(64), phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci Binary files a/mysql-test/suite/sys_vars/r/character_set_results_basic.result and b/mysql-test/suite/sys_vars/r/character_set_results_basic.result differ diff --git a/mysql-test/suite/sys_vars/r/old_mode_basic.result b/mysql-test/suite/sys_vars/r/old_mode_basic.result index 39c8e554be2..a6b95f1c60c 100644 --- a/mysql-test/suite/sys_vars/r/old_mode_basic.result +++ b/mysql-test/suite/sys_vars/r/old_mode_basic.result @@ -167,8 +167,100 @@ NO_PROGRESS_INFO SET @@global.old_mode = @global_start_value; SELECT @@global.old_mode; @@global.old_mode - +UTF8_IS_UTF8MB3 SET @@session.old_mode = @session_start_value; SELECT @@session.old_mode; @@session.old_mode - +UTF8_IS_UTF8MB3 +# +# Beginning of 10.6 test +# +# MDEV-8334: Rename utf8 to utf8mb3 +#
Ah! Great.
+# Save and display old values +SET @save_old_mode = @@OLD_MODE; +SET @save_character_set_server = @@character_set_server; +SET @save_character_set_client = @@character_set_client; +SET @save_character_set_results = @@character_set_results; +SET @save_character_set_connection = @@character_set_connection; +SET @save_character_set_filesystem = @@character_set_filesystem; +SET @save_character_set_database = @@character_set_database; +SET @save_collation_connection = @@collation_connection; +SET @save_collation_server = @@collation_server; +SET @save_collation_database = @@collation_database; +SELECT @@OLD_MODE; +@@OLD_MODE +UTF8_IS_UTF8MB3 +SELECT @@character_set_server,@@character_set_client,@@character_set_results, +@@character_set_connection, @@character_set_filesystem, @@character_set_database, +@@collation_connection, @@collation_server, @@collation_database; +@@character_set_server @@character_set_client @@character_set_results @@character_set_connection @@character_set_filesystem @@character_set_database @@collation_connection @@collation_server @@collation_database +latin1 latin1 latin1 latin1 binary latin1 latin1_swedish_ci latin1_swedish_ci latin1_swedish_ci +# +# UTF8MB3 alias for UTF8 +# +SET @@character_set_server = utf8; +SET @@character_set_client = utf8; +SET @@character_set_results = utf8; +SET @@character_set_connection = utf8; +SET @@character_set_filesystem = utf8; +SET @@character_set_database = utf8; +SET @@collation_connection = utf8_general_ci; +SET @@collation_server = utf8_unicode_ci; +SET @@collation_database = utf8_bin; +SELECT @@character_set_server, @@character_set_client, @@character_set_results, +@@character_set_connection, @@character_set_filesystem, @@character_set_database, +@@collation_connection, @@collation_server, @@collation_database; +@@character_set_server @@character_set_client @@character_set_results @@character_set_connection @@character_set_filesystem @@character_set_database @@collation_connection @@collation_server 
@@collation_database +utf8mb3 utf8mb3 utf8mb3 utf8mb3 utf8mb3 utf8mb3 utf8mb3_general_ci utf8mb3_unicode_ci utf8mb3_bin +CREATE DATABASE db1 CHARACTER SET = 'utf8' COLLATE = 'utf8_general_ci'; +ALTER DATABASE db1 CHARACTER SET = 'utf8' COLLATE = 'utf8_unicode_ci'; +CREATE TABLE tb1 (id1 INT) CHARACTER SET 'utf8' COLLATE 'utf8_bin'; +SHOW CREATE TABLE tb1; +Table Create Table +tb1 CREATE TABLE `tb1` ( + `id1` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin +DROP TABLE tb1; +DROP DATABASE db1; +# +# UTF8MB4 is alias for UTF8 +# +SET @@OLD_MODE=0; +SET @@character_set_server = utf8; +SET @@character_set_client = utf8; +SET @@character_set_results = utf8; +SET @@character_set_connection = utf8; +SET @@character_set_filesystem = utf8; +SET @@character_set_database = utf8; +SET @@collation_connection = utf8_general_ci; +SET @@collation_server = utf8_unicode_ci; +SET @@collation_database = utf8_bin; +SELECT @@character_set_server, @@character_set_client, @@character_set_results, +@@character_set_connection, @@character_set_filesystem, @@character_set_database, +@@collation_connection, @@collation_server, @@collation_database; +@@character_set_server @@character_set_client @@character_set_results @@character_set_connection @@character_set_filesystem @@character_set_database @@collation_connection @@collation_server @@collation_database +utf8mb4 utf8mb4 utf8mb4 utf8mb4 utf8mb4 utf8mb4 utf8mb4_general_ci utf8mb4_unicode_ci utf8mb4_bin +CREATE DATABASE db1 CHARACTER SET = 'utf8' COLLATE = 'utf8_general_ci'; +ALTER DATABASE db1 CHARACTER SET = 'utf8' COLLATE = 'utf8_unicode_ci'; +CREATE TABLE tb1 (id1 INT) CHARACTER SET 'utf8' COLLATE 'utf8_bin'; +SHOW CREATE TABLE tb1; +Table Create Table +tb1 CREATE TABLE `tb1` ( + `id1` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin +DROP TABLE tb1; +DROP DATABASE db1; +SET @@OLD_MODE = @save_old_mode; +SET @@character_set_server = @save_character_set_server; +SET 
@@character_set_client = @save_character_set_client; +SET @@character_set_results = @save_character_set_results; +SET @@character_set_connection = @save_character_set_connection; +SET @@character_set_filesystem = @save_character_set_filesystem; +SET @@character_set_database = @save_character_set_database; +SET @@collation_connection = @save_collation_connection; +SET @@collation_server = @save_collation_server; +SET @@collation_database = @save_collation_database; +# +# End of 10.6 test +# diff --git a/mysys/charset.c b/mysys/charset.c index 32cfeb56e2d..dbb2749d217 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -763,22 +764,13 @@ get_charset_number_internal(const char *charset_name, uint cs_flags) }
-static const char* -get_charset_name_alias(const char *name) -{ - if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3")) - return "utf8"; - return NULL; -} - - uint get_charset_number(const char *charset_name, uint cs_flags) { uint id; my_pthread_once(&charsets_initialized, init_available_charsets); if ((id= get_charset_number_internal(charset_name, cs_flags))) return id; - if ((charset_name= get_charset_name_alias(charset_name))) + if ((charset_name= !my_strcasecmp(&my_charset_latin1, charset_name, "utf8") ? "utf8mb3" : NULL))
Huh? Why do you not check MY_UTF8_IS_UTF8MB3 here?
return get_charset_number_internal(charset_name, cs_flags); return 0; } @@ -820,7 +812,7 @@ static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs) char name[MY_CS_NAME_SIZE + 1]; memcpy(name, beg, end - beg); name[end - beg]= '\0'; - return inheritance_source_by_id(cs, get_collation_number(name)); + return inheritance_source_by_id(cs, get_collation_number(name,MYF(0)));
and not here?
} return NULL; } @@ -961,7 +953,28 @@ my_collation_get_by_name(MY_CHARSET_LOADER *loader, CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) { MY_CHARSET_LOADER loader; + my_bool utf8_is_utf8mb3= flags & MY_UTF8_IS_UTF8MB3 ? 1 : 0; + char *copy_of_name= (char*)cs_name; + char start[6], result[64]; + char *temp_cs_name; + my_charset_loader_init_mysys(&loader); + + if (!strcasecmp("utf8",copy_of_name)) + cs_name = (const char*)(utf8_is_utf8mb3 ? "utf8mb3" : "utf8mb4"); + + strncpy(start, cs_name, 5); + temp_cs_name= (char *)(utf8_is_utf8mb3 ? "utf8mb3_":"utf8mb4_"); + + if (!strncasecmp("utf8_", start,5)) + { + copy_of_name+= 5; + result[63]='\0'; + strcpy(result, temp_cs_name); + strcat(result, copy_of_name); + result[strlen(copy_of_name)+strlen(temp_cs_name)]='\0'; + cs_name= (const char *) result; + }
And why do you do all that ^^^ ? Old code didn't try to change utf8mb3 to utf8 here, because, I suppose, my_collation_get_by_name() below did all that. Why did you add alias resolution where none was?
return my_collation_get_by_name(&loader, cs_name, flags); }
@@ -1005,12 +1018,16 @@ get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags) { MY_CHARSET_LOADER loader; my_charset_loader_init_mysys(&loader); + + if (!strcasecmp("utf8",cs_name)) + cs_name= (const char*)(flags & MY_UTF8_IS_UTF8MB3 ? "utf8mb3" : "utf8mb4");
same here
+ return my_charset_get_by_name(&loader, cs_name, cs_flags, flags); }
/** - Resolve character set by the character set name (utf8, latin1, ...). + Resolve character set by the character set name (utf8mb3, latin1, ...).
The function tries to resolve character set by the specified name. If there is character set with the given name, it is assigned to the "cs" @@ -1453,8 +1472,8 @@ static const MY_CSET_OS_NAME charsets[] =
{"US-ASCII", "latin1", my_cs_approx},
- {"utf8", "utf8", my_cs_exact}, - {"utf-8", "utf8", my_cs_exact}, + {"utf8mb3", "utf8mb3", my_cs_exact},
eh, no. Try to understand what this array is for.
+ {"utf-8", "utf8mb3", my_cs_exact}, #endif {NULL, NULL, 0} }; diff --git a/plugin/handler_socket/client/hslongrun.cpp b/plugin/handler_socket/client/hslongrun.cpp index b7c02951340..7f88d48fff2 100644 --- a/plugin/handler_socket/client/hslongrun.cpp +++ b/plugin/handler_socket/client/hslongrun.cpp @@ -897,7 +897,7 @@ hs_longrun_init_table(const config& conf, int num_prepare, "v1 varchar(32) not null," "v2 varchar(32) not null," "v3 varchar(32) not null" - ") character set utf8 collate utf8_bin engine = innodb"); + ") character set utf8mb3 collate utf8_bin engine = innodb");
just keep it utf8
for (int i = 0; i < num_prepare; ++i) { const std::string i_str = to_stdstring(i); const std::string v1 = "pv1_" + i_str; diff --git a/plugin/handler_socket/client/hstest.pl b/plugin/handler_socket/client/hstest.pl index 1363e153c44..5924d8a0ce5 100755 --- a/plugin/handler_socket/client/hstest.pl +++ b/plugin/handler_socket/client/hstest.pl @@ -52,7 +52,7 @@ for my $action (@actions) { "k $keytype primary key" . ",v varchar(32) not null" . $moreflds . - ") character set utf8 collate utf8_bin " . + ") character set utf8mb3 collate utf8_bin " .
and here. forget about handlersocket
"engine = $engine"); } elsif ($action eq "insert") { print("INSERT $db.$table tablesize=$tablesize\n"); diff --git a/plugin/win_auth_client/common.cc b/plugin/win_auth_client/common.cc index 8b7319252ac..ddd34aec7da 100644 --- a/plugin/win_auth_client/common.cc +++ b/plugin/win_auth_client/common.cc @@ -384,7 +384,7 @@ char* wchar_to_utf8(const wchar_t *string, size_t *len) buf= (char*)malloc(buf_len + 1); if (!buf) { - DBUG_PRINT("error",("Out of memory when converting string '%S' to utf8", + DBUG_PRINT("error",("Out of memory when converting string '%S' to utf8mb3",
Nope, see what this function is doing.
string)); return NULL; } @@ -408,7 +408,7 @@ char* wchar_to_utf8(const wchar_t *string, size_t *len)
#ifndef DBUG_OFF Error_message_buf error_buf; - DBUG_PRINT("error", ("Could not convert string '%S' to utf8" + DBUG_PRINT("error", ("Could not convert string '%S' to utf8mb3"
same
", WideCharToMultiByte() failed with error %X (%s)", string, GetLastError(), get_last_error_message(error_buf))); @@ -451,7 +451,7 @@ wchar_t* utf8_to_wchar(const char *string, size_t *len)
if (!buf) { - DBUG_PRINT("error",("Out of memory when converting utf8 string '%s'" + DBUG_PRINT("error",("Out of memory when converting utf8mb3 string '%s'"
same
" to wide-char representation", string)); return NULL; } diff --git a/scripts/fill_help_tables.sql b/scripts/fill_help_tables.sql index d0efb750330..ad7c4fce9a4 100644 --- a/scripts/fill_help_tables.sql +++ b/scripts/fill_help_tables.sql @@ -22,7 +22,7 @@
don't change help tables, please. they'll be regenerated from the documentation
-- mysql -u root -p mysql < file_name
-set names 'utf8'; +set names 'utf8mb3';
set sql_log_bin = 0;
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql index e390f36a98b..7c8532577a1 100644 --- a/scripts/mysql_system_tables.sql +++ b/scripts/mysql_system_tables.sql @@ -209,7 +209,7 @@ SET @create_transaction_registry="CREATE TABLE IF NOT EXISTS transaction_registr UNIQUE KEY (commit_id), INDEX (begin_timestamp), INDEX (commit_timestamp, transaction_id) -) ENGINE=INNODB DEFAULT CHARSET=utf8 COLLATE=utf8_bin STATS_PERSISTENT=0"; +) ENGINE=INNODB DEFAULT CHARSET=utf8mb3 COLLATE=utf8_bin STATS_PERSISTENT=0";
here and in all other places in this file and other .sql files: you need to change the collation too, not just the charset.
SET @str=IF(@have_innodb <> 0, @create_innodb_table_stats, "SET @dummy = 0"); PREPARE stmt FROM @str; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 0bf21e02002..cc7568990b4 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4039,7 +4039,10 @@ static int init_common_variables() *next_character_set_name++= '\0'; if (!(default_charset_info= get_charset_by_csname(default_character_set_name, - MY_CS_PRIMARY, MYF(MY_WME)))) + MY_CS_PRIMARY, + global_system_variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3 | MY_WME) : MYF(MY_WME))))
maybe you'd better get this multi-line assignment out of the if()? e.g. myf utf8_flag= global_system_variables.old_behavior & OLD_MODE_UTF8_IS_UTF8MB3 ? MY_UTF8_IS_UTF8MB3 : 0; default_charset_info= get_charset_by_csname(default_character_set_name, MY_CS_PRIMARY, MYF(utf8_flag | MY_WME)); if (!default_charset_info) ... or add a helper thd->utf8_alias() as I suggested below
{ if (next_character_set_name) { @@ -4056,7 +4059,10 @@ static int init_common_variables() if (default_collation_name) { CHARSET_INFO *default_collation; - default_collation= get_charset_by_name(default_collation_name, MYF(0)); + default_collation= get_charset_by_name(default_collation_name, + global_system_variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3) : MYF(0));
and here you'll be able to use utf8_flag without the conditional operator
if (!default_collation) { #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE @@ -4097,7 +4103,10 @@ static int init_common_variables()
if (!(character_set_filesystem= get_charset_by_csname(character_set_filesystem_name, - MY_CS_PRIMARY, MYF(MY_WME)))) + MY_CS_PRIMARY, + global_system_variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3 | MY_WME) : MYF(MY_WME))))
and here
return 1; global_system_variables.character_set_filesystem= character_set_filesystem;
@@ -7415,7 +7424,9 @@ static void usage(void) DBUG_ENTER("usage"); if (!(default_charset_info= get_charset_by_csname(default_character_set_name, MY_CS_PRIMARY, - MYF(MY_WME)))) + global_system_variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3 | MY_WME) : MYF(MY_WME))))
split this one too?
exit(1); if (!default_collation_name) default_collation_name= (char*) default_charset_info->name; diff --git a/sql/sql_class.h b/sql/sql_class.h index 50b746fe514..15aa9ca8199 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1007,6 +1008,36 @@ inline void update_global_memory_status(int64 size) my_atomic_add64_explicit(ptr, size, MY_MEMORY_ORDER_RELAXED); }
+inline const char* get_alias_collation_or_charset_name(const char* name, + bool utf8_is_utf8mb3) +{ + char *copy_of_name= (char*)name; + char start[6], result[64]; + char *temp_cs_name; + + if (!strchr(name,'_')) + { + if (!strcasecmp("utf8",name)) + name = utf8_is_utf8mb3 ? "utf8mb3" : "utf8mb4"; + return name; + } + else + { + strncpy(start, name, 5); + temp_cs_name= (char *)(utf8_is_utf8mb3 ? "utf8mb3_":"utf8mb4_"); + if (!strncasecmp("utf8_", start,5)) + { + copy_of_name+= 5; + result[63]='\0'; + strcpy(result, temp_cs_name); + strcat(result, copy_of_name); + result[strlen(copy_of_name)+strlen(temp_cs_name)]='\0'; + strcpy((char*)name,result); + } + } + return name; +}
Please, no. First, you cannot just copy `result` into `name`: `result` is longer, so you'll overwrite whatever was in memory after name's value. Second, you don't need to resolve aliases here; the charset code already does it, so don't duplicate that. Just pass MY_UTF8_IS_UTF8MB3 down to my_collation_get_by_name() below.
+ /** Get collation by name, send error to client on failure. @param name Collation name diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 9bf16220535..f471d8edc66 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -583,9 +583,14 @@ bool load_db_opt(THD *thd, const char *path, Schema_specification_st *create) default-collation commands. */ if (!(create->default_table_charset= - get_charset_by_csname(pos+1, MY_CS_PRIMARY, MYF(0))) && + get_charset_by_csname(pos+1, MY_CS_PRIMARY, + thd->variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3) : MYF(0))) && !(create->default_table_charset= - get_charset_by_name(pos+1, MYF(0)))) + get_charset_by_name(pos+1, thd->variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3) : MYF(0))))
again, please move myf flags manipulations out of if()
{ sql_print_error("Error while loading database options: '%s':",path); sql_print_error(ER_THD(thd, ER_UNKNOWN_CHARACTER_SET),pos+1); @@ -595,7 +600,9 @@ bool load_db_opt(THD *thd, const char *path, Schema_specification_st *create) else if (!strncmp(buf,"default-collation", (pos-buf))) { if (!(create->default_table_charset= get_charset_by_name(pos+1, - MYF(0)))) + thd->variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3) : MYF(0))))
and here
{ sql_print_error("Error while loading database options: '%s':",path); sql_print_error(ER_THD(thd, ER_UNKNOWN_COLLATION),pos+1); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 6871699dc5b..ad322eda097 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -2789,7 +2789,10 @@ int Lex_input_stream::scan_ident_middle(THD *thd, Lex_ident_cli_st *str, body_utf8_append(m_cpp_text_start, m_cpp_tok_start + length); ErrConvString csname(str->str + 1, str->length - 1, &my_charset_bin); CHARSET_INFO *cs= get_charset_by_csname(csname.ptr(), - MY_CS_PRIMARY, MYF(0)); + MY_CS_PRIMARY, + thd->variables.old_behavior & + OLD_MODE_UTF8_IS_UTF8MB3 ? + MYF(MY_UTF8_IS_UTF8MB3) : MYF(0));
maybe it could be a helper in thd, like THD::utf8_alias() { return variables.old_behavior & OLD_MODE_UTF8_IS_UTF8MB3 ? MY_UTF8_IS_UTF8MB3 : 0; }
if (cs) { *introducer= cs; diff --git a/storage/connect/mysql-test/connect/my.cnf b/storage/connect/mysql-test/connect/my.cnf index 6310772d01f..83f0aa8ab30 100644 --- a/storage/connect/mysql-test/connect/my.cnf +++ b/storage/connect/mysql-test/connect/my.cnf @@ -14,4 +14,4 @@ MASTER_MYSOCK= @mysqld.1.socket SLAVE_MYPORT= @mysqld.2.port SLAVE_MYSOCK= @mysqld.2.socket
-PGCLIENTENCODING= UTF8 +PGCLIENTENCODING= UTF8MB3
eh... really? Are you sure you've tested it and it worked? by the name of it I suspect it's a postgresql client encoding.
diff --git a/storage/connect/mysql-test/connect/t/odbc_postgresql.sql b/storage/connect/mysql-test/connect/t/odbc_postgresql.sql index 1c302294393..3c78120a7a2 100644 --- a/storage/connect/mysql-test/connect/t/odbc_postgresql.sql +++ b/storage/connect/mysql-test/connect/t/odbc_postgresql.sql @@ -4,13 +4,13 @@ -- Run this script as a admin user: -- psql -U postgres < odbc_postgresql.sql
-SET NAMES 'UTF8'; +SET NAMES 'UTF8MB3';
same, postgresql
DROP DATABASE IF EXISTS mtr; DROP USER IF EXISTS mtr;
CREATE USER mtr WITH PASSWORD 'mtr'; -CREATE DATABASE mtr OWNER=mtr ENCODING='UTF8'; +CREATE DATABASE mtr OWNER=mtr ENCODING='UTF8MB3';
same
GRANT ALL ON DATABASE mtr TO mtr; \c mtr SET role mtr; diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index e5164fcc4fa..a26a05c2cf9 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -330,7 +330,7 @@ fts_word_t* fts_word_init( /*==========*/ fts_word_t* word, /*!< in: word to initialize */ - byte* utf8, /*!< in: UTF-8 string */ + byte* utf8mb3, /*!< in: UTF-8 string */
don't rename variables, please
ulint len) /*!< in: length of string in bytes */ { mem_heap_t* heap = mem_heap_create(sizeof(fts_node_t)); diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_french.result b/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_french.result index 3f24de87035..3659aa5aee8 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_french.result +++ b/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_french.result @@ -7,5 +7,4 @@ FULLTEXT INDEX (content) INSERT INTO diaries VALUES ("Je suis un garçon."); SELECT * FROM diaries WHERE MATCH (content) AGAINST ("garcon"); content -Je suis un garçon.
looks like a bug
DROP TABLE diaries; diff --git a/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_japanese.result b/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_japanese.result index 94ef2608b81..79dac1e63a7 100644 --- a/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_japanese.result +++ b/storage/mroonga/mysql-test/mroonga/storage/r/collation_utf8_unicode_ci_japanese.result @@ -7,5 +7,4 @@ FULLTEXT INDEX (content) INSERT INTO diaries VALUES ("ひらがなとカタカナを覚えました。"); SELECT * FROM diaries WHERE MATCH (content) AGAINST ("かたかな"); content -ひらがなとカタカナを覚えました。
that too
DROP TABLE diaries; diff --git a/storage/mroonga/vendor/groonga/CMakeLists.txt b/storage/mroonga/vendor/groonga/CMakeLists.txt index d271d4c4eb9..fc134b81cde 100644 --- a/storage/mroonga/vendor/groonga/CMakeLists.txt +++ b/storage/mroonga/vendor/groonga/CMakeLists.txt @@ -268,7 +268,7 @@ if(UNIX) ac_check_funcs(pthread_condattr_setpshared) endif()
-option(GRN_WITH_NFKC "use NFKC based UTF8 normalization." ON) +option(GRN_WITH_NFKC "use NFKC based UTF8MB3 normalization." ON)
not here, please
if(WIN32) ac_check_headers(winsock2.h) diff --git a/storage/mroonga/vendor/groonga/benchmark/bench-nfkc.c b/storage/mroonga/vendor/groonga/benchmark/bench-nfkc.c index ebae95b273b..3997b933e87 100644 --- a/storage/mroonga/vendor/groonga/benchmark/bench-nfkc.c +++ b/storage/mroonga/vendor/groonga/benchmark/bench-nfkc.c @@ -83,11 +83,11 @@ static void bench_char_type(gpointer user_data) { uint64_t code_point; - char utf8[7]; + char utf8mb3[7];
don't rename variables
for (code_point = 1; code_point < MAX_UNICODE; code_point++) { - ucs2utf8(code_point, (unsigned char *)utf8); - grn_nfkc50_char_type(utf8); + ucs2utf8(code_point, (unsigned char *)utf8mb3); + grn_nfkc50_char_type(utf8mb3); } }
diff --git a/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/tool/dump_difference_utf8.rb b/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/tool/dump_difference_utf8.rb index 4b6fde8c7b0..ce20a2c5b40 100644 --- a/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/tool/dump_difference_utf8.rb +++ b/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/tool/dump_difference_utf8.rb @@ -36,8 +36,8 @@ parser.sorted_pages.each do |page, characters| next if base == sort n_differences += 1 utf8s = [base, upper, lower, sort] - formatted_code_points = utf8s.collect do |utf8| - "%#07x" % Unicode.from_utf8(utf8) + formatted_code_points = utf8s.collect do |utf8mb3| + "%#07x" % Unicode.from_utf8(utf8mb3)
just revert all changes under storage/mroonga/vendor/*
end if sort.bytesize > base.bytesize n_expanded_sort_characters += 1 diff --git a/storage/sphinx/mysql-test/sphinx/my.cnf b/storage/sphinx/mysql-test/sphinx/my.cnf index f60380b7171..22cc06914f4 100644 --- a/storage/sphinx/mysql-test/sphinx/my.cnf +++ b/storage/sphinx/mysql-test/sphinx/my.cnf @@ -7,7 +7,7 @@ xmlpipe_command = cat @ENV.MTR_SUITE_DIR/testdata.xml [index test1] source = src1 docinfo = extern -charset_type = utf-8 +charset_type = utf-8mb3
revert
path = @ENV.MYSQLTEST_VARDIR/searchd/test1
[indexer] diff --git a/storage/spider/mysql-test/spider/bg/my.cnf b/storage/spider/mysql-test/spider/bg/my.cnf index 246099c623e..39f5bd01c67 100644 --- a/storage/spider/mysql-test/spider/bg/my.cnf +++ b/storage/spider/mysql-test/spider/bg/my.cnf @@ -75,15 +75,15 @@ MASTER_1_MYSOCK= @mysqld.1.1.socket MASTER_1_ENGINE_TYPE= Spider #MASTER_1_ENGINE_TYPE= MyISAM MASTER_1_ENGINE= ENGINE=Spider -MASTER_1_CHARSET= DEFAULT CHARSET=utf8 +MASTER_1_CHARSET= DEFAULT CHARSET=utf8mb3
Make sure to update both charset and collation here and in all other .cnf files in the spider suite
MASTER_1_ENGINE2= ENGINE=MyISAM -MASTER_1_CHARSET2= DEFAULT CHARSET=utf8 -MASTER_1_CHARSET3= DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci +MASTER_1_CHARSET2= DEFAULT CHARSET=utf8mb3 +MASTER_1_CHARSET3= DEFAULT CHARSET=utf8mb3 COLLATE=utf8_unicode_ci SLAVE1_1_MYPORT= @mysqld.4.1.port SLAVE1_1_MYSOCK= @mysqld.4.1.socket SLAVE1_1_ENGINE_TYPE= MyISAM SLAVE1_1_ENGINE= ENGINE=MyISAM -SLAVE1_1_CHARSET= DEFAULT CHARSET=utf8 +SLAVE1_1_CHARSET= DEFAULT CHARSET=utf8mb3 USE_CHILD_GROUP2= 1 OUTPUT_CHILD_GROUP2= 0 CHILD2_1_MYPORT= @mysqld.2.1.port diff --git a/storage/spider/spd_init_query.h b/storage/spider/spd_init_query.h index 19b04d50b82..f12cef377e2 100644 --- a/storage/spider/spd_init_query.h +++ b/storage/spider/spd_init_query.h @@ -559,7 +559,7 @@ static LEX_STRING spider_init_queries[] = { " table_name char(64) not null default ''," " primary key (table_id)," " unique uk1(db_name, table_name)" - " ) engine=Aria transactional=1 default charset=utf8 collate=utf8_bin;" + " ) engine=Aria transactional=1 default charset=utf8mb3 collate=utf8_bin;"
always change both charset and collation (everywhere in this file)
" create table if not exists mysql.spider_rewrite_table_tables(" " table_id bigint unsigned not null," " partition_id bigint unsigned not null auto_increment," diff --git a/tests/mysql_client_test.c b/tests/mysql_client_test.c index 0043786d477..5d9213591fb 100644 --- a/tests/mysql_client_test.c +++ b/tests/mysql_client_test.c @@ -19236,7 +19236,7 @@ static void test_bug12337762() rc= mysql_query(mysql, "create table charset_tab("\ "txt1 varchar(32) character set Latin1,"\ "txt2 varchar(32) character set Latin1 collate latin1_bin,"\ - "txt3 varchar(32) character set utf8 collate utf8_bin"\ + "txt3 varchar(32) character set utf8mb3 collate utf8_bin"\
both charset and collation
")");
DIE_UNLESS(rc == 0);
Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org
participants (1)
-
Sergei Golubchik