Re: [Maria-developers] 7b20964dd240: MDEV-8334: Rename utf8 to utf8mb3
Hi, Rucha! On Apr 15, Rucha Deodhar wrote:
revision-id: 7b20964dd240 parent(s): e9a2c9e author: Rucha Deodhar <rucha.deodhar@mariadb.com> timestamp: 2021-03-26 00:55:56 +0530 message:
MDEV-8334: Rename utf8 to utf8mb3
This patch is made as a part of MDEV-8334 to fix failing test in unit and main test suite so that utf8mb3 characterset is recognized. Failing tests: main.mysql_client_test main.mysql_client_test_comp unit.conc_basic-t unit.conc_charset unit.conc_connection
diff --git a/libmariadb/ma_charset.c b/libmariadb/ma_charset.c index ee4b0f47..307cd522 100644 --- a/libmariadb/ma_charset.c +++ b/libmariadb/ma_charset.c @@ -67,6 +67,10 @@ #include <langinfo.h> #endif
+#define IS_UTF8(c)\ +(!strcasecmp((c), "utf8") || !strcasecmp((c), "utf8mb3") ||\ + !strcasecmp((c), "utf8mb4") || !strcasecmp((c), "utf-8")) + /* +----------------------------------------------------------------------+ | PHP Version 5 | @@ -1269,7 +1275,7 @@ struct st_madb_os_charset MADB_OS_CHARSET[]= {"57010", "ISCII Gujarati", NULL, NULL, MADB_CS_UNSUPPORTED}, {"57011", "ISCII Punjabi", NULL, NULL, MADB_CS_UNSUPPORTED}, {"65000", "utf-7 Unicode (UTF-7)", NULL, NULL, MADB_CS_UNSUPPORTED}, - {"65001", "utf-8 Unicode (UTF-8)", "utf8", NULL, MADB_CS_EXACT}, + {"65001", "utf-8 Unicode (UTF-8)", "utf8mb3", NULL, MADB_CS_EXACT},
No, keep this as utf8; it's still a valid charset name, and the server can figure out what to map it to.
/* non Windows */ #else /* iconv encodings */ @@ -1337,8 +1343,8 @@ struct st_madb_os_charset MADB_OS_CHARSET[]= {"gb2312", "GB2312", "gb2312", "GB2312", MADB_CS_EXACT}, {"gbk", "GBK", "gbk", "GBK", MADB_CS_EXACT}, {"georgianps", "Georgian", "geostd8", "GEORGIAN-PS", MADB_CS_EXACT}, - {"utf8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT}, - {"utf-8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT}, + {"utf8mb3", "UTF8MB3", "utf8mb3", "UTF-8", MADB_CS_EXACT}, + {"utf-8", "UTF8MB3", "utf8mb3", "UTF-8", MADB_CS_EXACT},
same here
#endif {NULL, NULL, NULL, NULL, 0} }; @@ -1361,8 +1367,8 @@ const char *madb_get_os_character_set() return MADB_DEFAULT_CHARSET_NAME; while (MADB_OS_CHARSET[i].identifier) { - if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED && - strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0) + if ((MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED && + strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0) || IS_UTF8(p))
why?
return MADB_OS_CHARSET[i].charset; i++; } diff --git a/unittest/libmariadb/basic-t.c b/unittest/libmariadb/basic-t.c index c22e6c2b..e2943964 100644 --- a/unittest/libmariadb/basic-t.c +++ b/unittest/libmariadb/basic-t.c @@ -310,7 +310,8 @@ static int use_utf8(MYSQL *my)
while ((row= mysql_fetch_row(res)) != NULL) { - FAIL_IF(strcmp(row[0], "utf8"), "wrong character set"); + FAIL_IF(strcmp(row[0], get_utf8_name(mysql_get_server_version(my),"utf8")), + "wrong character set");
Technically, C/C is a separate project and can run against any server with any config file. So it'd be safer to check that row[0] starts with utf8 and not assume that it depends on the server version in a specific way.
} FAIL_IF(mysql_errno(my), mysql_error(my)); mysql_free_result(res); diff --git a/unittest/libmariadb/charset.c b/unittest/libmariadb/charset.c index 898b6dad..ffa877bc 100644 --- a/unittest/libmariadb/charset.c +++ b/unittest/libmariadb/charset.c @@ -71,14 +71,20 @@ int bug_8378(MYSQL *mysql) { int test_client_character_set(MYSQL *mysql) { MY_CHARSET_INFO cs; + char collation_name[19]; char *csname= (char*) "utf8"; char *csdefault= (char*)mysql_character_set_name(mysql);
+ strcpy(collation_name,(const char*)get_utf8_name(mysql_get_server_version(mysql), + "utf8_general_ci")); +
This one is simpler. It only tests that mysql_set_character_set() works. Just don't use utf8, make it test on something else, e.g. on latin2.
FAIL_IF(mysql_set_character_set(mysql, csname), mysql_error(mysql));
mysql_get_character_set_info(mysql, &cs);
- FAIL_IF(strcmp(cs.csname, "utf8") || strcmp(cs.name, "utf8_general_ci"), "Character set != UTF8"); + FAIL_IF(strcmp(cs.csname, get_utf8_name(mysql_get_server_version(mysql),"utf8")) || + strcmp(cs.name, collation_name), + "Wrong UTF8 characterset"); FAIL_IF(mysql_set_character_set(mysql, csdefault), mysql_error(mysql));
return OK; @@ -537,6 +544,9 @@ static int test_bug30472(MYSQL *mysql)
SKIP_MAXSCALE;
+ strcpy(collation_name,(const char*)get_utf8_name(mysql_get_server_version(mysql), + "utf8_general_ci")); +
Same here: the bug is https://bugs.mysql.com/bug.php?id=30472 "libmysql doesn't reset charset, insert_id after succ. mysql_change_user() call", so it does not need utf8 specifically. Change it to some easier-to-use charset.
if (mysql_get_server_version(mysql) < 50100 || !is_mariadb) { diag("Test requires MySQL Server version 5.1 or above"); diff --git a/unittest/libmariadb/connection.c b/unittest/libmariadb/connection.c index 70d347ce..eb9b39bb 100644 --- a/unittest/libmariadb/connection.c +++ b/unittest/libmariadb/connection.c @@ -644,9 +644,8 @@ int test_conc26(MYSQL *unused __attribute__((unused)))
FAIL_IF(my_test_connect(mysql, hostname, "notexistinguser", "password", schema, port, NULL, CLIENT_REMEMBER_OPTIONS), "Error expected"); - - FAIL_IF(!mysql->options.charset_name || strcmp(mysql->options.charset_name, "utf8") != 0, - "expected charsetname=utf8"); + FAIL_IF(!mysql->options.charset_name || strcmp(mysql->options.charset_name, "utf8") != 0, + "Wrong utf8 characterset for this version");
Again, CONC-26 is "CLIENT_REMEMBER_OPTIONS flag missing"; it apparently doesn't need utf8 specifically, so just use a different non-default charset there.
mysql_close(mysql);
mysql= mysql_init(NULL); @@ -981,7 +980,8 @@ static int test_sess_track_db(MYSQL *mysql) printf("# SESSION_TRACK_VARIABLES: %*.*s\n", (int)len, (int)len, data); } while (!mysql_session_track_get_next(mysql, SESSION_TRACK_SYSTEM_VARIABLES, &data, &len)); diag("charset: %s", mysql->charset->csname); - FAIL_IF(strcmp(mysql->charset->csname, "utf8"), "Expected charset 'utf8'"); + FAIL_IF(strcmp(mysql->charset->csname, get_utf8_name(mysql_get_server_version(mysql),"utf8")), + "Wrong utf8 characterset for this version");
same here
rc= mysql_query(mysql, "SET NAMES latin1"); check_mysql_rc(rc, mysql); diff --git a/unittest/libmariadb/my_test.h b/unittest/libmariadb/my_test.h index c30d1b6d..a040c3d9 100644 --- a/unittest/libmariadb/my_test.h +++ b/unittest/libmariadb/my_test.h @@ -701,3 +701,23 @@ void run_tests(struct my_tests_st *test) { } }
+static inline const char* get_utf8_name(unsigned long server_version, + const char* name) +{ + const char *csname= server_version >= 100600 ? "utf8mb3" : "utf8"; + char *corrected_name= malloc(19*sizeof(char)); + corrected_name[18]='\0'; + + if (!strchr(name, '_')) + { + strcpy(corrected_name,csname); + corrected_name[strlen(csname)]='\0'; + } + else + { + strcpy(corrected_name,csname); + strcat(corrected_name,"_general_ci"); + corrected_name[strlen(csname)+11]= '\0'; + } + return (const char*)corrected_name; +}
shouldn't be needed Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org
participants (1)
-
Sergei Golubchik