Hi, Alexander, On Mar 08, Alexander Barkov wrote:
commit a473db9e4fe Author: Alexander Barkov <bar@mariadb.com> Date: Thu Nov 2 14:16:09 2023 +0400
MDEV-25829 Change default collation to utf8mb4_1400_ai_ci
"utf8mb4_uca1400_ai_ci". It's still not too late to rename the MDEV and change commits to match. I'm not reviewing InnoDB changes, I hope Marko did
Step#3 The main patch
diff --git a/mysql-test/include/ctype_utf8mb4.inc b/mysql-test/include/ctype_utf8mb4.inc index 436b0f2782f..73011cef81d 100644 --- a/mysql-test/include/ctype_utf8mb4.inc +++ b/mysql-test/include/ctype_utf8mb4.inc @@ -58,11 +58,11 @@ select CONVERT(_koi8r' # "a\0" < "a" # "a\0" < "a "
-SELECT 'a' = 'a '; -SELECT 'a\0' < 'a'; -SELECT 'a\0' < 'a '; -SELECT 'a\t' < 'a'; -SELECT 'a\t' < 'a '; +SELECT 'a' = 'a ' collate utf8mb4_general_ci; +SELECT 'a\0' < 'a' collate utf8mb4_general_ci; +SELECT 'a\0' < 'a ' collate utf8mb4_general_ci; +SELECT 'a\t' < 'a' collate utf8mb4_general_ci; +SELECT 'a\t' < 'a ' collate utf8mb4_general_ci;
1. what will happen in utf8mb4_uca1400_ai_ci? 2. if you specifically want this file to test utf8mb4_general_ci, you can do SET NAMES, cannot you? Once for the whole file. Or set character_set_collations
# # The same for binary collation diff --git a/mysql-test/main/ctype_euckr.result b/mysql-test/main/ctype_euckr.result index 9e030f9cc6d..8de90f89a26 100644 --- a/mysql-test/main/ctype_euckr.result +++ b/mysql-test/main/ctype_euckr.result @@ -2244,7 +2244,7 @@ FE80 DELETE FROM t2 WHERE a='?'; ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr; UPDATE IGNORE t2 SET u=a, a2=u; -SELECT s as unassigned_code FROM t2 WHERE u='?'; +SELECT s as unassigned_code FROM t2 WHERE u=binary'?';
why?
unassigned_code A2E8 A2E9 diff --git a/mysql-test/main/ctype_like_range.result b/mysql-test/main/ctype_like_range.result index e5e5e61126b..fa694b0c250 100644 --- a/mysql-test/main/ctype_like_range.result +++ b/mysql-test/main/ctype_like_range.result @@ -284,7 +284,7 @@ id name val 32 mn 63616161616161616161616161616161 32 mx 63616161616161616161616161616161 32 sp -------------------------------- -ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8; +ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_general_ci;
do you have tests for ctype_like_range with utf8mb3_uca1400_ai_ci ?
INSERT INTO t1 (a) VALUES (_ucs2 0x0425),(_ucs2 0x045F); INSERT INTO t1 (a) VALUES (_ucs2 0x2525),(_ucs2 0x5F5F); SELECT * FROM v1; diff --git a/mysql-test/main/ctype_utf8.test b/mysql-test/main/ctype_utf8.test index a875fe51f3a..2f56d10bccf 100644 --- a/mysql-test/main/ctype_utf8.test +++ b/mysql-test/main/ctype_utf8.test @@ -24,7 +24,7 @@ drop database if exists mysqltest; --disable_warnings drop table if exists t1,t2; --enable_warnings -set names utf8; +set names utf8 collate utf8_general_ci;
why not to change the test to work with utf8mb3_uca1400_ai_ci?
select left(_utf8 0xD0B0D0B1D0B2,1); select right(_utf8 0xD0B0D0B2D0B2,1); diff --git a/mysql-test/main/dyncol.test b/mysql-test/main/dyncol.test index 493e9b3842d..ba302861e03 100644 --- a/mysql-test/main/dyncol.test +++ b/mysql-test/main/dyncol.test @@ -10,10 +10,10 @@ --echo # column create --echo # select hex(COLUMN_CREATE(1, NULL AS char character set utf8)); -select hex(COLUMN_CREATE(1, "afaf" AS char character set utf8)); -select hex(COLUMN_CREATE(1, 1212 AS char character set utf8)); -select hex(COLUMN_CREATE(1, 12.12 AS char character set utf8)); -select hex(COLUMN_CREATE(1, 99999999999999999999999999999 AS char character set utf8)); +select hex(COLUMN_CREATE(1, "afaf" AS char character set utf8 collate utf8_general_ci)); +select hex(COLUMN_CREATE(1, 1212 AS char character set utf8 collate utf8_general_ci)); +select hex(COLUMN_CREATE(1, 12.12 AS char character set utf8 collate utf8_general_ci)); +select hex(COLUMN_CREATE(1, 99999999999999999999999999999 AS char character set utf8 collate utf8_general_ci));
why?
select hex(COLUMN_CREATE(1, NULL AS unsigned int)); select hex(COLUMN_CREATE(1, 1212 AS unsigned int)); select hex(COLUMN_CREATE(1, 7 AS unsigned int)); diff --git a/mysql-test/main/func_json.result b/mysql-test/main/func_json.result index ab35822a6ce..fce2caa2842 100644 --- a/mysql-test/main/func_json.result +++ b/mysql-test/main/func_json.result @@ -414,6 +414,13 @@ select json_object('foo', json_unquote(json_object('bar', c)),'qux', c) as fld f fld {"foo": "{\"bar\": \"abc\"}", "qux": "abc"} {"foo": "{\"bar\": \"def\"}", "qux": "def"} +create table t2 as select json_object('foo', json_unquote(json_object('bar', c)),'qux', c) as fld from t1 limit 0; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `fld` varchar(39) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +drop table t2;
why do you test here?
drop table t1; select json_object("a", json_object("b", "abcd")); json_object("a", json_object("b", "abcd")) diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test index 77f5af6e0ff..7cf9e8007de 100644 --- a/mysql-test/main/func_regexp_pcre.test +++ b/mysql-test/main/func_regexp_pcre.test @@ -23,17 +23,17 @@ SELECT 'Ã ' RLIKE '\\x{00C0}' COLLATE utf8_bin; SELECT 'Ã' RLIKE '\\x{00C0}' COLLATE utf8_bin;
# Checking how (?i) and (?-i) affect case sensitivity -CREATE TABLE t1 (s VARCHAR(10) CHARACTER SET utf8); +CREATE TABLE t1 (s VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_general_ci);
why?
INSERT INTO t1 VALUES ('a'),('A'); -CREATE TABLE t2 (p VARCHAR(10) CHARACTER SET utf8); +CREATE TABLE t2 (p VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_general_ci); INSERT INTO t2 VALUES ('a'),('(?i)a'),('(?-i)a'),('A'),('(?i)A'),('(?-i)A'); SELECT s,p,s RLIKE p, s COLLATE utf8_bin RLIKE p FROM t1,t2 ORDER BY BINARY s, BINARY p; DROP TABLE t1,t2;
# Checking Unicode character classes -CREATE TABLE t1 (ch VARCHAR(22)) CHARACTER SET utf8; -CREATE TABLE t2 (class VARCHAR(32)) CHARACTER SET utf8; +CREATE TABLE t1 (ch VARCHAR(22)) CHARACTER SET utf8 COLLATE utf8_general_ci; +CREATE TABLE t2 (class VARCHAR(32)) CHARACTER SET utf8 COLLATE utf8_general_ci;
and here
INSERT INTO t1 VALUES ('Я'),('Σ'),('A'),('Ã'); INSERT INTO t1 VALUES ('Ñ'),('Ï'),('a'),('à '); INSERT INTO t1 VALUES ('ã'),('ê°·'),('ප'); diff --git a/mysql-test/main/mrr_icp_extra.result b/mysql-test/main/mrr_icp_extra.result index 8f6ee88acc6..d7cde103288 100644 --- a/mysql-test/main/mrr_icp_extra.result +++ b/mysql-test/main/mrr_icp_extra.result @@ -754,7 +754,7 @@ t1 CREATE TABLE `t1` ( KEY `t` (`t`(5)) ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci drop table t1; -create table t1 (v char(10) character set utf8); +create table t1 (v char(10) character set utf8 collate utf8_general_ci);
why here?
show create table t1; Table Create Table t1 CREATE TABLE `t1` ( diff --git a/mysql-test/main/myisam.test b/mysql-test/main/myisam.test index 1a20f97a54f..4a283737122 100644 --- a/mysql-test/main/myisam.test +++ b/mysql-test/main/myisam.test @@ -1039,7 +1039,7 @@ create table t1 (v varchar(65536)); show create table t1; drop table t1; set statement sql_mode = 'NO_ENGINE_SUBSTITUTION' for -create table t1 (v varchar(65530) character set utf8); +create table t1 (v varchar(65530) character set utf8 collate utf8_general_ci);
why? collation doesn't matter here, does it?
show create table t1; drop table t1;
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result index 274f29e34dc..85059fb3370 100644 --- a/mysql-test/main/order_by.result +++ b/mysql-test/main/order_by.result @@ -4142,7 +4142,7 @@ drop table t1; # # MDEV-21922: Allow packing addon fields even if they don't honour max_length_for_sort_data # -create table t1 (a varchar(200) character set utf8, b int); +create table t1 (a varchar(200) character set utf8 collate utf8_general_ci, b int);
why? I think I see too many forced utf8_general_ci in tests and it's difficult to believe that the collation actually matters in all these cases.
insert into t1 select seq, seq from seq_1_to_10; select * from t1 order by a; a b diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 508ea9f644e..5731ad1f04b 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -842,7 +842,7 @@ String *Item_func_json_quote::val_str(String *str) bool Item_func_json_unquote::fix_length_and_dec(THD *thd) { collation.set(&my_charset_utf8mb3_general_ci, - DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII); + DERIVATION_CAST, MY_REPERTOIRE_ASCII);
why? I guess that's why you have json_unquote test above.
max_length= args[0]->max_length; set_maybe_null(); return FALSE;
Regards, Sergei Chief Architect, MariaDB Server and security@mariadb.org