Hi, Alexander, I have no comments about the actual code changes, only about the tests. Please see below. On Jun 12, Alexander Barkov wrote:
revision-id: 98ebe0a3afc (mariadb-11.5.1-12-g98ebe0a3afc) parent(s): 186a30de58b author: Alexander Barkov committer: Alexander Barkov timestamp: 2024-06-11 14:17:11 +0400 message:
MDEV-19123 Change default charset from latin1 to utf8mb4
Changing the default server character set from latin1 to utf8mb4.
diff --git a/mysql-test/main/column_compression.test b/mysql-test/main/column_compression.test --- a/mysql-test/main/column_compression.test +++ b/mysql-test/main/column_compression.test @@ -9,20 +9,20 @@ let $typec= BLOB COMPRESSED; let $typeu= BLOB; --source column_compression.inc
-let $typec= TEXT COMPRESSED; -let $typeu= TEXT; +let $typec= TEXT COMPRESSED CHARACTER SET latin1; +let $typeu= TEXT CHARACTER SET latin1;
why?
--source column_compression.inc
let $typec= VARBINARY(10000) COMPRESSED; let $typeu= VARBINARY(10000); --source column_compression.inc
-let $typec= VARCHAR(10000) COMPRESSED; -let $typeu= VARCHAR(10000); +let $typec= VARCHAR(10000) COMPRESSED CHARACTER SET latin1; +let $typeu= VARCHAR(10000) CHARACTER SET latin1; --source column_compression.inc
let $typec= TEXT COMPRESSED CHARSET ucs2; -let $typeu= TEXT; +let $typeu= TEXT CHARACTER SET latin1; --source column_compression.inc
SET column_compression_zlib_wrap=DEFAULT; diff --git a/mysql-test/main/create.result b/mysql-test/main/create.result --- a/mysql-test/main/create.result +++ b/mysql-test/main/create.result @@ -506,9 +506,9 @@ FROM t1; SHOW CREATE TABLE t2; Table Create Table t2 CREATE TABLE `t2` ( - `ifnull(c_tinytext, CAST('yet another binary data' AS BINARY))` tinyblob DEFAULT NULL, - `ifnull(c_text, CAST('yet another binary data' AS BINARY))` blob DEFAULT NULL, - `ifnull(c_mediumtext, CAST('yet another binary data' AS BINARY))` mediumblob DEFAULT NULL, + `ifnull(c_tinytext, CAST('yet another binary data' AS BINARY))` blob DEFAULT NULL, + `ifnull(c_text, CAST('yet another binary data' AS BINARY))` mediumblob DEFAULT NULL, + `ifnull(c_mediumtext, CAST('yet another binary data' AS BINARY))` longblob DEFAULT NULL,
This looks wrong. UPD: there are more changes like this below; I didn't comment on each of them.
`ifnull(c_longtext, CAST('yet another binary data' AS BINARY))` longblob DEFAULT NULL ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci DROP TABLE t2; diff --git a/mysql-test/main/ctype_utf8_def_upgrade.result b/mysql-test/main/ctype_utf8_def_upgrade.result --- a/mysql-test/main/ctype_utf8_def_upgrade.result +++ b/mysql-test/main/ctype_utf8_def_upgrade.result @@ -61,23 +61,23 @@ t1 CREATE TABLE `t1` ( PRIMARY KEY (`Host`,`Db`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci COMMENT='Host privileges; Merged with database privileges' DROP TABLE t1; -SET @@character_set_database=DEFAULT; +SET @@collation_database=DEFAULT;
why?
# Now do the same, but doing 'ALTER DATABASE' to create the db.opt file, # instead of setting variables directly. # Emulate a pre-4.1 database without db.opt SHOW CREATE DATABASE db1; Database Create Database -db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET utf8mb3 COLLATE utf8mb3_uca1400_ai_ci */ +db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci */ USE db1; -SELECT @@character_set_database, 'taken from defaults' AS comment; -@@character_set_database comment -utf8mb3 taken from defaults +SELECT @@collation_database, 'taken from defaults' AS comment; +@@collation_database comment +utf8mb3_general_ci taken from defaults USE test; ALTER DATABASE db1 DEFAULT CHARACTER SET latin1; USE db1; -SELECT @@character_set_database, 'taken from db.opt' AS comment; -@@character_set_database comment -latin1 taken from db.opt +SELECT @@collation_database, 'taken from db.opt' AS comment; +@@collation_database comment +latin1_swedish_ci taken from db.opt SELECT COUNT(*) FROM t1; ERROR HY000: Got error 190 "Incompatible key or row definition between the MariaDB .frm file and the information in the storage engine. You can try REPAIR TABLE ... USE_FRM possibly followed by ALTER TABLE ... FORCE or dump and restore the table to fix this" from storage engine MyISAM REPAIR TABLE t1 USE_FRM; diff --git a/mysql-test/main/endspace.test b/mysql-test/main/endspace.test --- a/mysql-test/main/endspace.test +++ b/mysql-test/main/endspace.test @@ -13,7 +13,7 @@ drop table if exists t1; # Test MyISAM tables. #
-create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)); +create table t1 (text1 varchar(32) not NULL, KEY key1 (text1)) charset=latin1;
why is that?
insert into t1 values ('teststring'), ('nothing'), ('teststring\t'); check table t1; select * from t1 ignore key (key1) where text1='teststring' or diff --git a/mysql-test/main/func_compress.test b/mysql-test/main/func_compress.test --- a/mysql-test/main/func_compress.test +++ b/mysql-test/main/func_compress.test @@ -1,5 +1,9 @@ -- source include/have_compress.inc -- source include/have_normal_zlib.inc + +--source include/test_db_charset_latin1.inc
Same as in column_compression.test?
+ + # # Test for compress and uncompress functions: # diff --git a/mysql-test/main/long_unique.result b/mysql-test/main/long_unique.result --- a/mysql-test/main/long_unique.result +++ b/mysql-test/main/long_unique.result @@ -41,7 +41,7 @@ Ignored NO
MyISAM file: DATADIR/test/t1 Record format: Packed -Character set: latin1_swedish_ci (8) +Character set: ? (0)
huh?
Data records: 10 Deleted blocks: 0 Recordlength: 12
diff --git a/mysql-test/main/mysqlbinlog_row_compressed.result b/mysql-test/main/mysqlbinlog_row_compressed.result --- a/mysql-test/main/mysqlbinlog_row_compressed.result +++ b/mysql-test/main/mysqlbinlog_row_compressed.result @@ -40,9 +40,10 @@ SET @@session.sql_mode=#/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C latin1 *//*!*/; SET @@session.character_set_client=latin1,@@session.collation_connection=8,@@session.collation_server=#/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_uca1400_ai_ci,ucs2=ucs2_uca1400_ai_ci,utf8mb4=utf8mb4_uca1400_ai_ci,utf16=utf16_uca1400_ai_ci,utf32=utf32_uca1400_ai_ci'/*!*/;
why did this appear?
SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; -CREATE TABLE t1 (pk INT PRIMARY KEY, f1 INT, f2 INT, f3 TINYINT, f4 MEDIUMINT, f5 BIGINT, f6 INT, f7 INT, f8 char(1)) +CREATE TABLE t1 (pk INT PRIMARY KEY, f1 INT, f2 INT, f3 TINYINT, f4 MEDIUMINT, f5 BIGINT, f6 INT, f7 INT, f8 char(1)) CHARSET=latin1 /*!*/; # at <pos> #<date> server id 1 end_log_pos # CRC32 XXX GTID 0-1-2 ddl thread_id=TID diff --git a/mysql-test/main/sp.result b/mysql-test/main/sp.result --- a/mysql-test/main/sp.result +++ b/mysql-test/main/sp.result @@ -6800,6 +6800,8 @@ DROP FUNCTION IF EXISTS f2; #
CREATE FUNCTION f1() RETURNS VARCHAR(65525) RETURN 'Hello'; +Warnings: +Note 1246 Converting column '' from VARCHAR to TEXT
this could use latin1, I suppose
CREATE FUNCTION f2() RETURNS TINYINT RETURN 1;
diff --git a/mysql-test/suite/binlog_encryption/rpl_special_charset.opt b/mysql-test/suite/binlog_encryption/rpl_special_charset.opt --- a/mysql-test/suite/binlog_encryption/rpl_special_charset.opt +++ b/mysql-test/suite/binlog_encryption/rpl_special_charset.opt @@ -1 +1 @@ ---character-set-server=utf16 +--character-set-server=utf16 --collation-server=utf16_general_ci
why?
diff --git a/mysql-test/suite/federated/assisted_discovery.test b/mysql-test/suite/federated/assisted_discovery.test --- a/mysql-test/suite/federated/assisted_discovery.test +++ b/mysql-test/suite/federated/assisted_discovery.test @@ -38,7 +38,7 @@ create table t1 ( d varchar(4096) not null, primary key (a), key (b,c,d(255)) -); +) CHARSET=latin1;
why?
show create table t1;
connection master; diff --git a/mysql-test/suite/funcs_1/r/is_columns_innodb.result b/mysql-test/suite/funcs_1/r/is_columns_innodb.result --- a/mysql-test/suite/funcs_1/r/is_columns_innodb.result +++ b/mysql-test/suite/funcs_1/r/is_columns_innodb.result @@ -739,6 +740,9 @@ WHERE table_schema LIKE 'test%' AND CHARACTER_OCTET_LENGTH / CHARACTER_MAXIMUM_LENGTH <> 1 ORDER BY CHARACTER_SET_NAME, COLLATION_NAME, COL_CML; COL_CML DATA_TYPE CHARACTER_SET_NAME COLLATION_NAME +4.0000 char utf8mb4 utf8mb4_uca1400_ai_ci +4.0000 enum utf8mb4 utf8mb4_uca1400_ai_ci +4.0000 set utf8mb4 utf8mb4_uca1400_ai_ci
I suppose this changed the intention of the test
Warnings: Warning 1365 Division by 0 Warning 1365 Division by 0 diff --git a/mysql-test/suite/gcol/r/innodb_virtual_index.result b/mysql-test/suite/gcol/r/innodb_virtual_index.result --- a/mysql-test/suite/gcol/r/innodb_virtual_index.result +++ b/mysql-test/suite/gcol/r/innodb_virtual_index.result @@ -114,7 +114,7 @@ KEY `vbidxcol` (`vbidxcol`), KEY `a_2` (`a`,`vbidxcol`), KEY `vbidxcol_2` (`vbidxcol`), FULLTEXT KEY `ftsic` (`c`,`b`) -) ENGINE=InnoDB; +) ENGINE=InnoDB CHARSET=latin1;
why latin1 everywhere in this file?
Warnings: Note 1831 Duplicate index `vbidxcol_2`. This is deprecated and will be disallowed in a future release ALTER TABLE ibstd_08 ADD COLUMN nc07006 BIGINT AUTO_INCREMENT NOT NULL , ADD KEY auto_nc07006(nc07006); diff --git a/mysql-test/suite/innodb/r/online_table_rebuild.result b/mysql-test/suite/innodb/r/online_table_rebuild.result --- a/mysql-test/suite/innodb/r/online_table_rebuild.result +++ b/mysql-test/suite/innodb/r/online_table_rebuild.result @@ -10,7 +10,7 @@ INSERT INTO t1 VALUES(2, repeat('b', 100), repeat('a', 100)); COMMIT; SET DEBUG_SYNC="now SIGNAL dml_commit"; connection default; -ERROR 23000: Duplicate entry 'bbbbbbbbbb' for key 'f2' +ERROR 23000: Duplicate entry 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' for key 'f2'
why?
connection default; SET DEBUG_SYNC="inplace_after_index_build SIGNAL dml_start WAIT_FOR dml_commit"; ALTER TABLE t1 ADD PRIMARY KEY(f1); diff --git a/mysql-test/suite/innodb/t/alter_primary_key.test b/mysql-test/suite/innodb/t/alter_primary_key.test --- a/mysql-test/suite/innodb/t/alter_primary_key.test +++ b/mysql-test/suite/innodb/t/alter_primary_key.test @@ -1,6 +1,9 @@ --source innodb_default_row_format.inc --source include/have_debug.inc --source include/have_debug_sync.inc +--disable_query_log +--source include/test_db_charset_latin1.inc +--enable_query_log
You don't disable the query log in other tests; why here?
--echo # --echo # MDEV-23244 ALTER TABLE…ADD PRIMARY KEY fails to flag diff --git a/mysql-test/suite/perfschema/t/short_option_1-master.opt b/mysql-test/suite/perfschema/t/short_option_1-master.opt --- a/mysql-test/suite/perfschema/t/short_option_1-master.opt +++ b/mysql-test/suite/perfschema/t/short_option_1-master.opt @@ -1 +1 @@ --a -Cutf8 -W1 +-a -Cutf8 -W1 --collation-server=utf8_general_ci
was it necessary?
diff --git a/mysql-test/suite/rpl/include/rpl_charset.inc b/mysql-test/suite/rpl/include/rpl_charset.inc --- a/mysql-test/suite/rpl/include/rpl_charset.inc +++ b/mysql-test/suite/rpl/include/rpl_charset.inc @@ -145,4 +145,11 @@ sync_slave_with_master; --echo #
+connection master; +SET GLOBAL collation_server=utf8mb4_uca1400_ai_ci; +SET SESSION collation_server=utf8mb4_uca1400_ai_ci; +connection slave; +SET GLOBAL collation_server=utf8mb4_uca1400_ai_ci; +SET SESSION collation_server=utf8mb4_uca1400_ai_ci;
why?
+ --source include/rpl_end.inc diff --git a/mysql-test/suite/sys_vars/inc/secure_timestamp_func.inc b/mysql-test/suite/sys_vars/inc/secure_timestamp_func.inc --- a/mysql-test/suite/sys_vars/inc/secure_timestamp_func.inc +++ b/mysql-test/suite/sys_vars/inc/secure_timestamp_func.inc @@ -58,7 +58,7 @@ set time_zone='+00:00'; set timestamp=1234567890.101112; select @@timestamp, now(6);
-create table t1 (b varchar(20), a timestamp(6) default current_timestamp(6)); +create table t1 (b varchar(20), a timestamp(6) default current_timestamp(6)) charset=latin1;
Why? The charset shouldn't affect secure_timestamp functionality (the same applies to all the other secure_timestamp tests).
insert t1 (b) values ('replicated'); sync_slave_with_master; create trigger t1rbr before insert on t1 for each row set new.a=now(6); diff --git a/storage/connect/mysql-test/connect/r/bson.result b/storage/connect/mysql-test/connect/r/bson.result --- a/storage/connect/mysql-test/connect/r/bson.result +++ b/storage/connect/mysql-test/connect/r/bson.result @@ -321,7 +321,7 @@ WHO CHAR(12), WEEK INT(2) JPATH='$.WEEK[2].NUMBER', WHAT CHAR(32) JPATH='$.WEEK[2].EXPENSE[*].WHAT', AMOUNT DOUBLE(8,2) JPATH='$.WEEK[2].EXPENSE[*].AMOUNT') -ENGINE=CONNECT TABLE_TYPE=BSON FILE_NAME='expense.json'; +ENGINE=CONNECT CHARSET=latin1 TABLE_TYPE=BSON FILE_NAME='expense.json';
Is it necessary? Are there any connect/bson tests for utf8mb4?
SELECT * FROM t4; WHO WEEK WHAT AMOUNT Joe 5 Beer 14.00 diff --git a/storage/connect/mysql-test/connect/r/endian.result b/storage/connect/mysql-test/connect/r/endian.result --- a/storage/connect/mysql-test/connect/r/endian.result +++ b/storage/connect/mysql-test/connect/r/endian.result @@ -10,7 +10,7 @@ birth DATE NOT NULL FIELD_FORMAT='L', id CHAR(5) NOT NULL FIELD_FORMAT='L2', salary DOUBLE(9,2) NOT NULL DEFAULT 0.00 FIELD_FORMAT='LF', dept INT(4) NOT NULL FIELD_FORMAT='L2' -) ENGINE=CONNECT TABLE_TYPE=BIN BLOCK_SIZE=5 FILE_NAME='Testbal.dat'; +) ENGINE=CONNECT CHARSET=latin1 TABLE_TYPE=BIN BLOCK_SIZE=5 FILE_NAME='Testbal.dat';
Or any connect utf8mb4 tests whatsoever?
SELECT * FROM t1; fig name birth id salary dept 5500 ARCHIBALD 1980-01-25 3789 4380.50 318 diff --git a/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result b/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result --- a/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result +++ b/storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS t1,t2; -CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1; +CREATE TABLE t1 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1 CHARSET=latin1;
Again: was it necessary? Are there any rocksdb tests with utf8mb4?
INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'); -CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1; +CREATE TABLE t2 (a INT PRIMARY KEY, b CHAR(8)) ENGINE=rocksdb CHECKSUM=1 CHARSET=latin1; CHECKSUM TABLE t1; Table Checksum test.t1 4259194219
Regards, Sergei Chief Architect, MariaDB Server and security@mariadb.org