#At lp:maria based on revid:monty@askmonty.org-20091203113411-cmr8g2lcp45n0prv 2776 Michael Widenius 2009-12-03 Applied patch from Alexander Barkov to fix some problems with Croatian character set and LIKE queries Author: Alexander Barkov License: GPL modified: mysql-test/r/ctype_ucs.result mysql-test/t/ctype_ucs.test strings/ctype-ucs2.c per-file messages: mysql-test/t/ctype_ucs.test Added test case for Croatian character set === modified file 'mysql-test/r/ctype_ucs.result' --- a/mysql-test/r/ctype_ucs.result 2008-12-23 14:21:01 +0000 +++ b/mysql-test/r/ctype_ucs.result 2009-12-03 12:02:37 +0000 @@ -1211,3 +1211,47 @@ HEX(DAYNAME(19700101)) 0427043504420432043504400433 SET character_set_connection=latin1; End of 5.0 tests +Start of 5.1 tests +SET NAMES utf8; +CREATE TABLE t1 ( +a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci, +key(a) +); +INSERT INTO t1 VALUES +('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'), +('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'), +('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz'); +INSERT INTO t1 VALUES ('ca'),('cz'),('ch'); +INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E'); +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 30 Using where; Using index +SELECT * FROM t1 WHERE a LIKE 'c%'; +a +ca +cc +cz +ch +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index +SELECT * FROM t1 WHERE a LIKE 'ch%'; +a +ch +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci; +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 1 Using 
where; Using index +SELECT hex(concat('d',_ucs2 0x017E,'%')); +hex(concat('d',_ucs2 0x017E,'%')) +0064017E0025 +EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index +SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%'); +hex(a) +0064017E +DROP TABLE t1; === modified file 'mysql-test/t/ctype_ucs.test' --- a/mysql-test/t/ctype_ucs.test 2008-12-23 14:21:01 +0000 +++ b/mysql-test/t/ctype_ucs.test 2009-12-03 12:02:37 +0000 @@ -723,3 +723,34 @@ SELECT HEX(DAYNAME(19700101)); SET character_set_connection=latin1; --echo End of 5.0 tests + + +--echo Start of 5.1 tests +# +# Checking my_like_range_ucs2 +# +SET NAMES utf8; +CREATE TABLE t1 ( + a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci, + key(a) +); +INSERT INTO t1 VALUES +('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'), +('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'), +('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz'); +INSERT INTO t1 VALUES ('ca'),('cz'),('ch'); +INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E'); +# This one should scan only one row +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%'; +# This one should scan many rows: 'c' is a contraction head +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%'; +SELECT * FROM t1 WHERE a LIKE 'c%'; +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%'; +SELECT * FROM t1 WHERE a LIKE 'ch%'; +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci; +EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%'; +SELECT hex(concat('d',_ucs2 0x017E,'%')); +EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%'); +SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%'); + +DROP TABLE t1; === modified file 'strings/ctype-ucs2.c' --- a/strings/ctype-ucs2.c 2009-11-30 12:42:24 +0000 +++ b/strings/ctype-ucs2.c 2009-12-03 12:02:37 +0000 @@ -1498,6 +1498,14 @@ void 
my_hash_sort_ucs2_bin(CHARSET_INFO } } + +static inline my_wc_t +ucs2_to_wc(const uchar *ptr) +{ + return (((uint) ptr[0]) << 8) + ptr[1]; +} + + /* ** Calculate min_str and max_str that ranges a LIKE string. ** Arguments: @@ -1531,6 +1539,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0 ; ptr+=2, charlen--) { + my_wc_t wc; if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end) { ptr+=2; /* Skip escape */ @@ -1567,9 +1576,9 @@ fill_max_and_min: } if (have_contractions && ptr + 3 < end && - ptr[0] == '\0' && - my_uca_can_be_contraction_head(cs, (uchar) ptr[1])) + my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr)))) { + my_wc_t wc2; /* Contraction head found */ if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many)) { @@ -1581,9 +1590,8 @@ fill_max_and_min: Check if the second letter can be contraction part, and if two letters really produce a contraction. */ - if (ptr[2] == '\0' && - my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) && - my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3])) + if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) && + my_uca_contraction2_weight(cs, wc , wc2)) { /* Contraction found */ if (charlen == 1 || min_str + 2 >= min_end)