[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (monty:2715)
#At lp:maria based on revid:monty@askmonty.org-20090630120129-6gan4k9dyjxj83e4 2715 Michael Widenius 2009-07-02 Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f. This allows us to skip and speed up some very common character set conversions that MySQL is doing when sending data to the client and this gives us a nice speed increase for most queries that use only characters in the range 0x00-0x7f. This code is based on Alexander Barkov's code that he wrote for MySQL 6.0. modified: include/m_ctype.h libmysqld/lib_sql.cc mysys/charset.c scripts/mysql_install_db.sh sql/protocol.cc sql/protocol.h sql/sql_string.cc strings/conf_to_src.c strings/ctype-extra.c strings/ctype-sjis.c strings/ctype-uca.c strings/ctype-ucs2.c strings/ctype-utf8.c strings/ctype.c per-file messages: include/m_ctype.h Added MY_CS_NONASCII marker libmysqld/lib_sql.cc Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments mysys/charset.c Mark character sets with MY_CS_NONASCII scripts/mysql_install_db.sh Fixed messages to refer to MariaDB instead of MySQL sql/protocol.cc Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/protocol.h Added function net_store_data(...) 
that takes to and from CHARSET_INFO * as arguments sql/sql_string.cc Quicker copy of strings with no characters above 0x7f strings/conf_to_src.c Added printing of MY_CS_NONASCII strings/ctype-extra.c Mark incompatible character sets with MY_CS_NONASCII Removed duplicated character set geostd strings/ctype-sjis.c Mark incompatible character sets with MY_CS_NONASCII strings/ctype-uca.c Mark incompatible character sets with MY_CS_NONASCII strings/ctype-ucs2.c Mark incompatible character sets with MY_CS_NONASCII strings/ctype-utf8.c Mark incompatible character sets with MY_CS_NONASCII strings/ctype.c Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f === modified file 'include/m_ctype.h' --- a/include/m_ctype.h 2008-12-23 14:21:01 +0000 +++ b/include/m_ctype.h 2009-07-02 10:15:33 +0000 @@ -87,6 +87,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ #define MY_CS_HIDDEN 2048 /* don't display in SHOW */ #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */ +#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */ #define MY_CHARSET_UNDEFINED 0 /* Character repertoire flags */ @@ -517,6 +518,7 @@ uint my_charset_repertoire(CHARSET_INFO #define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b))) #define my_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num)) +my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs); #define use_mb(s) ((s)->cset->ismbchar != NULL) #define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b))) === modified file 'libmysqld/lib_sql.cc' --- a/libmysqld/lib_sql.cc 2009-02-24 11:29:49 +0000 +++ b/libmysqld/lib_sql.cc 2009-07-02 10:15:33 +0000 @@ -1124,6 +1124,7 @@ bool Protocol_text::store_null() return false; } + bool Protocol::net_store_data(const uchar *from, size_t length) { char *field_buf; @@ -1143,6 +1144,30 @@ bool Protocol::net_store_data(const ucha return FALSE; } + +bool 
Protocol::net_store_data(const uchar *from, size_t length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen; + uint dummy_error; + char *field_buf; + if (!thd->mysql) // bootstrap file handling + return false; + + if (!(field_buf= (char*) alloc_root(alloc, conv_length + sizeof(uint) + 1))) + return true; + *next_field= field_buf + sizeof(uint); + length= copy_and_convert(*next_field, conv_length, to_cs, + (const char*) from, length, from_cs, &dummy_error); + *(uint *) field_buf= length; + (*next_field)[length]= 0; + if (next_mysql_field->max_length < length) + next_mysql_field->max_length= length; + ++next_field; + ++next_mysql_field; + return false; +} + #if defined(_MSC_VER) && _MSC_VER < 1400 #define vsnprintf _vsnprintf #endif === modified file 'mysys/charset.c' --- a/mysys/charset.c 2009-02-13 16:41:47 +0000 +++ b/mysys/charset.c 2009-07-02 10:15:33 +0000 @@ -248,6 +248,7 @@ static int add_collation(CHARSET_INFO *c { #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS) copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci); + newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII; #endif } else if (!strcmp(cs->csname, "utf8")) @@ -280,6 +281,8 @@ static int add_collation(CHARSET_INFO *c if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number])) all_charsets[cs->number]->state|= MY_CS_PUREASCII; + if (!my_charset_is_ascii_compatible(cs)) + all_charsets[cs->number]->state|= MY_CS_NONASCII; } } else === modified file 'scripts/mysql_install_db.sh' --- a/scripts/mysql_install_db.sh 2009-01-06 15:08:15 +0000 +++ b/scripts/mysql_install_db.sh 2009-07-02 10:15:33 +0000 @@ -1,5 +1,5 @@ #!/bin/sh -# Copyright (C) 2002-2003 MySQL AB +# Copyright (C) 2002-2003 MySQL AB & Monty Program Ab # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,7 +14,7 @@ # along with this program; if not, write 
to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -# This scripts creates the MySQL Server system tables +# This scripts creates the MariaDB Server system tables # # All unrecognized arguments to this script are passed to mysqld. @@ -38,26 +38,27 @@ usage() { cat <<EOF Usage: $0 [OPTIONS] - --basedir=path The path to the MySQL installation directory. + --basedir=path The path to the MariaDB installation directory. --builddir=path If using --srcdir with out-of-directory builds, you will need to set this to the location of the build directory where built files reside. - --cross-bootstrap For internal use. Used when building the MySQL system + --cross-bootstrap For internal use. Used when building the MariaDB system tables on a different host than the target. - --datadir=path The path to the MySQL data directory. + --datadir=path The path to the MariaDB data directory. --force Causes mysql_install_db to run even if DNS does not work. In that case, grant table entries that normally use hostnames will use IP addresses. - --ldata=path The path to the MySQL data directory. Same as --datadir. + --ldata=path The path to the MariaDB data directory. Same as + --datadir. --rpm For internal use. This option is used by RPM files - during the MySQL installation process. + during the MariaDB installation process. --skip-name-resolve Use IP addresses rather than hostnames when creating grant table entries. This option can be useful if your DNS does not work. - --srcdir=path The path to the MySQL source directory. This option + --srcdir=path The path to the MariaDB source directory. This option uses the compiled binaries and support files within the source tree, useful for if you don't want to install - MySQL yet and just want to create the system tables. + MariaDB yet and just want to create the system tables. --user=user_name The login username to use for running mysqld. 
Files and directories created by mysqld will be owned by this user. You must be root to use this option. By default @@ -116,7 +117,7 @@ parse_arguments() defaults="$arg" ;; --cross-bootstrap|--windows) - # Used when building the MySQL system tables on a different host than + # Used when building the MariaDB system tables on a different host than # the target. The platform-independent files that are created in # --datadir on the host can be copied to the target system. # @@ -338,10 +339,10 @@ then fi echo "WARNING: The host '$hostname' could not be looked up with resolveip." echo "This probably means that your libc libraries are not 100 % compatible" - echo "with this binary MySQL version. The MySQL daemon, mysqld, should work" + echo "with this binary MariaDB version. The MariaDB daemon, mysqld, should work" echo "normally with the exception that host name resolving will not work." echo "This means that you should use IP addresses instead of hostnames" - echo "when specifying MySQL privileges !" + echo "when specifying MariaDB privileges !" fi fi @@ -388,7 +389,7 @@ mysqld_install_cmd_line="$mysqld_bootstr --net_buffer_length=16K" # Create the system and help tables by passing them to "mysqld --bootstrap" -s_echo "Installing MySQL system tables..." +s_echo "Installing MariaDB/MySQL system tables..." if { echo "use mysql;"; cat $create_system_tables $fill_system_tables; } | eval "$filter_cmd_line" | $mysqld_install_cmd_line > /dev/null then s_echo "OK" @@ -410,14 +411,16 @@ else echo "Try 'mysqld --help' if you have problems with paths. Using --log" echo "gives you a log in $ldata that may be helpful." echo - echo "The latest information about MySQL is available on the web at" - echo "http://www.mysql.com/. Please consult the MySQL manual section" + echo "The latest information about MariaDB is available on the web at" + echo "http://askmonty.org/wiki/index.php/MariaDB". 
+ echo "If you have a problem, you can consult the MySQL manual section" echo "'Problems running mysql_install_db', and the manual section that" - echo "describes problems on your OS. Another information source are the" - echo "MySQL email archives available at http://lists.mysql.com/." + echo "describes problems on your OS at http://dev.mysql.com/doc/" + echo "MariaDB is hosted on launchpad; You can find the latest source and" + echo "email lists at http://launchpad.net/maria" echo echo "Please check all of the above before mailing us! And remember, if" - echo "you do mail us, you MUST use the $scriptdir/mysqlbug script!" + echo "you do mail us, you should use the $scriptdir/mysqlbug script!" echo exit 1 fi @@ -442,7 +445,7 @@ then s_echo "support-files/mysql.server to the right place for your system" echo - echo "PLEASE REMEMBER TO SET A PASSWORD FOR THE MySQL root USER !" + echo "PLEASE REMEMBER TO SET A PASSWORD FOR THE MariaDB root USER !" echo "To do so, start the server, then issue the following commands:" echo echo "$bindir/mysqladmin -u root password 'new-password'" @@ -455,23 +458,28 @@ then echo "databases and anonymous user created by default. This is" echo "strongly recommended for production servers." echo - echo "See the manual for more instructions." + echo "See the MySQL manual for more instructions." if test "$in_rpm" -eq 0 then echo - echo "You can start the MySQL daemon with:" + echo "You can start the MariaDB daemon with:" echo "cd $basedir ; $bindir/mysqld_safe &" echo - echo "You can test the MySQL daemon with mysql-test-run.pl" + echo "You can test the MariaDB daemon with mysql-test-run.pl" echo "cd $basedir/mysql-test ; perl mysql-test-run.pl" fi echo echo "Please report any problems with the $scriptdir/mysqlbug script!" 
echo - echo "The latest information about MySQL is available at http://www.mysql.com/" - echo "Support MySQL by buying support/licenses from http://shop.mysql.com/" + echo "The latest information about MariaDB is available at http://www.askmonty.org/." + echo "You can find additional information about the MySQL part at:" + echo "http://dev.mysql.com" + echo "Support MariaDB development by buying support/new features from" + echo "Monty Program Ab. You can contact us about this at sales@askmonty.org". + echo "Alternatively consider joining our community based development effort:" + echo "http://askmonty.org/wiki/index.php/MariaDB#How_can_I_participate_in_the_deve..." echo fi === modified file 'sql/protocol.cc' --- a/sql/protocol.cc 2009-04-25 10:05:32 +0000 +++ b/sql/protocol.cc 2009-07-02 10:15:33 +0000 @@ -58,6 +58,65 @@ bool Protocol_binary::net_store_data(con } +/* + net_store_data() - extended version with character set conversion. + + It is optimized for short strings whose length after + conversion is garanteed to be less than 251, which accupies + exactly one byte to store length. It allows not to use + the "convert" member as a temporary buffer, conversion + is done directly to the "packet" member. + The limit 251 is good enough to optimize send_fields() + because column, table, database names fit into this limit. +*/ + +#ifndef EMBEDDED_LIBRARY +bool Protocol::net_store_data(const uchar *from, size_t length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint dummy_errors; + /* Calculate maxumum possible result length */ + size_t conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen; + ulong packet_length, new_length; + char *length_pos, *to; + + if (conv_length > 250) + { + /* + For strings with conv_length greater than 250 bytes + we don't know how many bytes we will need to store length: one or two, + because we don't know result length until conversion is done. 
+ For example, when converting from utf8 (mbmaxlen=3) to latin1, + conv_length=300 means that the result length can vary between 100 to 300. + length=100 needs one byte, length=300 needs to bytes. + + Thus conversion directly to "packet" is not worthy. + Let's use "convert" as a temporary buffer. + */ + return (convert->copy((const char*) from, length, from_cs, to_cs, + &dummy_errors) || + net_store_data((const uchar*) convert->ptr(), convert->length())); + } + + packet_length= packet->length(); + new_length= packet_length + conv_length + 1; + + if (new_length > packet->alloced_length() && packet->realloc(new_length)) + return 1; + + length_pos= (char*) packet->ptr() + packet_length; + to= length_pos + 1; + + to+= copy_and_convert(to, conv_length, to_cs, + (const char*) from, length, from_cs, &dummy_errors); + + net_store_length((uchar*) length_pos, to - length_pos - 1); + packet->length((uint) (to - packet->ptr())); + return 0; +} +#endif + + /** Send a error string to client. @@ -773,10 +832,10 @@ bool Protocol::store_string_aux(const ch fromcs != &my_charset_bin && tocs != &my_charset_bin) { - uint dummy_errors; - return (convert->copy(from, length, fromcs, tocs, &dummy_errors) || - net_store_data((uchar*) convert->ptr(), convert->length())); + /* Store with conversion */ + return net_store_data((uchar*) from, length, fromcs, tocs); } + /* Store without conversion */ return net_store_data((uchar*) from, length); } @@ -802,7 +861,7 @@ bool Protocol_text::store(const char *fr { CHARSET_INFO *tocs= this->thd->variables.character_set_results; #ifndef DBUG_OFF - DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %*s", field_pos, + DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %.*s", field_pos, field_count, (int) length, from)); DBUG_ASSERT(field_pos < field_count); DBUG_ASSERT(field_types == 0 || === modified file 'sql/protocol.h' --- a/sql/protocol.h 2007-12-20 21:11:37 +0000 +++ b/sql/protocol.h 2009-07-02 10:15:33 +0000 @@ -42,6 +42,8 @@ 
protected: MYSQL_FIELD *next_mysql_field; MEM_ROOT *alloc; #endif + bool net_store_data(const uchar *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs); bool store_string_aux(const char *from, size_t length, CHARSET_INFO *fromcs, CHARSET_INFO *tocs); public: === modified file 'sql/sql_string.cc' --- a/sql/sql_string.cc 2009-04-25 10:05:32 +0000 +++ b/sql/sql_string.cc 2009-07-02 10:15:33 +0000 @@ -782,10 +782,11 @@ String *copy_if_not_alloced(String *to,S */ -uint32 -copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, - const char *from, uint32 from_length, CHARSET_INFO *from_cs, - uint *errors) +static uint32 +copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, + CHARSET_INFO *from_cs, + uint *errors) { int cnvres; my_wc_t wc; @@ -900,6 +901,65 @@ my_copy_with_hex_escaping(CHARSET_INFO * } /* + Optimized for quick copying of ASCII characters in the range 0x00..0x7F. +*/ +uint32 +copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, CHARSET_INFO *from_cs, + uint *errors) +{ + /* + If any of the character sets is not ASCII compatible, + immediately switch to slow mb_wc->wc_mb method. + */ + if ((to_cs->state | from_cs->state) & MY_CS_NONASCII) + return copy_and_convert_extended(to, to_length, to_cs, + from, from_length, from_cs, errors); + + uint32 length= min(to_length, from_length), length2= length; + +#if defined(__i386__) + /* + Special loop for i386, it allows to refer to a + non-aligned memory block as UINT32, which makes + it possible to copy four bytes at once. This + gives about 10% performance improvement comparing + to byte-by-byte loop. 
+ */ + for ( ; length >= 4; length-= 4, from+= 4, to+= 4) + { + if ((*(uint32*)from) & 0x80808080) + break; + *((uint32*) to)= *((const uint32*) from); + } +#endif + + for (; ; *to++= *from++, length--) + { + if (!length) + { + *errors= 0; + return length2; + } + if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */ + { + uint32 copied_length= length2 - length; + to_length-= copied_length; + from_length-= copied_length; + return copied_length + copy_and_convert_extended(to, to_length, + to_cs, + from, from_length, + from_cs, + errors); + } + } + + DBUG_ASSERT(FALSE); // Should never get to here + return 0; // Make compiler happy +} + + +/* copy a string, with optional character set conversion, with optional left padding (for binary -> UCS2 conversion) === modified file 'strings/conf_to_src.c' --- a/strings/conf_to_src.c 2008-11-14 16:29:38 +0000 +++ b/strings/conf_to_src.c 2009-07-02 10:15:33 +0000 @@ -184,11 +184,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs) { fprintf(f,"{\n"); fprintf(f," %d,%d,%d,\n",cs->number,0,0); - fprintf(f," MY_CS_COMPILED%s%s%s%s,\n", + fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n", cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "", cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "", is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "", - my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : ""); + my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "", + !my_charset_is_ascii_compatible(cs) ? 
"|MY_CS_NONASCII": ""); if (cs->name) { === modified file 'strings/ctype-extra.c' --- a/strings/ctype-extra.c 2007-08-20 11:47:31 +0000 +++ b/strings/ctype-extra.c 2009-07-02 10:15:33 +0000 @@ -6804,7 +6804,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 10,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_swedish_ci", /* coll name */ "", /* comment */ @@ -8454,7 +8454,7 @@ CHARSET_INFO compiled_charsets[] = { #ifdef HAVE_CHARSET_swe7 { 82,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, "swe7", /* cset name */ "swe7_bin", /* coll name */ "", /* comment */ @@ -8550,72 +8550,6 @@ CHARSET_INFO compiled_charsets[] = { } , #endif -#ifdef HAVE_CHARSET_geostd8 -{ - 92,0,0, - MY_CS_COMPILED|MY_CS_PRIMARY, - "geostd8", /* cset name */ - "geostd8_general_ci", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_general_ci, /* ctype */ - to_lower_geostd8_general_ci, /* lower */ - to_upper_geostd8_general_ci, /* upper */ - sort_order_geostd8_general_ci, /* sort_order */ - NULL, /* contractions */ - NULL, /* sort_order_big*/ - to_uni_geostd8_general_ci, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_simple_ci_handler, -} -, -#endif -#ifdef HAVE_CHARSET_geostd8 -{ - 93,0,0, - MY_CS_COMPILED|MY_CS_BINSORT, - "geostd8", /* cset name */ - "geostd8_bin", /* coll name */ - "", /* comment */ - NULL, /* tailoring */ - ctype_geostd8_bin, /* ctype */ - to_lower_geostd8_bin, /* lower */ - to_upper_geostd8_bin, /* upper */ - NULL, /* sort_order */ - NULL, /* contractions */ - NULL, /* 
sort_order_big*/ - to_uni_geostd8_bin, /* to_uni */ - NULL, /* from_uni */ - my_unicase_default, /* caseinfo */ - NULL, /* state map */ - NULL, /* ident map */ - 1, /* strxfrm_multiply*/ - 1, /* caseup_multiply*/ - 1, /* casedn_multiply*/ - 1, /* mbminlen */ - 1, /* mbmaxlen */ - 0, /* min_sort_char */ - 255, /* max_sort_char */ - ' ', /* pad_char */ - 0, /* escape_with_backslash_is_dangerous */ - &my_charset_8bit_handler, - &my_collation_8bit_bin_handler, -} -, -#endif #ifdef HAVE_CHARSET_latin1 { 94,0,0, === modified file 'strings/ctype-sjis.c' --- a/strings/ctype-sjis.c 2007-10-04 07:10:15 +0000 +++ b/strings/ctype-sjis.c 2009-07-02 10:15:33 +0000 @@ -4672,7 +4672,7 @@ static MY_CHARSET_HANDLER my_charset_han CHARSET_INFO my_charset_sjis_japanese_ci= { 13,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM, /* state */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_NONASCII, /* state */ "sjis", /* cs name */ "sjis_japanese_ci", /* name */ "", /* comment */ @@ -4704,7 +4704,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci CHARSET_INFO my_charset_sjis_bin= { 88,0,0, /* number */ - MY_CS_COMPILED|MY_CS_BINSORT, /* state */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_NONASCII, /* state */ "sjis", /* cs name */ "sjis_bin", /* name */ "", /* comment */ === modified file 'strings/ctype-uca.c' --- a/strings/ctype-uca.c 2007-07-03 09:06:57 +0000 +++ b/strings/ctype-uca.c 2009-07-02 10:15:33 +0000 @@ -8086,7 +8086,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_u CHARSET_INFO my_charset_ucs2_unicode_ci= { 128,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_unicode_ci", /* name */ "", /* comment */ @@ -8118,7 +8118,7 @@ CHARSET_INFO my_charset_ucs2_unicode_ci= CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= { 129,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ 
"ucs2_icelandic_ci",/* name */ "", /* comment */ @@ -8150,7 +8150,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_u CHARSET_INFO my_charset_ucs2_latvian_uca_ci= { 130,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_latvian_ci", /* name */ "", /* comment */ @@ -8182,7 +8182,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca CHARSET_INFO my_charset_ucs2_romanian_uca_ci= { 131,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_romanian_ci", /* name */ "", /* comment */ @@ -8214,7 +8214,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uc CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= { 132,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovenian_ci",/* name */ "", /* comment */ @@ -8246,7 +8246,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_u CHARSET_INFO my_charset_ucs2_polish_uca_ci= { 133,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_polish_ci", /* name */ "", /* comment */ @@ -8278,7 +8278,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ CHARSET_INFO my_charset_ucs2_estonian_uca_ci= { 134,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_estonian_ci", /* name */ "", /* comment */ @@ -8310,7 +8310,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uc CHARSET_INFO my_charset_ucs2_spanish_uca_ci= { 135,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish_ci", /* name */ "", /* comment */ @@ -8342,7 +8342,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca CHARSET_INFO 
my_charset_ucs2_swedish_uca_ci= { 136,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_swedish_ci", /* name */ "", /* comment */ @@ -8374,7 +8374,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca CHARSET_INFO my_charset_ucs2_turkish_uca_ci= { 137,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_turkish_ci", /* name */ "", /* comment */ @@ -8406,7 +8406,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca CHARSET_INFO my_charset_ucs2_czech_uca_ci= { 138,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_czech_ci", /* name */ "", /* comment */ @@ -8439,7 +8439,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_c CHARSET_INFO my_charset_ucs2_danish_uca_ci= { 139,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_danish_ci", /* name */ "", /* comment */ @@ -8471,7 +8471,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= { 140,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_lithuanian_ci",/* name */ "", /* comment */ @@ -8503,7 +8503,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_ CHARSET_INFO my_charset_ucs2_slovak_uca_ci= { 141,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_slovak_ci", /* name */ "", /* comment */ @@ -8535,7 +8535,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= { 142,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + 
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_spanish2_ci", /* name */ "", /* comment */ @@ -8568,7 +8568,7 @@ CHARSET_INFO my_charset_ucs2_spanish2_uc CHARSET_INFO my_charset_ucs2_roman_uca_ci= { 143,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_roman_ci", /* name */ "", /* comment */ @@ -8601,7 +8601,7 @@ CHARSET_INFO my_charset_ucs2_roman_uca_c CHARSET_INFO my_charset_ucs2_persian_uca_ci= { 144,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_persian_ci", /* name */ "", /* comment */ @@ -8634,7 +8634,7 @@ CHARSET_INFO my_charset_ucs2_persian_uca CHARSET_INFO my_charset_ucs2_esperanto_uca_ci= { 145,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_esperanto_ci",/* name */ "", /* comment */ @@ -8667,7 +8667,7 @@ CHARSET_INFO my_charset_ucs2_esperanto_u CHARSET_INFO my_charset_ucs2_hungarian_uca_ci= { 146,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_hungarian_ci",/* name */ "", /* comment */ === modified file 'strings/ctype-ucs2.c' --- a/strings/ctype-ucs2.c 2009-02-13 16:41:47 +0000 +++ b/strings/ctype-ucs2.c 2009-07-02 10:15:33 +0000 @@ -1717,7 +1717,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handl CHARSET_INFO my_charset_ucs2_general_ci= { 35,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_general_ci", /* name */ "", /* comment */ @@ -1749,7 +1749,7 @@ CHARSET_INFO my_charset_ucs2_general_ci= CHARSET_INFO my_charset_ucs2_bin= { 90,0,0, /* number */ - 
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE, + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII, "ucs2", /* cs name */ "ucs2_bin", /* name */ "", /* comment */ === modified file 'strings/ctype-utf8.c' --- a/strings/ctype-utf8.c 2008-02-11 12:28:33 +0000 +++ b/strings/ctype-utf8.c 2009-07-02 10:15:33 +0000 @@ -4204,7 +4204,7 @@ static MY_CHARSET_HANDLER my_charset_fil CHARSET_INFO my_charset_filename= { 17,0,0, /* number */ - MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN, + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_HIDDEN|MY_CS_NONASCII, "filename", /* cs name */ "filename", /* name */ "", /* comment */ === modified file 'strings/ctype.c' --- a/strings/ctype.c 2009-04-25 10:05:32 +0000 +++ b/strings/ctype.c 2009-07-02 10:15:33 +0000 @@ -405,3 +405,23 @@ my_charset_is_8bit_pure_ascii(CHARSET_IN } return 1; } + + +/* + Shared function between conf_to_src and mysys. + Check if a 8bit character set is compatible with + ascii on the range 0x00..0x7F. +*/ +my_bool +my_charset_is_ascii_compatible(CHARSET_INFO *cs) +{ + uint i; + if (!cs->tab_to_uni) + return 1; + for (i= 0; i < 128; i++) + { + if (cs->tab_to_uni[i] != i) + return 0; + } + return 1; +}
participants (1)
-
Michael Widenius