Re: [Maria-developers] afedc29f773: MDEV-14024 PCRE2.
Hi, Alexey! Thanks! Few minor comments, see below On Nov 23, Alexey Botchkov wrote:
revision-id: afedc29f773362f342756e9cdd9bbd9ed0f1abe2 (versioning-1.0.3-68-gafedc29f773) parent(s): 4ec8598c1dcb63499bce998142b8e5b8b09b2d30 author: Alexey Botchkov <holyfoot@askmonty.org> committer: Alexey Botchkov <holyfoot@askmonty.org> timestamp: 2018-06-26 14:10:46 +0400 message:
MDEV-14024 PCRE2.
Related changes in the server code.
diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake index 4c113929866..264f93a60a2 100644 --- a/cmake/pcre.cmake +++ b/cmake/pcre.cmake @@ -5,24 +5,17 @@ SET(WITH_PCRE "auto" CACHE STRING
MACRO (CHECK_PCRE) IF(WITH_PCRE STREQUAL "system" OR WITH_PCRE STREQUAL "auto") - CHECK_LIBRARY_EXISTS(pcre pcre_stack_guard "" HAVE_PCRE_STACK_GUARD) + CHECK_LIBRARY_EXISTS(pcre2-8 pcre2_match "" HAVE_PCRE2) - IF(NOT CMAKE_CROSSCOMPILING) - SET(CMAKE_REQUIRED_LIBRARIES "pcre") - CHECK_C_SOURCE_RUNS(" - #include <pcre.h> - int main() { - return -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0) < 256; - }" PCRE_STACK_SIZE_OK) - SET(CMAKE_REQUIRED_LIBRARIES) - ENDIF() ENDIF() - IF(NOT HAVE_PCRE_STACK_GUARD OR NOT PCRE_STACK_SIZE_OK OR - WITH_PCRE STREQUAL "bundled") + IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled") IF (WITH_PCRE STREQUAL "system") - MESSAGE(FATAL_ERROR "system pcre is not found or unusable") + MESSAGE(FATAL_ERROR "system pcre2-8 library is not found or unusable") ENDIF() - SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre ${CMAKE_SOURCE_DIR}/pcre) - ADD_SUBDIRECTORY(pcre) + SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2 + ${CMAKE_BINARY_DIR}/pcre2/src ${CMAKE_SOURCE_DIR}/pcre2/src)
That's a bit strange. I'd expect only .../pcre2/src paths, not .../pcre2. you need pcre2.h and pcre2posix.h, they're both in src/ right?
+ SET(PCRE_BUILD_TESTS OFF CACHE BOOL "Disable tests.") + SET(PCRE2_BUILD_PCRE2GREP OFF CACHE BOOL "Disable pcre2grep") + ADD_SUBDIRECTORY(pcre2) ENDIF() ENDMACRO()
diff --git a/extra/mariabackup/CMakeLists.txt b/extra/mariabackup/CMakeLists.txt index 7df5fa17903..a9d7153b893 100644 --- a/extra/mariabackup/CMakeLists.txt +++ b/extra/mariabackup/CMakeLists.txt @@ -37,7 +37,7 @@ INCLUDE_DIRECTORIES( )
IF(NOT HAVE_SYSTEM_REGEX) - INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre) + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2/src)
Hmm, not INCLUDE_DIRECTORIES(${PCRE_INCLUDES}) ? pcre2.h is in the ${CMAKE_BINARY_DIR}, it's generated.
ENDIF()
IF(WITH_WSREP) diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test index 21600390bb2..30969b3e9ae 100644 --- a/mysql-test/main/func_regexp_pcre.test +++ b/mysql-test/main/func_regexp_pcre.test @@ -382,6 +382,7 @@ SELECT 'AB' RLIKE 'A B'; SELECT 'AB' RLIKE 'A# this is a comment\nB'; SET default_regex_flags=DEFAULT;
+--error ER_REGEXP_ERROR SELECT 'Aq' RLIKE 'A\\q';
you forgot to update func_regexp_pcre.result
SET default_regex_flags='EXTRA'; --error ER_REGEXP_ERROR diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 76f4788c1cf..450a9c54176 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5485,124 +5486,44 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error) */ void Regexp_processor_pcre::pcre_exec_warn(int rc) const { - char buf[64]; - const char *errmsg= NULL; + PCRE2_UCHAR8 buf[128]; THD *thd= current_thd;
- /* - Make a descriptive message only for those pcre_exec() error codes - that can actually happen in MariaDB. - */ - switch (rc) + int errlen= pcre2_get_error_message(rc, buf, sizeof(buf)); + if (errlen <= 0) { - case PCRE_ERROR_NULL: - errmsg= "pcre_exec: null argument passed"; - break; - case PCRE_ERROR_BADOPTION: - errmsg= "pcre_exec: bad option"; - break; - case PCRE_ERROR_BADMAGIC: - errmsg= "pcre_exec: bad magic - not a compiled regex"; - break; - case PCRE_ERROR_UNKNOWN_OPCODE: - errmsg= "pcre_exec: error in compiled regex"; - break; - case PCRE_ERROR_NOMEMORY: - errmsg= "pcre_exec: Out of memory"; - break; - case PCRE_ERROR_NOSUBSTRING: - errmsg= "pcre_exec: no substring"; - break; - case PCRE_ERROR_MATCHLIMIT: - errmsg= "pcre_exec: match limit exceeded"; - break; - case PCRE_ERROR_CALLOUT: - errmsg= "pcre_exec: callout error"; - break; - case PCRE_ERROR_BADUTF8: - errmsg= "pcre_exec: Invalid utf8 byte sequence in the subject string"; - break; - case PCRE_ERROR_BADUTF8_OFFSET: - errmsg= "pcre_exec: Started at invalid location within utf8 byte sequence"; - break; - case PCRE_ERROR_PARTIAL: - errmsg= "pcre_exec: partial match"; - break; - case PCRE_ERROR_INTERNAL: - errmsg= "pcre_exec: internal error"; - break; - case PCRE_ERROR_BADCOUNT: - errmsg= "pcre_exec: ovesize is negative"; - break; - case PCRE_ERROR_RECURSIONLIMIT: - my_snprintf(buf, sizeof(buf), "pcre_exec: recursion limit of %ld exceeded", - m_pcre_extra.match_limit_recursion); - errmsg= buf; - break; - case PCRE_ERROR_BADNEWLINE: - errmsg= "pcre_exec: bad newline options"; - break; - case PCRE_ERROR_BADOFFSET: - errmsg= "pcre_exec: start offset negative or greater than string length"; - break; - case PCRE_ERROR_SHORTUTF8: - errmsg= "pcre_exec: ended in middle of utf8 sequence"; - break; - case PCRE_ERROR_JIT_STACKLIMIT: - errmsg= "pcre_exec: insufficient stack memory for JIT compile"; - break; - case PCRE_ERROR_RECURSELOOP: - errmsg= "pcre_exec: Recursion loop detected"; - break; - case PCRE_ERROR_BADMODE: - errmsg= "pcre_exec: compiled pattern passed to wrong bit library function"; - break; - case PCRE_ERROR_BADENDIANNESS: - errmsg= "pcre_exec: compiled pattern passed to wrong endianness processor"; - break; - case PCRE_ERROR_JIT_BADOPTION: - errmsg= "pcre_exec: bad jit option"; - break; - case PCRE_ERROR_BADLENGTH: - errmsg= "pcre_exec: negative length"; - break; - default: - /* - As other error codes should normally not happen, - we just report the error code without textual description - of the code. - */ - my_snprintf(buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc); - errmsg= buf; + my_snprintf((char *)buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc); } push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg); + ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf); }
/** Call pcre_exec() and send a warning if pcre_exec() returned with an error. */ -int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code, - const pcre_extra *extra, +int Regexp_processor_pcre::pcre_exec_with_warn(const pcre2_code *code, + pcre2_match_data *data, const char *subject, int length, int startoffset, - int options, int *ovector, - int ovecsize) + uint options) { - int rc= pcre_exec(code, extra, subject, length, - startoffset, options, ovector, ovecsize); + int rc= pcre2_match(code, (PCRE2_SPTR8) subject, (PCRE2_SIZE) length, + (PCRE2_SIZE) startoffset, options, data, NULL); DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;); - if (unlikely(rc < PCRE_ERROR_NOMATCH)) - pcre_exec_warn(rc); + if (unlikely(rc < PCRE2_ERROR_NOMATCH)) + m_SubStrVec= NULL; + else + m_SubStrVec= pcre2_get_ovector_pointer(data); +
The function name is still pcre_exec_with_warn, the comment still says "Call pcre_exec() and send a warning". But neither seems to be happening here. where do you do warnings now?
return rc; }
bool Regexp_processor_pcre::exec(const char *str, size_t length, size_t offset) { - m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra, str, (int)length, (int)offset, 0, - m_SubStrVec, array_elements(m_SubStrVec)); + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data, + str, (int)length, (int)offset, 0); return false; }
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index ccb7d3b29ed..e8abae3487d 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5692,19 +5692,21 @@ static const char *default_regex_flags_names[]= "DOTALL", // (?s) . matches anything including NL "DUPNAMES", // (?J) Allow duplicate names for subpatterns "EXTENDED", // (?x) Ignore white space and # comments - "EXTRA", // (?X) extra features (e.g. error on unknown escape character) + "EXTENDED_MORE",//(?xx) Ignore white space and # comments inside cheracter
perhaps, update all flag descriptions from man pcre2syntax? Like "EXTENDED", // (?x) extended: ignore white space except in classes "EXTENDED_MORE",//(?xx) as (?x) but also ignore space and tab in classes also, I don't quite like EXTENDED_MORE name. But I don't have a good suggestion. May be just re-purpose EXTENDED to do (?xx) without introducing a new flag?
+ "EXTRA", // means nothing since PCRE2
may be a warning when it's used?
"MULTILINE", // (?m) ^ and $ match newlines within data "UNGREEDY", // (?U) Invert greediness of quantifiers 0 }; static const int default_regex_flags_to_pcre[]= { - PCRE_DOTALL, - PCRE_DUPNAMES, - PCRE_EXTENDED, - PCRE_EXTRA, - PCRE_MULTILINE, - PCRE_UNGREEDY, + PCRE2_DOTALL, + PCRE2_DUPNAMES, + PCRE2_EXTENDED, + PCRE2_EXTENDED_MORE, + 0, /* EXTRA flag not available since PCRE2 */ + PCRE2_MULTILINE, + PCRE2_UNGREEDY, 0 }; int default_regex_flags_pcre(const THD *thd) diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake index 2f04a33558a..51257febc0d 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake @@ -1,6 +1,8 @@ ## feature detection find_package(Threads) -find_package(ZLIB REQUIRED) +IF(WITH_EXTERNAL_ZLIB) + find_package(ZLIB REQUIRED) +ENDIF()
push it in separate commit, please.
option(USE_VALGRIND "Build to run safely under valgrind (often slower)." ON) if(USE_VALGRIND)
Regards, Sergei Chief Architect MariaDB and security@mariadb.org
participants (1)
-
Sergei Golubchik