Re: 9d6de9212d5: MDEV-29095 REGEXP_REPLACE treats empty strings different than REPLACE in ORACLE mode
Hi, Alexander, On Jan 10, Alexander Barkov wrote:
revision-id: 9d6de9212d5 (mariadb-10.4.32-45-g9d6de9212d5) parent(s): bdfd93d30c1 author: Alexander Barkov committer: Alexander Barkov timestamp: 2023-11-24 13:23:56 +0400 message:
MDEV-29095 REGEXP_REPLACE treats empty strings different than REPLACE in ORACLE mode
Turning REGEXP_REPLACE into two schema-qualified functions: - mariadb_schema.regexp_replace() - oracle_schema.regexp_replace()
Fixing oracle_schema.regexp_replace(subj,pattern,replacement) to treat NULL in "replacement" as an empty string.
Adding new classes implementing oracle_schema.regexp_replace(): - Item_func_regexp_replace_oracle - Create_func_regexp_replace_oracle
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 92d5e196da4..b688ba97d3e 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1385,20 +1385,22 @@ bool Item_func_regexp_replace::append_replacement(String *str, }
-String *Item_func_regexp_replace::val_str(String *str) +String *Item_func_regexp_replace::val_str_internal(String *str, + bool null_to_empty) { DBUG_ASSERT(fixed == 1); char buff0[MAX_FIELD_WIDTH]; char buff2[MAX_FIELD_WIDTH]; String tmp0(buff0,sizeof(buff0),&my_charset_bin); String tmp2(buff2,sizeof(buff2),&my_charset_bin); - String *source= args[0]->val_str(&tmp0); - String *replace= args[2]->val_str(&tmp2); + String *source, *replace; LEX_CSTRING src, rpl; int startoffset= 0;
- if ((null_value= (args[0]->null_value || args[2]->null_value || - re.recompile(args[1])))) + if ((null_value= + (!(source= args[0]->val_str(&tmp0)) || + !(replace= args[2]->val_str_null_to_empty(&tmp2, null_to_empty)) ||
why are you fixing it differently from Item_func_replace? If you think this approach is better then, please, change Item_func_replace to use it too, in a followup commit.
+ re.recompile(args[1])))) return (String *) 0;
if (!(source= re.convert_if_needed(source, &re.subject_converter)) || diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 826c7f784af..e014cc5fcbc 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -412,11 +420,34 @@ class Item_func_regexp_replace :public Item_str_func re.cleanup(); DBUG_VOID_RETURN; } - String *val_str(String *str); + String *val_str(String *str) + { + return val_str_internal(str, false); + } bool fix_fields(THD *thd, Item **ref); bool fix_length_and_dec(); const char *func_name() const { return "regexp_replace"; } - Item *get_copy(THD *thd) { return 0;} + Item *get_copy(THD *thd) { return 0;} // QQ: why?
you could've checked the history in git. it was commit 343bcb152f8 Author: Igor Babaev <igor@askmonty.org> Date: Sun Nov 5 22:52:41 2017 -0800 Fixed mdev-14237 Server crash on query with regexp_substr It's better to prohibit pushdown of conditions that involve regexp_substr() and regexp_replace() into materialized derived tables / views until proper implementations of the get_copy() virtual method are provided for those functions. and I suspect the reason is Regexp_processor_pcre inside regexp Items.
+}; + + +class Item_func_regexp_replace_oracle: public Item_func_regexp_replace +{ +public: + Item_func_regexp_replace_oracle(THD *thd, Item *a, Item *b, Item *c) + :Item_func_regexp_replace(thd, a, b, c) + {} + const Schema *schema() const { return &oracle_schema_ref; } + bool fix_length_and_dec() + { + bool rc= Item_func_regexp_replace::fix_length_and_dec(); + maybe_null= true; // Empty result is converted to NULL + return rc; + } + String *val_str(String *str) + { + return val_str_internal(str, true); + } };
Regards, Sergei Chief Architect, MariaDB Server and security@mariadb.org
Hello Sergei, Please find a new patch here: https://github.com/MariaDB/server/commit/94fff9671daff70137bc806723363e9b507... On 1/10/24 17:08, Sergei Golubchik wrote:
Hi, Alexander,
On Jan 10, Alexander Barkov wrote:
revision-id: 9d6de9212d5 (mariadb-10.4.32-45-g9d6de9212d5) parent(s): bdfd93d30c1 author: Alexander Barkov committer: Alexander Barkov timestamp: 2023-11-24 13:23:56 +0400 message:
MDEV-29095 REGEXP_REPLACE treats empty strings different than REPLACE in ORACLE mode
Turning REGEXP_REPLACE into two schema-qualified functions: - mariadb_schema.regexp_replace() - oracle_schema.regexp_replace()
Fixing oracle_schema.regexp_replace(subj,pattern,replacement) to treat NULL in "replacement" as an empty string.
Adding new classes implementing oracle_schema.regexp_replace(): - Item_func_regexp_replace_oracle - Create_func_regexp_replace_oracle
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 92d5e196da4..b688ba97d3e 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1385,20 +1385,22 @@ bool Item_func_regexp_replace::append_replacement(String *str, }
-String *Item_func_regexp_replace::val_str(String *str) +String *Item_func_regexp_replace::val_str_internal(String *str, + bool null_to_empty) { DBUG_ASSERT(fixed == 1); char buff0[MAX_FIELD_WIDTH]; char buff2[MAX_FIELD_WIDTH]; String tmp0(buff0,sizeof(buff0),&my_charset_bin); String tmp2(buff2,sizeof(buff2),&my_charset_bin); - String *source= args[0]->val_str(&tmp0); - String *replace= args[2]->val_str(&tmp2); + String *source, *replace; LEX_CSTRING src, rpl; int startoffset= 0;
- if ((null_value= (args[0]->null_value || args[2]->null_value || - re.recompile(args[1])))) + if ((null_value= + (!(source= args[0]->val_str(&tmp0)) || + !(replace= args[2]->val_str_null_to_empty(&tmp2, null_to_empty)) ||
why are you fixing it differently from Item_func_replace? If you think this approach is better then, please, change Item_func_replace to use it too, in a followup commit.
Thanks for a good idea to reuse the new method val_str_null_to_empty() in Item_func_replace. I made it in the same commit, it's suites very well.
+ re.recompile(args[1])))) return (String *) 0;
if (!(source= re.convert_if_needed(source, &re.subject_converter)) || diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index 826c7f784af..e014cc5fcbc 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -412,11 +420,34 @@ class Item_func_regexp_replace :public Item_str_func re.cleanup(); DBUG_VOID_RETURN; } - String *val_str(String *str); + String *val_str(String *str) + { + return val_str_internal(str, false); + } bool fix_fields(THD *thd, Item **ref); bool fix_length_and_dec(); const char *func_name() const { return "regexp_replace"; } - Item *get_copy(THD *thd) { return 0;} + Item *get_copy(THD *thd) { return 0;} // QQ: why?
you could've checked the history in git. it was
commit 343bcb152f8 Author: Igor Babaev <igor@askmonty.org> Date: Sun Nov 5 22:52:41 2017 -0800
Fixed mdev-14237 Server crash on query with regexp_substr
It's better to prohibit pushdown of conditions that involve regexp_substr() and regexp_replace() into materialized derived tables / views until proper implementations of the get_copy() virtual method are provided for those functions.
and I suspect the reason is Regexp_processor_pcre inside regexp Items.
Thanks. I removed the "QQ" comment.
+}; + + +class Item_func_regexp_replace_oracle: public Item_func_regexp_replace +{ +public: + Item_func_regexp_replace_oracle(THD *thd, Item *a, Item *b, Item *c) + :Item_func_regexp_replace(thd, a, b, c) + {} + const Schema *schema() const { return &oracle_schema_ref; } + bool fix_length_and_dec() + { + bool rc= Item_func_regexp_replace::fix_length_and_dec(); + maybe_null= true; // Empty result is converted to NULL + return rc; + } + String *val_str(String *str) + { + return val_str_internal(str, true); + } };
Regards, Sergei Chief Architect, MariaDB Server and security@mariadb.org
participants (2)
-
Alexander Barkov
-
Sergei Golubchik