Hi, Yuchen,
Thanks!
There are two kinds of comments below, you'll see which is which.
I realize that this patch mainly just moves parts of spider code around.
Some of my comments apply to the new code that didn't exist before and
appeared only in this patch. It's what needs to be fixed (or discussed,
if you disagree) before pushing.
Other comments apply to the old spider code that this patch moved. If
you'd like you can say "I'm just moving spider code around" and ignore
all comments of that kind.
/Sergei
On Jan 22, Yuchen Pei wrote:
> commit dcd69cbd19a
> Author: Yuchen Pei <yuchen.pei(a)mariadb.com>
> Date: Tue Jan 3 16:24:04 2023 +1100
>
> MDEV-29447 MDEV-26285 Refactor spider_db_mbase_util::open_item_func
>
> Porting commit 3836098c29ef1b7ff9d5fbde99b690eab73a0df1 (MDEV-26285)
Because 3836098c29ef will not be, apparently, pushed anywhere,
you'd better put a self-containing comment here. May be use the comment
of 3836098c29ef?
Note that you shouldn't push your 10.4-11.0 versions of the patch directly,
only push the 10.3 one (when ok) and let it propagate to 11.0 through
merges. Your 10.4-11.0 patches are very useful, though, they will serve as
guidelines for whoever will merge this change up.
> to current versions 10.3+ to fix a problem (MDEV-29447) where field
> items that are arguments of a func item may be used before created /
> initialised.
>
> Signed-off-by: Yuchen Pei <yuchen.pei(a)mariadb.com>
>
> diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc
> index e942d1d9063..a886a4e6e99 100644
> --- a/storage/spider/spd_db_mysql.cc
> +++ b/storage/spider/spd_db_mysql.cc
> @@ -4035,6 +4042,170 @@ int spider_db_mbase_util::open_item_func(
> uint alias_length,
> bool use_fields,
> spider_fields *fields
> +) {
> + DBUG_ENTER("spider_db_mbase_util::check_item_func");
incorrect function name in DBUG_ENTER
> +
> + int error = check_item_func(item_func, spider, alias,
> + alias_length, use_fields, fields);
> + if (error)
> + DBUG_RETURN(error);
> + if (!str)
> + DBUG_RETURN(0);
> +
> + DBUG_RETURN(print_item_func(item_func, spider, str, alias,
> + alias_length, use_fields, fields));
> +}
> +
> +namespace {
> + bool item_func_is_timestampdiff(
We generally use static in these cases.
Why would an anonymous namespace be better?
> + const char *func_name,
> + int func_name_length
> + ) {
> + return func_name_length == 13 &&
> + !strncasecmp("timestampdiff", func_name, func_name_length);
We build with rtti nowadays, so you can use dynamic_cast instead.
and you won't need func_name and func_name_length.
Ideally you won't need a check for timestampdiff at all
> + }
> +
> + bool not_func_should_be_skipped(
> + Item_func *item_func
> + ){
> + DBUG_ENTER("not_func_should_be_skipped");
add DBUG_ASSERT(item_func->functype() == Item_func::NOT_FUNC);
> + Item **item_list = item_func->arguments();
> +
> + if (item_list[0]->type() == Item::COND_ITEM)
> + {
> + DBUG_PRINT("info",("spider item_list[0] is COND_ITEM"));
> + Item_cond *item_cond = (Item_cond *) item_list[0];
> + if (item_cond->functype() == Item_func::COND_AND_FUNC)
> + {
> + DBUG_PRINT("info",("spider item_cond is COND_AND_FUNC"));
> + List_iterator_fast<Item> lif(*(item_cond->argument_list()));
> + bool has_expr_cache_item = FALSE;
> + bool has_isnotnull_func = FALSE;
> + bool has_other_item = FALSE;
> + while(Item *item = lif++)
> + {
> + if (
> + item->type() == Item::EXPR_CACHE_ITEM
> + ) {
> + DBUG_PRINT("info",("spider EXPR_CACHE_ITEM"));
> + has_expr_cache_item = TRUE;
> + } else
> + if (
> + item->type() == Item::FUNC_ITEM &&
> + ((Item_func *) item)->functype() == Item_func::ISNOTNULL_FUNC
> + ) {
> + DBUG_PRINT("info",("spider ISNOTNULL_FUNC"));
> + has_isnotnull_func = TRUE;
> + } else {
> + DBUG_PRINT("info",("spider has other item"));
> + DBUG_PRINT("info",("spider COND type=%d", item->type()));
> + has_other_item = TRUE;
> + }
> + }
> + if (has_expr_cache_item && has_isnotnull_func && !has_other_item)
So, "not func should be skipped" if it's
NOT (expr1 AND expr2 AND ...)
and all expr's are either IS NOT NULL or a cached subquery. Why?
IS NULL, IS NOT FALSE, non-cached subqueries, everything else is fine,
IS NOT NULL if it is followed by `AND 2>1` is fine, OR is fine.
Doesn't make much sense to me.
> + {
> + DBUG_PRINT("info",("spider NOT EXISTS skip"));
> + DBUG_RETURN(TRUE);
> + }
> + }
> + }
> + DBUG_RETURN(FALSE);
> + }
> +}
> +
> +/**
> + Check if the given item_func and its arguments can be pushed down to
> + a data node. This function is recursive because we need to also check
> + the arguments of the item_func.
> +
> + @return 0: success, otherwise: error
> + */
> +int spider_db_mbase_util::check_item_func(
> + Item_func *item_func,
> + ha_spider *spider,
> + const char *alias,
> + uint alias_length,
> + bool use_fields,
> + spider_fields *fields
> +) {
> + DBUG_ENTER("spider_db_mbase_util::check_item_func");
> +
> + Item_func::Functype func_type = item_func->functype();
> + DBUG_PRINT("info",("spider functype = %d", func_type));
> +
> + const char *func_name = (char*) item_func->func_name();
> + int func_name_length = strlen(func_name);
> + DBUG_PRINT("info",("spider func_name = %s", func_name));
> +
> + /* The blacklist of the functions that cannot be pushed down */
> + if (
> + func_type == Item_func::TRIG_COND_FUNC ||
> + func_type == Item_func::CASE_SEARCHED_FUNC ||
> + func_type == Item_func::CASE_SIMPLE_FUNC
Does CASE need to be skipped at all?
> + ) {
> + DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> + }
> +
> + if (func_type == Item_func::NOT_FUNC)
> + {
> + /* Why the following check is necessary? */
indeed
> + if(not_func_should_be_skipped(item_func))
> + DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> + }
> +
> + if (func_type == Item_func::UDF_FUNC)
old code used switch here. I think it looks more natural than a sequence
of if's. A compiler might optimize them internally into a switch,
but I'd better do it explicitly.
> + {
> + int use_pushdown_udf = spider_param_use_pushdown_udf(
> + spider->trx->thd, spider->share->use_pushdown_udf);
> + if (!use_pushdown_udf) {
> + DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> + }
> + }
> +
> + if (func_type == Item_func::FT_FUNC) {
> + if (spider_db_check_ft_idx(item_func, spider) == MAX_KEY)
> + DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> + }
> +
> + if (func_type == Item_func::UNKNOWN_FUNC)
> + {
> + if (item_func_is_timestampdiff(func_name, func_name_length)) {
it doesn't seem that it needs to be skipped at all
> + DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> + }
> + }
> + /* End of the blacklist */
> +
> + /* Check the arguments recursively */
> + if (uint item_count = item_func->argument_count())
> + {
> + Item **item_list = item_func->arguments();
> + for (uint roop_count = 0; roop_count < item_count; roop_count++)
> + {
> + Item *item = item_list[roop_count];
> + if (int error_num = spider_db_print_item_type(item, NULL, spider, NULL,
> + alias, alias_length, dbton_id, use_fields, fields))
it'd be more logical to invoke check_item_func() here instead of
spider_db_print_item_type with str==NULL
> + DBUG_RETURN(error_num);
> + }
> + }
> +
> + DBUG_RETURN(0);
> +}
> +
> +/**
> + The function print the string corresponding to the given item_func to str.
> + The function is assumed to be called only when the check by the function
> + check_item_func() has passed.
> +
> + @return 0: success, otherwise: error
> + */
> +int spider_db_mbase_util::print_item_func(
> + Item_func *item_func,
> + ha_spider *spider,
> + spider_string *str,
> + const char *alias,
> + uint alias_length,
> + bool use_fields,
> + spider_fields *fields
> ) {
> int error_num;
> Item *item, **item_list = item_func->arguments();
> @@ -4049,13 +4220,14 @@ int spider_db_mbase_util::open_item_func(
> last_str_length = SPIDER_SQL_NULL_CHAR_LEN;
> int use_pushdown_udf;
> bool merge_func = FALSE;
> - DBUG_ENTER("spider_db_mbase_util::open_item_func");
> - if (str)
> - {
> + DBUG_ENTER("spider_db_mbase_util::print_item_func");
> + DBUG_ASSERT(!check_item_func(item_func, spider, alias, alias_length,
> + use_fields, fields) && str);
Make it two asserts please. One for !check_item_func(...)
and the other DBUG_ASSERT(str);
Generally, always try to split assert into as many separate asserts as
possible, don't do assert(A && B); So that when it fails it'd provide as much
information as possible about what exactly went wrong.
> +
> if (str->reserve(SPIDER_SQL_OPEN_PAREN_LEN))
> DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
> - }
> +
> DBUG_PRINT("info",("spider functype = %d", item_func->functype()));
> switch (item_func->functype())
> {
> @@ -4374,94 +4523,12 @@ int spider_db_mbase_util::open_item_func(
> {
> if (!strncasecmp("utc_timestamp", func_name, func_name_length))
pretty much all that code can use dynamic_cast nowadays
> {
> - if (str)
> str->length(str->length() - SPIDER_SQL_OPEN_PAREN_LEN);
> DBUG_RETURN(spider_db_open_item_string(item_func, NULL, spider, str,
> alias, alias_length, dbton_id, use_fields, fields));
> } else if (!strncasecmp("timestampdiff", func_name, func_name_length))
> {
> -#ifdef ITEM_FUNC_TIMESTAMPDIFF_ARE_PUBLIC
but it _is_ public, so you can push down Item_func_timestamp_diff all right
> - Item_func_timestamp_diff *item_func_timestamp_diff =
> - (Item_func_timestamp_diff *) item_func;
> - if (str)
> - {
> - const char *interval_str;
> - uint interval_len;
> - switch (item_func_timestamp_diff->int_type)
> - {
> - case INTERVAL_YEAR:
> - interval_str = SPIDER_SQL_YEAR_STR;
> - interval_len = SPIDER_SQL_YEAR_LEN;
> - break;
> - case INTERVAL_QUARTER:
> - interval_str = SPIDER_SQL_QUARTER_STR;
> - interval_len = SPIDER_SQL_QUARTER_LEN;
> - break;
> - case INTERVAL_MONTH:
> - interval_str = SPIDER_SQL_MONTH_STR;
> - interval_len = SPIDER_SQL_MONTH_LEN;
> - break;
> - case INTERVAL_WEEK:
> - interval_str = SPIDER_SQL_WEEK_STR;
> - interval_len = SPIDER_SQL_WEEK_LEN;
> - break;
> - case INTERVAL_DAY:
> - interval_str = SPIDER_SQL_DAY_STR;
> - interval_len = SPIDER_SQL_DAY_LEN;
> - break;
> - case INTERVAL_HOUR:
> - interval_str = SPIDER_SQL_HOUR_STR;
> - interval_len = SPIDER_SQL_HOUR_LEN;
> - break;
> - case INTERVAL_MINUTE:
> - interval_str = SPIDER_SQL_MINUTE_STR;
> - interval_len = SPIDER_SQL_MINUTE_LEN;
> - break;
> - case INTERVAL_SECOND:
> - interval_str = SPIDER_SQL_SECOND_STR;
> - interval_len = SPIDER_SQL_SECOND_LEN;
> - break;
> - case INTERVAL_MICROSECOND:
> - interval_str = SPIDER_SQL_MICROSECOND_STR;
> - interval_len = SPIDER_SQL_MICROSECOND_LEN;
> - break;
> - default:
> - interval_str = "";
> - interval_len = 0;
> - break;
> - }
> - str->length(str->length() - SPIDER_SQL_OPEN_PAREN_LEN);
> - if (str->reserve(func_name_length + SPIDER_SQL_OPEN_PAREN_LEN +
> - interval_len + SPIDER_SQL_COMMA_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(func_name, func_name_length);
> - str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
> - str->q_append(interval_str, interval_len);
> - str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN);
> - }
> - if ((error_num = spider_db_print_item_type(item_list[0], NULL, spider,
> - str, alias, alias_length, dbton_id, use_fields, fields)))
> - DBUG_RETURN(error_num);
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_COMMA_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_COMMA_STR, SPIDER_SQL_COMMA_LEN);
> - }
> - if ((error_num = spider_db_print_item_type(item_list[1], NULL, spider,
> - str, alias, alias_length, dbton_id, use_fields, fields)))
> - DBUG_RETURN(error_num);
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_CLOSE_PAREN_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_CLOSE_PAREN_STR,
> - SPIDER_SQL_CLOSE_PAREN_LEN);
> - }
> - DBUG_RETURN(0);
> -#else
> DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> -#endif
> }
> } else if (func_name_length == 14)
> {
> @@ -5038,84 +4996,17 @@ int spider_db_mbase_util::open_item_func(
> func_name = (char*)item_func->func_name();
> func_name_length = strlen(func_name);
> }
> - }
> break;
> case Item_func::CASE_SEARCHED_FUNC:
> case Item_func::CASE_SIMPLE_FUNC:
> -#ifdef ITEM_FUNC_CASE_PARAMS_ARE_PUBLIC
Item_func_case looks somewhat different today, but I suspect
it can be pushed too
> - Item_func_case *item_func_case = (Item_func_case *) item_func;
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_CASE_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_CASE_STR, SPIDER_SQL_CASE_LEN);
> - }
> - if (item_func_case->first_expr_num != -1)
> - {
> - if ((error_num = spider_db_print_item_type(
> - item_list[item_func_case->first_expr_num], NULL, spider, str,
> - alias, alias_length, dbton_id, use_fields, fields)))
> - DBUG_RETURN(error_num);
> - }
> - for (roop_count = 0; roop_count < item_func_case->ncases;
> - roop_count += 2)
> - {
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_WHEN_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_WHEN_STR, SPIDER_SQL_WHEN_LEN);
> - }
> - if ((error_num = spider_db_print_item_type(
> - item_list[roop_count], NULL, spider, str,
> - alias, alias_length, dbton_id, use_fields, fields)))
> - DBUG_RETURN(error_num);
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_THEN_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_THEN_STR, SPIDER_SQL_THEN_LEN);
> - }
> - if ((error_num = spider_db_print_item_type(
> - item_list[roop_count + 1], NULL, spider, str,
> - alias, alias_length, dbton_id, use_fields, fields)))
> - DBUG_RETURN(error_num);
> - }
> - if (item_func_case->else_expr_num != -1)
> - {
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_ELSE_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_ELSE_STR, SPIDER_SQL_ELSE_LEN);
> - }
> - if ((error_num = spider_db_print_item_type(
> - item_list[item_func_case->else_expr_num], NULL, spider, str,
> - alias, alias_length, dbton_id, use_fields, fields)))
> - DBUG_RETURN(error_num);
> - }
> - if (str)
> - {
> - if (str->reserve(SPIDER_SQL_END_LEN + SPIDER_SQL_CLOSE_PAREN_LEN))
> - DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> - str->q_append(SPIDER_SQL_END_STR, SPIDER_SQL_END_LEN);
> - str->q_append(SPIDER_SQL_CLOSE_PAREN_STR,
> - SPIDER_SQL_CLOSE_PAREN_LEN);
> - }
> - DBUG_RETURN(0);
> -#else
> DBUG_RETURN(ER_SPIDER_COND_SKIP_NUM);
> -#endif
> case Item_func::JSON_EXTRACT_FUNC:
> func_name = (char*) item_func->func_name();
> func_name_length = strlen(func_name);
> - if (str)
> - {
> if (str->reserve(func_name_length + SPIDER_SQL_OPEN_PAREN_LEN))
> DBUG_RETURN(HA_ERR_OUT_OF_MEM);
> str->q_append(func_name, func_name_length);
> str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
> - }
> func_name = SPIDER_SQL_COMMA_STR;
> func_name_length = SPIDER_SQL_COMMA_LEN;
> separator_str = SPIDER_SQL_COMMA_STR;
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org