Hi Varun!
Here is the in-detail review for the percentile functions. Please add a
grammar definition for median too and test it. All in all, not many
changes, but please address my comments. If you disagree with some of them,
let's discuss them.
diff --git a/mysql-test/r/win_percentile_cont.result
b/mysql-test/r/win_percentile_cont.result
new file mode 100644
index 00000000000..61f70892887
--- /dev/null
+++ b/mysql-test/r/win_percentile_cont.result
diff --git a/mysql-test/t/win_percentile_cont.test
b/mysql-test/t/win_percentile_cont.test
new file mode 100644
index 00000000000..75fde963b2a
--- /dev/null
+++ b/mysql-test/t/win_percentile_cont.test
@@ -0,0 +1,55 @@
Please put more tests here, as we discussed.
Also, to make test cases easier to follow, please add cume_dist as a
function
in the select to all these use cases.
+CREATE TABLE student (name CHAR(10), test double, score DECIMAL(19,4));
+INSERT INTO student VALUES
+('Chun', 0, 3), ('Chun', 0, 7),
+('Kaolin', 0.5, 3), ('Kaolin', 0.6, 7),
+('Kaolin', 0.5, 4),
+('Tatiana', 0.8, 4), ('Tata', 0.8, 4);
+
Make this --echo # No partition clause
+#no partition clause
+select name, percentile_disc(0.5) within group(order by score) over ()
from student;
+select name, percentile_cont(0.5) within group(order by score) over ()
from student;
+
Same as before, use --echo # so we can see it in the result set too.
+# argument set to null
+select name, percentile_cont(null) within group(order by score) over
(partition by name) from student;
+select name, percentile_disc(null) within group(order by score) over
(partition by name) from student;
+
Use --echo #
+# complete query with partition column
+select name, percentile_cont(0.5) within group(order by score) over
(partition by name) as c from student;
+select name, percentile_disc(0.5) within group(order by score) over
(partition by name) as c from student;
+
Use --echo #
+#subqueries having percentile functions
+
+select * from ( select name , percentile_cont(0.5) within group ( order
by score) over (partition by name ) from student ) as t;
+select * from ( select name , percentile_disc(0.5) within group ( order
by score) over (partition by name ) from student ) as t;
+select name from student a where (select percentile_disc(0.5) within
group (order by score) over (partition by name) from student b limit 1) >=
0.5;
+
USe --echo
+# WITH STORED PROCEDURES
+
+
USe --echo
+#DISALLOWED FIELDS IN ORDER BY CLAUSE
+--error ER_WRONG_TYPE_FOR_PERCENTILE_CONT
+select score, percentile_cont(0.5) within group(order by name) over
(partition by score) from student;
+select score, percentile_disc(0.5) within group(order by name) over
(partition by score) from student;
+
+# error with 2 order by elements
+
+--error ER_NOT_SINGLE_ELEMENT_ORDER_LIST
+select percentile_disc(0.5) within group(order by score,test) over
(partition by name) from student;
+--error ER_NOT_SINGLE_ELEMENT_ORDER_LIST
+select percentile_cont(0.5) within group(order by score,test) over
(partition by name) from student;
+
Use --echo and fix that comment. We thought of a way for this right?
+#parameter value should be in the range of 0 to 1( NEED TO THINK A WAY
FOR THIS)
+--error ER_ARGUMENT_OUT_OF_RANGE
+select percentile_disc(1.5) within group(order by score) over (partition
by name) from student;
+--error ER_ARGUMENT_OUT_OF_RANGE
+select percentile_cont(1.5) within group(order by score) over (partition
by name) from student;
+
+--error ER_ARGUMENT_NOT_CONSTANT
+select name,percentile_cont(test) within group(order by score) over
(partition by name) from student;
+--error ER_ARGUMENT_NOT_CONSTANT
+select name, percentile_disc(test) within group(order by score) over
(partition by name) from student;
+
This is commented out. Not good, fix it please. And use --echo #
+#CHECK TYPE OF THE ARGUMENT, SHOULD BE ONLY NUMERICAL
+#select name, percentile_cont(name) within group(order by score) over
(partition by name) from student;
+
+drop table student;
diff --git a/sql/item.h b/sql/item.h
index 76ce4aa935f..108b0838dfc 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -5245,6 +5246,7 @@ class Cached_item_int :public Cached_item_item
Cached_item_int(Item *item_par) :Cached_item_item(item_par),value(0) {}
bool cmp(void);
int cmp_read_only();
Delete this, it's not used.
+ longlong get_value(){ return value;}
};
@@ -5255,6 +5257,7 @@ class Cached_item_decimal :public Cached_item_item
Cached_item_decimal(Item *item_par);
bool cmp(void);
int cmp_read_only();
Delete this, it's not used.
+ my_decimal *get_value(){ return &value;};
};
class Cached_item_field :public Cached_item
diff --git a/sql/item_windowfunc.cc b/sql/item_windowfunc.cc
index 6ab903a81bb..916f4113682 100644
--- a/sql/item_windowfunc.cc
+++ b/sql/item_windowfunc.cc
diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h
index 77cbd556e60..ca2ed7136ec 100644
--- a/sql/item_windowfunc.h
+++ b/sql/item_windowfunc.h
@@ -678,6 +687,275 @@ class Item_sum_ntile : public
Item_sum_window_with_row_count
ulong current_row_count_;
};
+class Item_sum_percentile_disc : public Item_sum_cume_dist,
+ public Type_handler_hybrid_field_type
+{
+public:
+ Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_cume_dist(thd,
arg),
+ Type_handler_hybrid_field_type(&type_handler_longlong),
+ value(NULL), val_calculated(FALSE), first_call(TRUE),
+ prev_value(0), order_item(NULL){}
+
+ double val_real()
+ {
+ if (get_row_count() == 0 || get_arg(0)->is_null())
+ {
+ null_value= true;
+ return 0;
+ }
+ null_value= false;
+ return value->val_real();
+ }
+
+ longlong val_int()
+ {
+ if (get_row_count() == 0 || get_arg(0)->is_null())
+ {
+ null_value= true;
+ return 0;
+ }
+ null_value= false;
+ return value->val_int();
+ }
+
+ my_decimal* val_decimal(my_decimal* dec)
+ {
+ if (get_row_count() == 0 || get_arg(0)->is_null())
+ {
+ null_value= true;
+ return 0;
+ }
+ null_value= false;
+ return value->val_decimal(dec);
+ }
+
+ String* val_str(String *str)
+ {
+ if (get_row_count() == 0 || get_arg(0)->is_null())
+ {
+ null_value= true;
+ return 0;
+ }
+ null_value= false;
+ return value->val_str(str);
+ }
+
+ bool add()
+ {
An extra space here before =
+ Item *arg = get_arg(0);
+ if (arg->is_null())
+ return false;
+
+ if (first_call)
+ {
+ prev_value= arg->val_real();
We are missing a space after >.
+ if (prev_value >1 || prev_value < 0)
+ {
+ my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0));
+ return true;
+ }
+ first_call= false;
+ }
+
+ double arg_val= arg->val_real();
+
There is one extra space here.
+ if (prev_value != arg_val)
+ {
+ my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0));
+ return true;
+ }
+
+ if (val_calculated)
+ return false;
+
+ value->store(order_item);
+ value->cache_value();
+ if (value->null_value)
+ return false;
+
+ Item_sum_cume_dist::add();
+ double val= Item_sum_cume_dist::val_real();
+
+ if (val >= prev_value && !val_calculated)
+ val_calculated= true;
+ return false;
+ }
+
+ enum Sumfunctype sum_func() const
+ {
+ return PERCENTILE_DISC_FUNC;
+ }
+
+ void clear()
+ {
+ val_calculated= false;
+ first_call= true;
+ value->clear();
+ Item_sum_cume_dist::clear();
+ }
+
+ const char*func_name() const
+ {
+ return "percentile_disc";
+ }
+
+ void update_field() {}
+ void set_type_handler(Window_spec *window_spec);
+ const Type_handler *type_handler() const
+ {return Type_handler_hybrid_field_type::type_handler();}
+
+ void fix_length_and_dec()
+ {
+ decimals = 10; // TODO-cvicentiu find out how many decimals the standard
+ // requires.
+ }
+
+ Item *get_copy(THD *thd, MEM_ROOT *mem_root)
+ { return get_item_copy<Item_sum_percentile_disc>(thd, mem_root, this); }
+ void setup_window_func(THD *thd, Window_spec *window_spec);
+ void setup_hybrid(THD *thd, Item *item);
+
+private:
+ Item_cache *value;
+ bool val_calculated;
+ bool first_call;
+ double prev_value;
+ Item *order_item;
+};
+
+class Item_sum_percentile_cont : public Item_sum_cume_dist,
+ public Type_handler_hybrid_field_type
+{
+public:
+ Item_sum_percentile_cont(THD *thd, Item* arg) : Item_sum_cume_dist(thd,
arg),
+ Type_handler_hybrid_field_type(&type_handler_double),
+ floor_value(NULL), ceil_value(NULL), first_call(TRUE),prev_value(0),
+ ceil_val_calculated(FALSE), floor_val_calculated(FALSE),
order_item(NULL){}
+
+ double val_real()
+ {
+ if (get_row_count() == 0 || get_arg(0)->is_null())
+ {
+ null_value= true;
+ return 0;
+ }
+ null_value= false;
+ double val= 1 + prev_value * (get_row_count()-1);
+
+ /*
+ Applying the formula to get the value
+ If (CRN = FRN = RN) then the result is (value of expression from row at
RN)
+ Otherwise the result is
+ (CRN - RN) * (value of expression for row at FRN) +
+ (RN - FRN) * (value of expression for row at CRN)
+ */
+
+ if(ceil(val) == floor(val))
+ return floor_value->val_real();
+
+ double ret_val= ((val - floor(val)) * ceil_value->val_real()) +
+ ((ceil(val) - val) * floor_value->val_real());
+
+ return ret_val;
+ }
+
+ bool add()
+ {
+ Item *arg = get_arg(0);
+ if (arg->is_null())
+ return false;
+
+ if (first_call)
+ {
+ first_call= false;
+ prev_value= arg->val_real();
+ if (prev_value >1 || prev_value < 0)
+ {
+ my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0));
+ return true;
+ }
+ }
+
+ double arg_val= arg->val_real();
+ if (prev_value != arg_val)
+ {
+ my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0));
+ return true;
+ }
+
+ if (!floor_val_calculated)
+ {
+ floor_value->store(order_item);
+ floor_value->cache_value();
+ if (floor_value->null_value)
+ return false;
+ }
+ if (floor_val_calculated && !ceil_val_calculated)
+ {
+ ceil_value->store(order_item);
+ ceil_value->cache_value();
+ if (ceil_value->null_value)
+ return false;
+ }
+
+ Item_sum_cume_dist::add();
+ double val= 1 + prev_value * (get_row_count()-1);
+
+ if (!floor_val_calculated && get_row_number() == floor(val))
+ floor_val_calculated= true;
+
+ if (!ceil_val_calculated && get_row_number() == ceil(val))
+ ceil_val_calculated= true;
+ return false;
+ }
+
+ enum Sumfunctype sum_func() const
+ {
+ return PERCENTILE_CONT_FUNC;
+ }
+
+ void clear()
+ {
+ first_call= true;
+ floor_value->clear();
+ ceil_value->clear();
+ floor_val_calculated= false;
+ ceil_val_calculated= false;
+ Item_sum_cume_dist::clear();
+ }
+
+ const char*func_name() const
+ {
+ return "percentile_cont";
+ }
+ void update_field() {}
+ void set_type_handler(Window_spec *window_spec);
+ const Type_handler *type_handler() const
+ {return Type_handler_hybrid_field_type::type_handler();}
+
+ void fix_length_and_dec()
+ {
+ decimals = 10; // TODO-cvicentiu find out how many decimals the standard
+ // requires.
+ }
+
+ Item *get_copy(THD *thd, MEM_ROOT *mem_root)
+ { return get_item_copy<Item_sum_percentile_cont>(thd, mem_root, this); }
+ void setup_window_func(THD *thd, Window_spec *window_spec);
+ void setup_hybrid(THD *thd, Item *item);
+
+private:
+ Item_cache *floor_value;
+ Item_cache *ceil_value;
+ bool first_call;
+ double prev_value;
+ bool ceil_val_calculated;
+ bool floor_val_calculated;
+ Item *order_item;
+};
+
+
+
class Item_window_func : public Item_func_or_sum
{
@@ -781,12 +1063,40 @@ class Item_window_func : public Item_func_or_sum
case Item_sum::DENSE_RANK_FUNC:
case Item_sum::PERCENT_RANK_FUNC:
case Item_sum::CUME_DIST_FUNC:
+ case Item_sum::PERCENTILE_CONT_FUNC:
+ case Item_sum::PERCENTILE_DISC_FUNC:
return true;
default:
return false;
}
}
Let's make the grammar enforce this. This is silly. Is there a big
USe --echo
limitation
why we can't define a special rule for these functions?
There already is order_by_single_element_list in sql_yacc.yy, it just
isn't defined properly. We can get rid of these checks altogether then.
+ bool only_single_element_order_list() const
+ {
+ switch (window_func()->sum_func()){
+ case Item_sum::PERCENTILE_CONT_FUNC:
+ case Item_sum::PERCENTILE_DISC_FUNC:
+ return true;
+ default:
+ return false;
+ }
+ }
+
This is not needed. Put this in Item_sum_percentile_disc's fix_fields.
+ void setting_handler_for_percentile_functions(Item_result rtype) const
+ {
+ switch (window_func()->sum_func()){
+ case Item_sum::PERCENTILE_DISC_FUNC:
+ ((Item_sum_percentile_disc* )
window_func())->set_handler_by_cmp_type(rtype);
+ break;
+ default:
+ return;
+ }
+ }
+
Get rid of this function here. Put the logic of checking this result type in
fix_fields specific to Item_sum_percentile_cont / disc.
+ bool check_result_type_of_order_item();
+
+
+
/*
Computation functions.
TODO: consoder merging these with class Group_bound_tracker.
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 1f282e6aee5..d8d6975d92c 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -7490,3 +7490,11 @@ ER_WRONG_INSERT_INTO_SEQUENCE
eng "Wrong INSERT into a SEQUENCE. One can only do single table INSERT
into a squence object (like with mysqldump). If you want to change the
SEQUENCE, use ALTER SEQUENCE instead."
ER_SP_STACK_TRACE
eng "At line %u in %s"
This one will go away when we enforce the limitation in the grammar, make
sure
to remove it.
+ER_NOT_SINGLE_ELEMENT_ORDER_LIST
+ eng "Incorrect number of elements in the order list for '%s'"
+ER_WRONG_TYPE_FOR_PERCENTILE_CONT
+ eng "Numeric datatype is required for Percentile_CONT function"
As discussed have these two errors print the function name to which the
error is for, just like
ER_NOT_SINGLE_ELEMENT_ORDER_LIST uses now.
+ER_ARGUMENT_NOT_CONSTANT
+ eng "Argument to the percentile functions is not a constant"
+ER_ARGUMENT_OUT_OF_RANGE
+ eng "Argument to the percentile functions does not belong to the range
[0,1]"
diff --git a/sql/sql_window.cc b/sql/sql_window.cc
index 74e1ad25183..127d6d73ee6 100644
--- a/sql/sql_window.cc
+++ b/sql/sql_window.cc
@@ -304,6 +304,12 @@ setup_windows(THD *thd, Ref_ptr_array
ref_pointer_array, TABLE_LIST *tables,
win_func_item->update_used_tables();
}
2 problems with this code.
1. We can probably move this logic in
Item_sum_percentile_[cont|disc]::fix_fields.
See if that works.
2. There already is an identical while loop before this. USe that one,
don't have
an extra loop here for no reason.
+ li.rewind();
+ while ((win_func_item= li++))
+ {
+ if (win_func_item->check_result_type_of_order_item())
+ DBUG_RETURN(1);
+ }
DBUG_RETURN(0);
}
@@ -1658,6 +1666,49 @@ class Frame_unbounded_following_set_count : public
Frame_unbounded_following
}
};
This cursor counts all rows in the partition that don't have the order_item
set to null.
Let's rewrite this like so:
Extract this logic:
"""
List_iterator_fast<Item_sum> it(sum_functions);
Item_sum* item;
while ((item= it++))
{
Item_sum_window_with_row_count* item_with_row_count =
static_cast<Item_sum_window_with_row_count *>(item);
item_with_row_count->set_row_count(num_rows_in_partition);
}
"""
1. Nake it a protected method for Frame_unbounded_following_set_count. Lets
call it
set_win_funcs_row_count.
2. Rewrite Frame_unbounded_following_set_count to use that function.
3. Make a subclass Frame_unbounded_following_set_count_no_nulls
4. Make use of the protected method in the
Frame_unbounded_following_set_count_no_nulls
+class Frame_unbounded_following_set_count_special: public
Frame_unbounded_following_set_count
+{
+
+public:
+ Frame_unbounded_following_set_count_special(THD *thd,
+ SQL_I_List<ORDER> *partition_list,
+ SQL_I_List<ORDER> *order_list, Item* arg) :
+ Frame_unbounded_following_set_count(thd,partition_list, order_list)
+ {
+ order_item= order_list->first->item[0];
+ }
+ void next_partition(ha_rows rownum)
+ {
+ ha_rows num_rows_in_partition= 0;
+ if (cursor.fetch())
+ return;
+
+ /* Walk to the end of the partition, find how many rows there are. */
+ do
+ {
+ if (!order_item->is_null())
+ num_rows_in_partition++;
You are missing a space after }
+ }while (!cursor.next());
+
+ List_iterator_fast<Item_sum> it(sum_functions);
+ Item_sum* item;
+ while ((item= it++))
+ {
+ Item_sum_window_with_row_count* item_with_row_count =
+ static_cast<Item_sum_window_with_row_count *>(item);
+ item_with_row_count->set_row_count(num_rows_in_partition);
+ }
+ }
+
+ ha_rows get_curr_rownum() const
+ {
+ return cursor.get_rownum();
+ }
+
+private:
+ Item* order_item;
+};
+
/////////////////////////////////////////////////////////////////////////////
// ROWS-type frame bounds
/////////////////////////////////////////////////////////////////////////////
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index a0bbf39b138..b3b34fce3a4 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -10210,6 +10215,11 @@ geometry_function:
Geometry::wkb_polygon,
Geometry::wkb_linestring));
}
I think this is something that should not be here right?
What is this?
+ | WITHIN '(' expr ',' expr ')'
+ {
+ $$= GEOM_NEW(thd, Item_func_spatial_precise_rel(thd, $3, $5,
+ Item_func::SP_WITHIN_FUNC));
+ }
;
/*
@@ -10671,6 +10681,47 @@ simple_window_func:
}
;
+inverse_distribution_function:
+ inverse_distribution_function_def WITHIN GROUP_SYM
+ '('
+ { Select->prepare_add_window_spec(thd); }
+ order_by_single_element_list ')' OVER_SYM
+ '(' opt_window_ref opt_window_partition_clause ')'
+ {
+ LEX *lex= Lex;
+ if (Select->add_window_spec(thd, lex->win_ref,
+ Select->group_list,
+ Select->order_list,
+ NULL))
+ MYSQL_YYABORT;
+ $$= new (thd->mem_root) Item_window_func(thd, (Item_sum *) $1,
+ thd->lex->win_spec);
+ if ($$ == NULL)
+ MYSQL_YYABORT;
+ if (Select->add_window_func((Item_window_func *) $$))
+ MYSQL_YYABORT;
+ }
+ ;
+
+inverse_distribution_function_def:
+ PERCENTILE_CONT_SYM '(' expr ')'
+ {
+ $$= new (thd->mem_root) Item_sum_percentile_cont(thd, $3);
+ if ($$ == NULL)
+ MYSQL_YYABORT;
+ }
+ | PERCENTILE_DISC_SYM '(' expr ')'
+ {
+ $$= new (thd->mem_root) Item_sum_percentile_disc(thd, $3);
+ if ($$ == NULL)
+ MYSQL_YYABORT;
+ }
+ ;
+
See if you can make this accept only one order element.
+order_by_single_element_list:
+ ORDER_SYM BY order_list
+ ;
+
window_name:
ident
{