revision-id: f7efa5e713ddef1ac9e34ff5032e17d67e5074a1 (mariadb-10.6.1-95-gf7efa5e713d)
parent(s): b9edd5606a6bcae1ea3f2eaeee1e6c7f0502f091
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-27 19:28:10 +0300
message:
Rename histogram_type=JSON to JSON_HB
---
mysql-test/main/statistics_json.result | 74 +++++++++++-----------
mysql-test/main/statistics_json.test | 10 +--
mysql-test/main/system_mysql_db.result | 2 +-
mysql-test/main/system_mysql_db_fix40123.result | 2 +-
mysql-test/main/system_mysql_db_fix50030.result | 2 +-
mysql-test/main/system_mysql_db_fix50117.result | 2 +-
mysql-test/suite/funcs_1/r/is_columns_mysql.result | 8 +--
.../sys_vars/r/sysvars_server_notembedded.result | 2 +-
scripts/mysql_system_tables.sql | 2 +-
sql/item_strfunc.cc | 13 ++--
sql/sql_statistics.cc | 10 +--
sql/sql_statistics.h | 4 +-
12 files changed, 67 insertions(+), 64 deletions(-)
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result
index 5bf5c98a206..97931026690 100644
--- a/mysql-test/main/statistics_json.result
+++ b/mysql-test/main/statistics_json.result
@@ -4,9 +4,9 @@
set @SINGLE_PREC_TYPE='single_prec_hb';
set @DOUBLE_PREC_TYPE='double_prec_hb';
set @DEFAULT_HIST_TYPE=@@histogram_type;
-set @SINGLE_PREC_TYPE='JSON';
-set @DOUBLE_PREC_TYPE='JSON';
-set @DEFAULT_HIST_TYPE='JSON';
+set @SINGLE_PREC_TYPE='JSON_HB';
+set @DOUBLE_PREC_TYPE='JSON_HB';
+set @DEFAULT_HIST_TYPE='JSON_HB';
drop table if exists t1,t2;
set @save_use_stat_tables=@@use_stat_tables;
set @save_histogram_size=@@global.histogram_size;
@@ -232,12 +232,12 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 0 49 0.0000 1.0000 4 JSON 5B0A20202239222C0A2020223139222C0A2020223331222C0A2020223430220A5D
-test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON 5B0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
-test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON 5B0A20202261616161222C0A202022626262626262222C0A202022636363636363636363222C0A2020226464646464646464220A5D
-test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON 5B0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233220A5D
-test t1 e 0.01 0.112 0.2250 6.2000 4 JSON 5B0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31220A5D
-test t1 f 1 5 0.2000 6.4000 4 JSON 5B0A20202202222C0A20202203222C0A20202204222C0A20202204220A5D
+test t1 a 0 49 0.0000 1.0000 4 JSON_HB 5B0A20202239222C0A2020223139222C0A2020223331222C0A2020223430220A5D
+test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON_HB 5B0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
+test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON_HB 5B0A20202261616161222C0A202022626262626262222C0A202022636363636363636363222C0A2020226464646464646464220A5D
+test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB 5B0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233220A5D
+test t1 e 0.01 0.112 0.2250 6.2000 4 JSON_HB 5B0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31220A5D
+test t1 f 1 5 0.2000 6.4000 4 JSON_HB 5B0A20202202222C0A20202203222C0A20202204222C0A20202204220A5D
DELETE FROM mysql.column_stats;
set histogram_size=8;
set histogram_type=@DOUBLE_PREC_TYPE;
@@ -251,12 +251,12 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 0 49 0.0000 1.0000 8 JSON 5B0A20202234222C0A20202239222C0A2020223135222C0A2020223231222C0A2020223239222C0A2020223333222C0A2020223339222C0A2020223433220A5D
-test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 JSON 5B0A20202276767676767676767676767676222C0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A20202277777777777777777777777777777777777777777777777777777777222C0A2020227878787878787878787878787878787878787878787878787878222C0A202022797979222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
-test t1 c aaaa dddddddd 0.1250 7.0000 8 JSON 5B0A20202261616161222C0A20202261616161222C0A202022626262626262222C0A202022626262626262222C0A202022636363636363636363222C0A202022636363636363636363222C0A2020226464646464646464222C0A2020226464646464646464220A5D
-test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 JSON 5B0A202022313938392D30332D3132222C0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233222C0A202022313939392D30372D3233220A5D
-test t1 e 0.01 0.112 0.2250 6.2000 8 JSON 5B0A202022302E3031222C0A202022302E3031222C0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31222C0A202022302E31222C0A202022302E313132220A5D
-test t1 f 1 5 0.2000 6.4000 8 JSON 5B0A20202201222C0A20202202222C0A20202202222C0A20202203222C0A20202203222C0A20202204222C0A20202204222C0A20202205220A5D
+test t1 a 0 49 0.0000 1.0000 8 JSON_HB 5B0A20202234222C0A20202239222C0A2020223135222C0A2020223231222C0A2020223239222C0A2020223333222C0A2020223339222C0A2020223433220A5D
+test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 JSON_HB 5B0A20202276767676767676767676767676222C0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A20202277777777777777777777777777777777777777777777777777777777222C0A2020227878787878787878787878787878787878787878787878787878222C0A202022797979222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
+test t1 c aaaa dddddddd 0.1250 7.0000 8 JSON_HB 5B0A20202261616161222C0A20202261616161222C0A202022626262626262222C0A202022626262626262222C0A202022636363636363636363222C0A202022636363636363636363222C0A2020226464646464646464222C0A2020226464646464646464220A5D
+test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 JSON_HB 5B0A202022313938392D30332D3132222C0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233222C0A202022313939392D30372D3233220A5D
+test t1 e 0.01 0.112 0.2250 6.2000 8 JSON_HB 5B0A202022302E3031222C0A202022302E3031222C0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31222C0A202022302E31222C0A202022302E313132220A5D
+test t1 f 1 5 0.2000 6.4000 8 JSON_HB 5B0A20202201222C0A20202202222C0A20202202222C0A20202203222C0A20202203222C0A20202204222C0A20202204222C0A20202205220A5D
DELETE FROM mysql.column_stats;
set histogram_size= 0;
set histogram_type=@SINGLE_PREC_TYPE;
@@ -1501,7 +1501,7 @@ nulls_ratio 0.0000
avg_length 4.0000
avg_frequency 2.7640
hist_size 100
-hist_type JSON
+hist_type JSON_HB
hex(histogram) 5B0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E31222C0A202022302E31222C0A202022302E32222C0A202022302E32222C0A202022302E33222C0A202022302E33222C0A202022302E34222C0A202022302E34222C0A202022302E34222C0A202022302E35222C0A202022302E35222C0A202022302E36222C0A202022302E36222C0A202022302E37222C0A202022302E37222C0A202022302E38222C0A202022302E39222C0A202022312E31222C0A202022312E32222C0A202022312E33222C0A202022312E34222C0A202022312E34222C0A202022312E36222C0A202022312E36222C0A202022312E37222C0A202022312E39222C0A202022322E30222C0A202022322E32222C0A202022322E32222C0A202022322E33222C0A202022322E35222C0A202022322E36222C0A202022322E38222C0A202022322E39222C0A202022332E31222C0A202022332E32222C0A202022332E34222C0A202022332E36222C0A202022332E38222C0A202022342E30222C0A202022342E33222C0A202022342E35222C0A202022342E38222C0A202022352E31222C0A202022352E34222C0A202022352E37222C0A202022352E38222C0A202022362E31222C0A202022
362E34222C0A202022362E38222C0A202022372E32222C0A202022372E35222C0A202022372E37222C0A202022382E31222C0A202022382E35222C0A202022382E38222C0A202022392E31222C0A202022392E35222C0A20202231302E31222C0A20202231302E38222C0A20202231312E33222C0A20202231322E30222C0A20202231322E36222C0A20202231332E35222C0A20202231342E32222C0A20202231362E31222C0A20202231362E38222C0A20202231382E31222C0A20202232302E30222C0A20202232312E36222C0A20202232332E34222C0A20202232382E33222C0A20202233312E37222C0A20202233342E31222C0A20202233372E35222C0A20202234312E39222C0A20202234362E34222C0A20202235302E37222C0A20202235352E31222C0A20202236302E34222C0A20202236352E36222C0A20202237322E36222C0A20202237372E31222C0A20202238312E32222C0A20202238352E36222C0A20202238372E37222C0A20202238392E39222C0A20202239322E31222C0A20202239342E32222C0A20202239352E39222C0A20202239372E33222C0A20202239382E31222C0A20202239392E30222C0A20202239392E39220A5D
decode_histogram(hist_type,histogram) [
"0.0",
@@ -1615,7 +1615,7 @@ nulls_ratio 0.0000
avg_length 4.0000
avg_frequency 1.0467
hist_size 254
-hist_type JSON
+hist_type JSON_HB
hex(histogram) 5B0A20202231343338222C0A20202235303535222C0A2020223133303030222C0A2020223235383838222C0A2020223530363939222C0A2020223839303533222C0A2020223839333838222C0A2020223839383831222C0A2020223930313131222C0A2020223930363031222C0A2020223930393531222C0A2020223931323030222C0A2020223931373737222C0A2020223932323234222C0A2020223932353833222C0A2020223932393838222C0A2020223933333030222C0A2020223933383138222C0A2020223934313030222C0A2020223934363030222C0A2020223934393334222C0A2020223935343030222C0A2020223935393333222C0A2020223936323931222C0A2020223936383030222C0A2020223937313030222C0A2020223937343531222C0A2020223938303830222C0A2020223938333432222C0A2020223938373831222C0A2020223939333637222C0A2020223939373939222C0A202022313030313138222C0A202022313030343738222C0A202022313030383837222C0A202022313031323035222C0A202022313031353734222C0A202022313031393834222C0A202022313032323934222C0A202022313032363831222C0A202022313033313731222C0A202022313033353434222C0A202022313033393834222C0A20202231303437
3030222C0A202022313035313139222C0A202022313035363930222C0A202022313036303031222C0A202022313036343134222C0A202022313037303030222C0A202022313037333239222C0A202022313037373631222C0A202022313038313030222C0A202022313038353734222C0A202022313039313231222C0A202022313039353030222C0A202022313039393635222C0A202022313130333838222C0A202022313131313030222C0A202022313131373532222C0A202022313132333735222C0A202022313133303830222C0A202022313133383030222C0A202022313134323333222C0A202022313134383736222C0A202022313135353332222C0A202022313136313738222C0A202022313136363935222C0A202022313137323237222C0A202022313137383635222C0A202022313138373138222C0A202022313139323833222C0A202022313139373936222C0A202022313230323635222C0A202022313231303030222C0A202022313231363030222C0A202022313231393534222C0A202022313232373035222C0A202022313233333539222C0A202022313233383635222C0A202022313234323037222C0A202022313234373335222C0A202022313235323535222C0A202022313235373636222C0A202022313236323832222C0A202022313236383230222C0A202
022313237323232222C0A202022313237383031222C0A202022313238333030222C0A202022313239333030222C0A202022313330303030222C0A202022313331303030222C0A202022313331373137222C0A202022313332343535222C0A202022313333313036222C0A202022313333363432222C0A202022313334303337222C0A202022313335303130222C0A202022313336323136222C0A202022313337303238222C0A202022313337373736222C0A202022313338343138222C0A202022313339323833222C0A202022313430303330222C0A202022313430383030222C0A202022313432303434222C0A202022313432363539222C0A202022313433373236222C0A202022313434353832222C0A202022313435383030222C0A202022313436343339222C0A202022313437353233222C0A202022313438313035222C0A202022313439313436222C0A202022313530313030222C0A202022313531303630222C0A202022313532343432222C0A202022313533333634222C0A202022313534393830222C0A202022313535383030222C0A202022313537333030222C0A202022313538333335222C0A202022313539363332222C0A202022313631313631222C0A202022313632333030222C0A202022313633383439222C0A202022313634373437222C0A2020223136363436
37222C0A202022313637343631222C0A202022313639333030222C0A202022313730343238222C0A202022313731353332222C0A202022313732373031222C0A202022313733383935222C0A202022313734393835222C0A202022313736353736222C0A202022313738313832222C0A202022313739323038222C0A202022313830323130222C0A202022313831383035222C0A202022313833313030222C0A202022313834303631222C0A202022313835343031222C0A202022313836393030222C0A202022313838333434222C0A202022313839353639222C0A202022313930393035222C0A202022313933303035222C0A202022313934333030222C0A202022313935353030222C0A202022313937323534222C0A202022313939303030222C0A202022323030393031222C0A202022323032313334222C0A202022323033373933222C0A202022323036313538222C0A202022323037353838222C0A202022323130303638222C0A202022323132393736222C0A202022323134393530222C0A202022323136373335222C0A202022323138343437222C0A202022323231303030222C0A202022323232353138222C0A202022323234383837222C0A202022323237363537222C0A202022323239343235222C0A202022323333303431222C0A202022323336303030222C0A20202
2323339313234222C0A202022323431363439222C0A202022323433373432222C0A202022323435373732222C0A202022323438323435222C0A202022323532333836222C0A202022323534383432222C0A202022323537383132222C0A202022323632303030222C0A202022323634303831222C0A202022323636323831222C0A202022323730323531222C0A202022323732393638222C0A202022323736343231222C0A202022323739333430222C0A202022323832393431222C0A202022323836393030222C0A202022323931303030222C0A202022323934303536222C0A202022323938393030222C0A202022333031323736222C0A202022333034343737222C0A202022333039373530222C0A202022333133353330222C0A202022333137363030222C0A202022333232323637222C0A202022333236333939222C0A202022333330323736222C0A202022333334353633222C0A202022333339313331222C0A202022333432373338222C0A202022333439323436222C0A202022333533363332222C0A202022333539313437222C0A202022333632343730222C0A202022333636353439222C0A202022333732383430222C0A202022333830373535222C0A202022333835323031222C0A202022333932383330222C0A202022343030393937222C0A202022343039313030
222C0A202022343136393838222C0A202022343231353839222C0A202022343238353232222C0A202022343335393634222C0A202022343433373237222C0A202022343532393736222C0A202022343631303030222C0A202022343639373335222C0A202022343736383030222C0A202022343833313535222C0A202022343933343039222C0A202022353038383939222C0A202022353139373933222C0A202022353239393030222C0A202022353430383238222C0A202022353633363632222C0A202022353830303030222C0A202022353934353031222C0A202022363136373030222C0A202022363336373635222C0A202022363536393235222C0A202022363830333332222C0A202022373033353932222C0A202022373335313637222C0A202022373634393032222C0A202022373937373335222C0A202022383330303030222C0A202022383737323339222C0A202022393430353839222C0A202022393933343030222C0A20202231303432373430222C0A20202231313030303030222C0A20202231313536313030222C0A20202231323137383138222C0A20202231333030393737222C0A20202231333932383630222C0A20202231353137353530222C0A20202231363832303030222C0A20202231393639383638222C0A20202232313534333736222C0A20202232353
935363734222C0A20202232393634363338222C0A20202234323536333030222C0A20202236373538383435220A5D
decode_histogram(hist_type,histogram) [
"1438",
@@ -1943,7 +1943,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 1 3 0.0000 1.0000 10 JSON 5B0A20202231222C0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202233220A5D
+test t1 a 1 3 0.0000 1.0000 10 JSON_HB 5B0A20202231222C0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202233220A5D
set histogram_size=default;
drop table t1;
#
@@ -1957,7 +1957,7 @@ set histogram_type=@DOUBLE_PREC_TYPE;
show variables like 'histogram%';
Variable_name Value
histogram_size 10
-histogram_type JSON
+histogram_type JSON_HB
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
@@ -1968,7 +1968,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 1 5 0.0000 1.0000 10 JSON 5B0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202234222C0A20202234222C0A20202235222C0A20202235220A5D
+test t1 a 1 5 0.0000 1.0000 10 JSON_HB 5B0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202234222C0A20202234222C0A20202235222C0A20202235220A5D
set histogram_size=0;
set histogram_type=@SINGLE_PREC_TYPE;
drop table t1;
@@ -2009,7 +2009,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t2 id 1 1024 0.0000 8.0000 63 JSON 5B0A2020223137222C0A2020223333222C0A2020223439222C0A2020223635222C0A2020223831222C0A2020223937222C0A202022313133222C0A202022313239222C0A202022313435222C0A202022313631222C0A202022313737222C0A202022313933222C0A202022323039222C0A202022323235222C0A202022323431222C0A202022323537222C0A202022323733222C0A202022323839222C0A202022333035222C0A202022333231222C0A202022333337222C0A202022333533222C0A202022333639222C0A202022333835222C0A202022343031222C0A202022343137222C0A202022343333222C0A202022343439222C0A202022343635222C0A202022343831222C0A202022343937222C0A202022353133222C0A202022353239222C0A202022353435222C0A202022353631222C0A202022353737222C0A202022353933222C0A202022363039222C0A202022363235222C0A202022363431222C0A202022363537222C0A202022363733222C0A202022363839222C0A202022373035222C0A202022373231222C0A202022373337222C0A202022373533222C0A202022373639222C0A202022373835222C0A202022383031222C0A202022383137222C0A202022383333222C0A202022383439222C0A20202238363
5222C0A202022383831222C0A202022383937222C0A202022393133222C0A202022393239222C0A202022393435222C0A202022393631222C0A202022393737222C0A202022393933222C0A20202231303039220A5D
+test t2 id 1 1024 0.0000 8.0000 63 JSON_HB 5B0A2020223137222C0A2020223333222C0A2020223439222C0A2020223635222C0A2020223831222C0A2020223937222C0A202022313133222C0A202022313239222C0A202022313435222C0A202022313631222C0A202022313737222C0A202022313933222C0A202022323039222C0A202022323235222C0A202022323431222C0A202022323537222C0A202022323733222C0A202022323839222C0A202022333035222C0A202022333231222C0A202022333337222C0A202022333533222C0A202022333639222C0A202022333835222C0A202022343031222C0A202022343137222C0A202022343333222C0A202022343439222C0A202022343635222C0A202022343831222C0A202022343937222C0A202022353133222C0A202022353239222C0A202022353435222C0A202022353631222C0A202022353737222C0A202022353933222C0A202022363039222C0A202022363235222C0A202022363431222C0A202022363537222C0A202022363733222C0A202022363839222C0A202022373035222C0A202022373231222C0A202022373337222C0A202022373533222C0A202022373639222C0A202022373835222C0A202022383031222C0A202022383137222C0A202022383333222C0A202022383439222C0A20202238
3635222C0A202022383831222C0A202022383937222C0A202022393133222C0A202022393239222C0A202022393435222C0A202022393631222C0A202022393737222C0A202022393933222C0A20202231303039220A5D
set histogram_size=0;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables;
@@ -2315,7 +2315,7 @@ Note 1003 select `test`.`t1_bin`.`a` AS `a` from `test`.`t1_bin` where `test`.`t
analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 58.82 60.00 Using where
-set histogram_type=json;
+set histogram_type=json_hb;
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
analyze table t1_json persistent for all;
@@ -2324,7 +2324,7 @@ test.t1_json analyze status Engine-independent statistics collected
test.t1_json analyze status OK
select * from mysql.column_stats where table_name='t1_json';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
-test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON [
+test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON_HB [
"a-0",
"a-0",
"a-0",
@@ -2452,7 +2452,7 @@ city varchar(100)
set histogram_size=50;
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
-set histogram_type=json;
+set histogram_type=json_hb;
analyze table users persistent for all;
Table Op Msg_type Msg_text
test.users analyze status Engine-independent statistics collected
@@ -2484,12 +2484,12 @@ drop table users;
DELETE FROM mysql.column_stats;
create schema world;
use world;
-set histogram_type='JSON';
+set histogram_type='JSON_HB';
set histogram_size=50;
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats;
column_name min_value max_value hist_size hist_type histogram
-Code ABW ZWE 50 JSON [
+Code ABW ZWE 50 JSON_HB [
"ALB",
"ARM",
"AUS",
@@ -2541,7 +2541,7 @@ Code ABW ZWE 50 JSON [
"VIR",
"YEM"
]
-Name Afghanistan Zimbabwe 50 JSON [
+Name Afghanistan Zimbabwe 50 JSON_HB [
"Andorra",
"Argentina",
"Azerbaijan",
@@ -2593,7 +2593,7 @@ Name Afghanistan Zimbabwe 50 JSON [
"Venezuela",
"Western Sahara"
]
-SurfaceArea 0.40 17075400.00 50 JSON [
+SurfaceArea 0.40 17075400.00 50 JSON_HB [
"14.00",
"36.00",
"78.00",
@@ -2645,7 +2645,7 @@ SurfaceArea 0.40 17075400.00 50 JSON [
"2724900.00",
"9363520.00"
]
-Population 0 1277558000 50 JSON [
+Population 0 1277558000 50 JSON_HB [
"0",
"1000",
"2500",
@@ -2697,7 +2697,7 @@ Population 0 1277558000 50 JSON [
"111506000",
"170115000"
]
-Capital 1 4074 50 JSON [
+Capital 1 4074 50 JSON_HB [
"35",
"63",
"129",
@@ -2749,7 +2749,7 @@ Capital 1 4074 50 JSON [
"3537",
"3791"
]
-ID 1 4079 50 JSON [
+ID 1 4079 50 JSON_HB [
"80",
"160",
"240",
@@ -2801,7 +2801,7 @@ ID 1 4079 50 JSON [
"3920",
"4000"
]
-Name A Coruña (La Coruña) Ürgenc 50 JSON [
+Name A Coruña (La Coruña) Ürgenc 50 JSON_HB [
"Allentown",
"Araguari",
"Bahtim",
@@ -2853,7 +2853,7 @@ Name A Coruña (La Coruña) Ürgenc 50 JSON [
"Yangjiang",
"Zhaodong"
]
-Country ABW ZWE 50 JSON [
+Country ABW ZWE 50 JSON_HB [
"ARM",
"BHS",
"BRA",
@@ -2905,7 +2905,7 @@ Country ABW ZWE 50 JSON [
"USA",
"VNM"
]
-Population 42 10500000 50 JSON [
+Population 42 10500000 50 JSON_HB [
"50699",
"90601",
"92583",
@@ -2957,7 +2957,7 @@ Population 42 10500000 50 JSON [
"1300977",
"2154376"
]
-Country ABW ZWE 50 JSON [
+Country ABW ZWE 50 JSON_HB [
"ALB",
"ASM",
"AZE",
@@ -3009,7 +3009,7 @@ Country ABW ZWE 50 JSON [
"VNM",
"ZAF"
]
-Language Abhyasi [South]Mande 50 JSON [
+Language Abhyasi [South]Mande 50 JSON_HB [
"Amhara",
"Arabic",
"Araucan",
@@ -3061,7 +3061,7 @@ Language Abhyasi [South]Mande 50 JSON [
"Urdu",
"Wolea"
]
-Percentage 0.0 99.9 50 JSON [
+Percentage 0.0 99.9 50 JSON_HB [
"0.0",
"0.0",
"0.0",
diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test
index 4bda3255b1c..404d1e11650 100644
--- a/mysql-test/main/statistics_json.test
+++ b/mysql-test/main/statistics_json.test
@@ -2,7 +2,7 @@
--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON
--echo #
-let $histogram_type_override='JSON';
+let $histogram_type_override='JSON_HB';
--source statistics.test
--source include/have_stat_tables.inc
@@ -27,7 +27,7 @@ select hex(histogram) from mysql.column_stats where table_name='t1_bin';
explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
-set histogram_type=json;
+set histogram_type=json_hb;
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
analyze table t1_json persistent for all;
@@ -51,7 +51,7 @@ create table users (
set histogram_size=50;
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
-set histogram_type=json;
+set histogram_type=json_hb;
analyze table users persistent for all;
explain extended select * from users where city = 'Moscow';
analyze select * from users where city = 'Moscow';
@@ -76,7 +76,7 @@ use world;
--enable_result_log
--enable_query_log
-set histogram_type='JSON';
+set histogram_type='JSON_HB';
set histogram_size=50;
--disable_result_log
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
@@ -91,4 +91,4 @@ analyze select * from Country where 'Code' < 'BBC';
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
-DROP SCHEMA world;
\ No newline at end of file
+DROP SCHEMA world;
diff --git a/mysql-test/main/system_mysql_db.result b/mysql-test/main/system_mysql_db.result
index b756dfcf45e..f9e37a6e616 100644
--- a/mysql-test/main/system_mysql_db.result
+++ b/mysql-test/main/system_mysql_db.result
@@ -234,7 +234,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/main/system_mysql_db_fix40123.result b/mysql-test/main/system_mysql_db_fix40123.result
index ec972058d54..613542caf42 100644
--- a/mysql-test/main/system_mysql_db_fix40123.result
+++ b/mysql-test/main/system_mysql_db_fix40123.result
@@ -272,7 +272,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/main/system_mysql_db_fix50030.result b/mysql-test/main/system_mysql_db_fix50030.result
index 4e038849cf6..b60253c42ad 100644
--- a/mysql-test/main/system_mysql_db_fix50030.result
+++ b/mysql-test/main/system_mysql_db_fix50030.result
@@ -276,7 +276,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/main/system_mysql_db_fix50117.result b/mysql-test/main/system_mysql_db_fix50117.result
index 7d540477d51..ebd8b8733c9 100644
--- a/mysql-test/main/system_mysql_db_fix50117.result
+++ b/mysql-test/main/system_mysql_db_fix50117.result
@@ -256,7 +256,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/suite/funcs_1/r/is_columns_mysql.result b/mysql-test/suite/funcs_1/r/is_columns_mysql.result
index ef4832cdb8a..0211ff13cd6 100644
--- a/mysql-test/suite/funcs_1/r/is_columns_mysql.result
+++ b/mysql-test/suite/funcs_1/r/is_columns_mysql.result
@@ -13,9 +13,9 @@ def mysql column_stats avg_frequency 8 NULL YES decimal NULL NULL 12 4 NULL NULL
def mysql column_stats avg_length 7 NULL YES decimal NULL NULL 12 4 NULL NULL NULL decimal(12,4) select,insert,update,references NEVER NULL
def mysql column_stats column_name 3 NULL NO varchar 64 192 NULL NULL NULL utf8mb3 utf8mb3_bin varchar(64) PRI select,insert,update,references NEVER NULL
def mysql column_stats db_name 1 NULL NO varchar 64 192 NULL NULL NULL utf8mb3 utf8mb3_bin varchar(64) PRI select,insert,update,references NEVER NULL
-def mysql column_stats histogram 11 NULL YES varbinary 255 255 NULL NULL NULL NULL NULL varbinary(255) select,insert,update,references NEVER NULL
+def mysql column_stats histogram 11 NULL YES blob 65535 65535 NULL NULL NULL NULL NULL blob select,insert,update,references NEVER NULL
def mysql column_stats hist_size 9 NULL YES tinyint NULL NULL 3 0 NULL NULL NULL tinyint(3) unsigned select,insert,update,references NEVER NULL
-def mysql column_stats hist_type 10 NULL YES enum 14 42 NULL NULL NULL utf8mb3 utf8mb3_bin enum('SINGLE_PREC_HB','DOUBLE_PREC_HB') select,insert,update,references NEVER NULL
+def mysql column_stats hist_type 10 NULL YES enum 14 42 NULL NULL NULL utf8mb3 utf8mb3_bin enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') select,insert,update,references NEVER NULL
def mysql column_stats max_value 5 NULL YES varbinary 255 255 NULL NULL NULL NULL NULL varbinary(255) select,insert,update,references NEVER NULL
def mysql column_stats min_value 4 NULL YES varbinary 255 255 NULL NULL NULL NULL NULL varbinary(255) select,insert,update,references NEVER NULL
def mysql column_stats nulls_ratio 6 NULL YES decimal NULL NULL 12 4 NULL NULL NULL decimal(12,4) select,insert,update,references NEVER NULL
@@ -345,8 +345,8 @@ NULL mysql column_stats nulls_ratio decimal NULL NULL NULL NULL decimal(12,4)
NULL mysql column_stats avg_length decimal NULL NULL NULL NULL decimal(12,4)
NULL mysql column_stats avg_frequency decimal NULL NULL NULL NULL decimal(12,4)
NULL mysql column_stats hist_size tinyint NULL NULL NULL NULL tinyint(3) unsigned
-3.0000 mysql column_stats hist_type enum 14 42 utf8mb3 utf8mb3_bin enum('SINGLE_PREC_HB','DOUBLE_PREC_HB')
-1.0000 mysql column_stats histogram varbinary 255 255 NULL NULL varbinary(255)
+3.0000 mysql column_stats hist_type enum 14 42 utf8mb3 utf8mb3_bin enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')
+1.0000 mysql column_stats histogram blob 65535 65535 NULL NULL blob
3.0000 mysql db Host char 255 765 utf8mb3 utf8mb3_bin char(255)
3.0000 mysql db Db char 64 192 utf8mb3 utf8mb3_bin char(64)
3.0000 mysql db User char 128 384 utf8mb3 utf8mb3_bin char(128)
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index 7b811a011ff..e1a8f4126c0 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -1309,7 +1309,7 @@ VARIABLE_COMMENT Specifies type of the histograms created by ANALYZE. Possible v
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
-ENUM_VALUE_LIST SINGLE_PREC_HB,DOUBLE_PREC_HB
+ENUM_VALUE_LIST SINGLE_PREC_HB,DOUBLE_PREC_HB,JSON_HB
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME HOSTNAME
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index e31f3372b5f..e5b824894a0 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -314,7 +314,7 @@ DROP TABLE tmp_proxies_priv;
CREATE TABLE IF NOT EXISTS table_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, cardinality bigint(21) unsigned DEFAULT NULL, PRIMARY KEY (db_name,table_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Tables';
-CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varbinary(255) DEFAULT NULL, max_value varbinary(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON'), histogram blob, PRIMARY KEY (db_name,table_name,column_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
+CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varbinary(255) DEFAULT NULL, max_value varbinary(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB'), histogram blob, PRIMARY KEY (db_name,table_name,column_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, index_name varchar(64) NOT NULL, prefix_arity int(11) unsigned NOT NULL, avg_frequency decimal(12,4) DEFAULT NULL, PRIMARY KEY (db_name,table_name,index_name,prefix_arity) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Indexes';
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 1f31de8fd52..9f406a7a1cf 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -496,7 +496,7 @@ String *Item_func_from_base64::val_str(String *str)
const char *histogram_types[] =
- {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON", 0};
+ {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON_HB", 0};
static TYPELIB histogram_types_typelib=
{ array_elements(histogram_types),
"histogram_types",
@@ -526,6 +526,13 @@ String *Item_func_decode_histogram::val_str(String *str)
null_value= 1;
return 0;
}
+
+ if (type == JSON_HB)
+ {
+ // It's a JSON histogram. Return it as-is.
+ return res;
+ }
+
if (type == DOUBLE_PREC_HB && res->length() % 2 != 0)
res->length(res->length() - 1); // one byte is unused
@@ -534,10 +541,6 @@ String *Item_func_decode_histogram::val_str(String *str)
str->length(0);
char numbuf[32];
const uchar *p= (uchar*)res->c_ptr_safe();
- if (type == JSON)
- {
- return res;
- }
for (i= 0; i < res->length(); i++)
{
double val;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 43cf4d7dbdd..fb89bf513c1 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -193,7 +193,7 @@ TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] =
},
{
{ STRING_WITH_LEN("hist_type") },
- { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON')") },
+ { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") },
{ STRING_WITH_LEN("utf8mb3") }
},
{
@@ -1234,7 +1234,7 @@ class Column_stat: public Stat_table
case DOUBLE_PREC_HB:
hist = new (mem_root) Histogram_binary();
break;
- case JSON:
+ case JSON_HB:
hist = new (mem_root) Histogram_json();
break;
default:
@@ -2046,7 +2046,7 @@ class Histogram_builder_json : public Histogram_builder
Histogram_base *create_histogram(Histogram_type hist_type)
{
// assumes the caller already checked for invalid histograms
- if (hist_type == JSON)
+ if (hist_type == JSON_HB)
return new Histogram_json;
else
return new Histogram_binary;
@@ -2207,7 +2207,7 @@ class Count_distinct_field: public Sql_alloc
*/
void walk_tree_with_histogram(ha_rows rows)
{
- if (table_field->collected_stats->histogram_->get_type() == JSON)
+ if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
{
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
tree->walk(table_field->table, json_histogram_build_walk,
@@ -2915,7 +2915,7 @@ Histogram_base * get_histogram_by_type(MEM_ROOT *mem_root, Histogram_type hist_t
case SINGLE_PREC_HB:
case DOUBLE_PREC_HB:
return new Histogram_binary();
- case JSON:
+ case JSON_HB:
return new Histogram_json();
default:
DBUG_ASSERT(0);
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 33e0430450a..f9fdd0a63bc 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -44,7 +44,7 @@ enum enum_histogram_type
{
SINGLE_PREC_HB,
DOUBLE_PREC_HB,
- JSON,
+ JSON_HB,
INVALID_HISTOGRAM
} Histogram_type;
@@ -374,7 +374,7 @@ class Histogram_json : public Histogram_base
Histogram_type get_type() override
{
- return JSON;
+ return JSON_HB;
}
void set_size (ulonglong sz) override {size = (uint8) sz; }
1
0
revision-id: c1dfee7b8a60ffd5955013f3c7735353bb8d8bab (mariadb-10.6.1-95-gc1dfee7b8a6)
parent(s): b9edd5606a6bcae1ea3f2eaeee1e6c7f0502f091
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-27 17:57:38 +0300
message:
Rename histogram_type=JSON to JSON_HB
---
mysql-test/main/statistics_json.result | 74 ++++++++++++-------------
mysql-test/main/statistics_json.test | 10 ++--
mysql-test/main/system_mysql_db.result | 2 +-
mysql-test/main/system_mysql_db_fix40123.result | 2 +-
mysql-test/main/system_mysql_db_fix50030.result | 2 +-
mysql-test/main/system_mysql_db_fix50117.result | 2 +-
scripts/mysql_system_tables.sql | 2 +-
sql/item_strfunc.cc | 13 +++--
sql/sql_statistics.cc | 10 ++--
sql/sql_statistics.h | 4 +-
10 files changed, 62 insertions(+), 59 deletions(-)
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result
index 5bf5c98a206..97931026690 100644
--- a/mysql-test/main/statistics_json.result
+++ b/mysql-test/main/statistics_json.result
@@ -4,9 +4,9 @@
set @SINGLE_PREC_TYPE='single_prec_hb';
set @DOUBLE_PREC_TYPE='double_prec_hb';
set @DEFAULT_HIST_TYPE=@@histogram_type;
-set @SINGLE_PREC_TYPE='JSON';
-set @DOUBLE_PREC_TYPE='JSON';
-set @DEFAULT_HIST_TYPE='JSON';
+set @SINGLE_PREC_TYPE='JSON_HB';
+set @DOUBLE_PREC_TYPE='JSON_HB';
+set @DEFAULT_HIST_TYPE='JSON_HB';
drop table if exists t1,t2;
set @save_use_stat_tables=@@use_stat_tables;
set @save_histogram_size=@@global.histogram_size;
@@ -232,12 +232,12 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 0 49 0.0000 1.0000 4 JSON 5B0A20202239222C0A2020223139222C0A2020223331222C0A2020223430220A5D
-test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON 5B0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
-test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON 5B0A20202261616161222C0A202022626262626262222C0A202022636363636363636363222C0A2020226464646464646464220A5D
-test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON 5B0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233220A5D
-test t1 e 0.01 0.112 0.2250 6.2000 4 JSON 5B0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31220A5D
-test t1 f 1 5 0.2000 6.4000 4 JSON 5B0A20202202222C0A20202203222C0A20202204222C0A20202204220A5D
+test t1 a 0 49 0.0000 1.0000 4 JSON_HB 5B0A20202239222C0A2020223139222C0A2020223331222C0A2020223430220A5D
+test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON_HB 5B0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
+test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON_HB 5B0A20202261616161222C0A202022626262626262222C0A202022636363636363636363222C0A2020226464646464646464220A5D
+test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON_HB 5B0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233220A5D
+test t1 e 0.01 0.112 0.2250 6.2000 4 JSON_HB 5B0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31220A5D
+test t1 f 1 5 0.2000 6.4000 4 JSON_HB 5B0A20202202222C0A20202203222C0A20202204222C0A20202204220A5D
DELETE FROM mysql.column_stats;
set histogram_size=8;
set histogram_type=@DOUBLE_PREC_TYPE;
@@ -251,12 +251,12 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 0 49 0.0000 1.0000 8 JSON 5B0A20202234222C0A20202239222C0A2020223135222C0A2020223231222C0A2020223239222C0A2020223333222C0A2020223339222C0A2020223433220A5D
-test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 JSON 5B0A20202276767676767676767676767676222C0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A20202277777777777777777777777777777777777777777777777777777777222C0A2020227878787878787878787878787878787878787878787878787878222C0A202022797979222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
-test t1 c aaaa dddddddd 0.1250 7.0000 8 JSON 5B0A20202261616161222C0A20202261616161222C0A202022626262626262222C0A202022626262626262222C0A202022636363636363636363222C0A202022636363636363636363222C0A2020226464646464646464222C0A2020226464646464646464220A5D
-test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 JSON 5B0A202022313938392D30332D3132222C0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233222C0A202022313939392D30372D3233220A5D
-test t1 e 0.01 0.112 0.2250 6.2000 8 JSON 5B0A202022302E3031222C0A202022302E3031222C0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31222C0A202022302E31222C0A202022302E313132220A5D
-test t1 f 1 5 0.2000 6.4000 8 JSON 5B0A20202201222C0A20202202222C0A20202202222C0A20202203222C0A20202203222C0A20202204222C0A20202204222C0A20202205220A5D
+test t1 a 0 49 0.0000 1.0000 8 JSON_HB 5B0A20202234222C0A20202239222C0A2020223135222C0A2020223231222C0A2020223239222C0A2020223333222C0A2020223339222C0A2020223433220A5D
+test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 8 JSON_HB 5B0A20202276767676767676767676767676222C0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A20202277777777777777777777777777777777777777777777777777777777222C0A2020227878787878787878787878787878787878787878787878787878222C0A202022797979222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
+test t1 c aaaa dddddddd 0.1250 7.0000 8 JSON_HB 5B0A20202261616161222C0A20202261616161222C0A202022626262626262222C0A202022626262626262222C0A202022636363636363636363222C0A202022636363636363636363222C0A2020226464646464646464222C0A2020226464646464646464220A5D
+test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 8 JSON_HB 5B0A202022313938392D30332D3132222C0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233222C0A202022313939392D30372D3233220A5D
+test t1 e 0.01 0.112 0.2250 6.2000 8 JSON_HB 5B0A202022302E3031222C0A202022302E3031222C0A202022302E3031222C0A202022302E303132222C0A202022302E3035222C0A202022302E31222C0A202022302E31222C0A202022302E313132220A5D
+test t1 f 1 5 0.2000 6.4000 8 JSON_HB 5B0A20202201222C0A20202202222C0A20202202222C0A20202203222C0A20202203222C0A20202204222C0A20202204222C0A20202205220A5D
DELETE FROM mysql.column_stats;
set histogram_size= 0;
set histogram_type=@SINGLE_PREC_TYPE;
@@ -1501,7 +1501,7 @@ nulls_ratio 0.0000
avg_length 4.0000
avg_frequency 2.7640
hist_size 100
-hist_type JSON
+hist_type JSON_HB
hex(histogram) 5B0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E30222C0A202022302E31222C0A202022302E31222C0A202022302E32222C0A202022302E32222C0A202022302E33222C0A202022302E33222C0A202022302E34222C0A202022302E34222C0A202022302E34222C0A202022302E35222C0A202022302E35222C0A202022302E36222C0A202022302E36222C0A202022302E37222C0A202022302E37222C0A202022302E38222C0A202022302E39222C0A202022312E31222C0A202022312E32222C0A202022312E33222C0A202022312E34222C0A202022312E34222C0A202022312E36222C0A202022312E36222C0A202022312E37222C0A202022312E39222C0A202022322E30222C0A202022322E32222C0A202022322E32222C0A202022322E33222C0A202022322E35222C0A202022322E36222C0A202022322E38222C0A202022322E39222C0A202022332E31222C0A202022332E32222C0A202022332E34222C0A202022332E36222C0A202022332E38222C0A202022342E30222C0A202022342E33222C0A202022342E35222C0A202022342E38222C0A202022352E31222C0A202022352E34222C0A202022352E37222C0A202022352E38222C0A202022362E31222C0A202022
362E34222C0A202022362E38222C0A202022372E32222C0A202022372E35222C0A202022372E37222C0A202022382E31222C0A202022382E35222C0A202022382E38222C0A202022392E31222C0A202022392E35222C0A20202231302E31222C0A20202231302E38222C0A20202231312E33222C0A20202231322E30222C0A20202231322E36222C0A20202231332E35222C0A20202231342E32222C0A20202231362E31222C0A20202231362E38222C0A20202231382E31222C0A20202232302E30222C0A20202232312E36222C0A20202232332E34222C0A20202232382E33222C0A20202233312E37222C0A20202233342E31222C0A20202233372E35222C0A20202234312E39222C0A20202234362E34222C0A20202235302E37222C0A20202235352E31222C0A20202236302E34222C0A20202236352E36222C0A20202237322E36222C0A20202237372E31222C0A20202238312E32222C0A20202238352E36222C0A20202238372E37222C0A20202238392E39222C0A20202239322E31222C0A20202239342E32222C0A20202239352E39222C0A20202239372E33222C0A20202239382E31222C0A20202239392E30222C0A20202239392E39220A5D
decode_histogram(hist_type,histogram) [
"0.0",
@@ -1615,7 +1615,7 @@ nulls_ratio 0.0000
avg_length 4.0000
avg_frequency 1.0467
hist_size 254
-hist_type JSON
+hist_type JSON_HB
hex(histogram) 5B0A20202231343338222C0A20202235303535222C0A2020223133303030222C0A2020223235383838222C0A2020223530363939222C0A2020223839303533222C0A2020223839333838222C0A2020223839383831222C0A2020223930313131222C0A2020223930363031222C0A2020223930393531222C0A2020223931323030222C0A2020223931373737222C0A2020223932323234222C0A2020223932353833222C0A2020223932393838222C0A2020223933333030222C0A2020223933383138222C0A2020223934313030222C0A2020223934363030222C0A2020223934393334222C0A2020223935343030222C0A2020223935393333222C0A2020223936323931222C0A2020223936383030222C0A2020223937313030222C0A2020223937343531222C0A2020223938303830222C0A2020223938333432222C0A2020223938373831222C0A2020223939333637222C0A2020223939373939222C0A202022313030313138222C0A202022313030343738222C0A202022313030383837222C0A202022313031323035222C0A202022313031353734222C0A202022313031393834222C0A202022313032323934222C0A202022313032363831222C0A202022313033313731222C0A202022313033353434222C0A202022313033393834222C0A20202231303437
3030222C0A202022313035313139222C0A202022313035363930222C0A202022313036303031222C0A202022313036343134222C0A202022313037303030222C0A202022313037333239222C0A202022313037373631222C0A202022313038313030222C0A202022313038353734222C0A202022313039313231222C0A202022313039353030222C0A202022313039393635222C0A202022313130333838222C0A202022313131313030222C0A202022313131373532222C0A202022313132333735222C0A202022313133303830222C0A202022313133383030222C0A202022313134323333222C0A202022313134383736222C0A202022313135353332222C0A202022313136313738222C0A202022313136363935222C0A202022313137323237222C0A202022313137383635222C0A202022313138373138222C0A202022313139323833222C0A202022313139373936222C0A202022313230323635222C0A202022313231303030222C0A202022313231363030222C0A202022313231393534222C0A202022313232373035222C0A202022313233333539222C0A202022313233383635222C0A202022313234323037222C0A202022313234373335222C0A202022313235323535222C0A202022313235373636222C0A202022313236323832222C0A202022313236383230222C0A202
022313237323232222C0A202022313237383031222C0A202022313238333030222C0A202022313239333030222C0A202022313330303030222C0A202022313331303030222C0A202022313331373137222C0A202022313332343535222C0A202022313333313036222C0A202022313333363432222C0A202022313334303337222C0A202022313335303130222C0A202022313336323136222C0A202022313337303238222C0A202022313337373736222C0A202022313338343138222C0A202022313339323833222C0A202022313430303330222C0A202022313430383030222C0A202022313432303434222C0A202022313432363539222C0A202022313433373236222C0A202022313434353832222C0A202022313435383030222C0A202022313436343339222C0A202022313437353233222C0A202022313438313035222C0A202022313439313436222C0A202022313530313030222C0A202022313531303630222C0A202022313532343432222C0A202022313533333634222C0A202022313534393830222C0A202022313535383030222C0A202022313537333030222C0A202022313538333335222C0A202022313539363332222C0A202022313631313631222C0A202022313632333030222C0A202022313633383439222C0A202022313634373437222C0A2020223136363436
37222C0A202022313637343631222C0A202022313639333030222C0A202022313730343238222C0A202022313731353332222C0A202022313732373031222C0A202022313733383935222C0A202022313734393835222C0A202022313736353736222C0A202022313738313832222C0A202022313739323038222C0A202022313830323130222C0A202022313831383035222C0A202022313833313030222C0A202022313834303631222C0A202022313835343031222C0A202022313836393030222C0A202022313838333434222C0A202022313839353639222C0A202022313930393035222C0A202022313933303035222C0A202022313934333030222C0A202022313935353030222C0A202022313937323534222C0A202022313939303030222C0A202022323030393031222C0A202022323032313334222C0A202022323033373933222C0A202022323036313538222C0A202022323037353838222C0A202022323130303638222C0A202022323132393736222C0A202022323134393530222C0A202022323136373335222C0A202022323138343437222C0A202022323231303030222C0A202022323232353138222C0A202022323234383837222C0A202022323237363537222C0A202022323239343235222C0A202022323333303431222C0A202022323336303030222C0A20202
2323339313234222C0A202022323431363439222C0A202022323433373432222C0A202022323435373732222C0A202022323438323435222C0A202022323532333836222C0A202022323534383432222C0A202022323537383132222C0A202022323632303030222C0A202022323634303831222C0A202022323636323831222C0A202022323730323531222C0A202022323732393638222C0A202022323736343231222C0A202022323739333430222C0A202022323832393431222C0A202022323836393030222C0A202022323931303030222C0A202022323934303536222C0A202022323938393030222C0A202022333031323736222C0A202022333034343737222C0A202022333039373530222C0A202022333133353330222C0A202022333137363030222C0A202022333232323637222C0A202022333236333939222C0A202022333330323736222C0A202022333334353633222C0A202022333339313331222C0A202022333432373338222C0A202022333439323436222C0A202022333533363332222C0A202022333539313437222C0A202022333632343730222C0A202022333636353439222C0A202022333732383430222C0A202022333830373535222C0A202022333835323031222C0A202022333932383330222C0A202022343030393937222C0A202022343039313030
222C0A202022343136393838222C0A202022343231353839222C0A202022343238353232222C0A202022343335393634222C0A202022343433373237222C0A202022343532393736222C0A202022343631303030222C0A202022343639373335222C0A202022343736383030222C0A202022343833313535222C0A202022343933343039222C0A202022353038383939222C0A202022353139373933222C0A202022353239393030222C0A202022353430383238222C0A202022353633363632222C0A202022353830303030222C0A202022353934353031222C0A202022363136373030222C0A202022363336373635222C0A202022363536393235222C0A202022363830333332222C0A202022373033353932222C0A202022373335313637222C0A202022373634393032222C0A202022373937373335222C0A202022383330303030222C0A202022383737323339222C0A202022393430353839222C0A202022393933343030222C0A20202231303432373430222C0A20202231313030303030222C0A20202231313536313030222C0A20202231323137383138222C0A20202231333030393737222C0A20202231333932383630222C0A20202231353137353530222C0A20202231363832303030222C0A20202231393639383638222C0A20202232313534333736222C0A20202232353
935363734222C0A20202232393634363338222C0A20202234323536333030222C0A20202236373538383435220A5D
decode_histogram(hist_type,histogram) [
"1438",
@@ -1943,7 +1943,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 1 3 0.0000 1.0000 10 JSON 5B0A20202231222C0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202233220A5D
+test t1 a 1 3 0.0000 1.0000 10 JSON_HB 5B0A20202231222C0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202233220A5D
set histogram_size=default;
drop table t1;
#
@@ -1957,7 +1957,7 @@ set histogram_type=@DOUBLE_PREC_TYPE;
show variables like 'histogram%';
Variable_name Value
histogram_size 10
-histogram_type JSON
+histogram_type JSON_HB
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
@@ -1968,7 +1968,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 1 5 0.0000 1.0000 10 JSON 5B0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202234222C0A20202234222C0A20202235222C0A20202235220A5D
+test t1 a 1 5 0.0000 1.0000 10 JSON_HB 5B0A20202231222C0A20202231222C0A20202232222C0A20202232222C0A20202233222C0A20202233222C0A20202234222C0A20202234222C0A20202235222C0A20202235220A5D
set histogram_size=0;
set histogram_type=@SINGLE_PREC_TYPE;
drop table t1;
@@ -2009,7 +2009,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t2 id 1 1024 0.0000 8.0000 63 JSON 5B0A2020223137222C0A2020223333222C0A2020223439222C0A2020223635222C0A2020223831222C0A2020223937222C0A202022313133222C0A202022313239222C0A202022313435222C0A202022313631222C0A202022313737222C0A202022313933222C0A202022323039222C0A202022323235222C0A202022323431222C0A202022323537222C0A202022323733222C0A202022323839222C0A202022333035222C0A202022333231222C0A202022333337222C0A202022333533222C0A202022333639222C0A202022333835222C0A202022343031222C0A202022343137222C0A202022343333222C0A202022343439222C0A202022343635222C0A202022343831222C0A202022343937222C0A202022353133222C0A202022353239222C0A202022353435222C0A202022353631222C0A202022353737222C0A202022353933222C0A202022363039222C0A202022363235222C0A202022363431222C0A202022363537222C0A202022363733222C0A202022363839222C0A202022373035222C0A202022373231222C0A202022373337222C0A202022373533222C0A202022373639222C0A202022373835222C0A202022383031222C0A202022383137222C0A202022383333222C0A202022383439222C0A20202238363
5222C0A202022383831222C0A202022383937222C0A202022393133222C0A202022393239222C0A202022393435222C0A202022393631222C0A202022393737222C0A202022393933222C0A20202231303039220A5D
+test t2 id 1 1024 0.0000 8.0000 63 JSON_HB 5B0A2020223137222C0A2020223333222C0A2020223439222C0A2020223635222C0A2020223831222C0A2020223937222C0A202022313133222C0A202022313239222C0A202022313435222C0A202022313631222C0A202022313737222C0A202022313933222C0A202022323039222C0A202022323235222C0A202022323431222C0A202022323537222C0A202022323733222C0A202022323839222C0A202022333035222C0A202022333231222C0A202022333337222C0A202022333533222C0A202022333639222C0A202022333835222C0A202022343031222C0A202022343137222C0A202022343333222C0A202022343439222C0A202022343635222C0A202022343831222C0A202022343937222C0A202022353133222C0A202022353239222C0A202022353435222C0A202022353631222C0A202022353737222C0A202022353933222C0A202022363039222C0A202022363235222C0A202022363431222C0A202022363537222C0A202022363733222C0A202022363839222C0A202022373035222C0A202022373231222C0A202022373337222C0A202022373533222C0A202022373639222C0A202022373835222C0A202022383031222C0A202022383137222C0A202022383333222C0A202022383439222C0A20202238
3635222C0A202022383831222C0A202022383937222C0A202022393133222C0A202022393239222C0A202022393435222C0A202022393631222C0A202022393737222C0A202022393933222C0A20202231303039220A5D
set histogram_size=0;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables;
@@ -2315,7 +2315,7 @@ Note 1003 select `test`.`t1_bin`.`a` AS `a` from `test`.`t1_bin` where `test`.`t
analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1_bin ALL NULL NULL NULL NULL 10 10.00 58.82 60.00 Using where
-set histogram_type=json;
+set histogram_type=json_hb;
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
analyze table t1_json persistent for all;
@@ -2324,7 +2324,7 @@ test.t1_json analyze status Engine-independent statistics collected
test.t1_json analyze status OK
select * from mysql.column_stats where table_name='t1_json';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
-test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON [
+test t1_json a a-0 a-9 0.0000 3.0000 1.0000 100 JSON_HB [
"a-0",
"a-0",
"a-0",
@@ -2452,7 +2452,7 @@ city varchar(100)
set histogram_size=50;
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
-set histogram_type=json;
+set histogram_type=json_hb;
analyze table users persistent for all;
Table Op Msg_type Msg_text
test.users analyze status Engine-independent statistics collected
@@ -2484,12 +2484,12 @@ drop table users;
DELETE FROM mysql.column_stats;
create schema world;
use world;
-set histogram_type='JSON';
+set histogram_type='JSON_HB';
set histogram_size=50;
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats;
column_name min_value max_value hist_size hist_type histogram
-Code ABW ZWE 50 JSON [
+Code ABW ZWE 50 JSON_HB [
"ALB",
"ARM",
"AUS",
@@ -2541,7 +2541,7 @@ Code ABW ZWE 50 JSON [
"VIR",
"YEM"
]
-Name Afghanistan Zimbabwe 50 JSON [
+Name Afghanistan Zimbabwe 50 JSON_HB [
"Andorra",
"Argentina",
"Azerbaijan",
@@ -2593,7 +2593,7 @@ Name Afghanistan Zimbabwe 50 JSON [
"Venezuela",
"Western Sahara"
]
-SurfaceArea 0.40 17075400.00 50 JSON [
+SurfaceArea 0.40 17075400.00 50 JSON_HB [
"14.00",
"36.00",
"78.00",
@@ -2645,7 +2645,7 @@ SurfaceArea 0.40 17075400.00 50 JSON [
"2724900.00",
"9363520.00"
]
-Population 0 1277558000 50 JSON [
+Population 0 1277558000 50 JSON_HB [
"0",
"1000",
"2500",
@@ -2697,7 +2697,7 @@ Population 0 1277558000 50 JSON [
"111506000",
"170115000"
]
-Capital 1 4074 50 JSON [
+Capital 1 4074 50 JSON_HB [
"35",
"63",
"129",
@@ -2749,7 +2749,7 @@ Capital 1 4074 50 JSON [
"3537",
"3791"
]
-ID 1 4079 50 JSON [
+ID 1 4079 50 JSON_HB [
"80",
"160",
"240",
@@ -2801,7 +2801,7 @@ ID 1 4079 50 JSON [
"3920",
"4000"
]
-Name A Coruña (La Coruña) Ürgenc 50 JSON [
+Name A Coruña (La Coruña) Ürgenc 50 JSON_HB [
"Allentown",
"Araguari",
"Bahtim",
@@ -2853,7 +2853,7 @@ Name A Coruña (La Coruña) Ürgenc 50 JSON [
"Yangjiang",
"Zhaodong"
]
-Country ABW ZWE 50 JSON [
+Country ABW ZWE 50 JSON_HB [
"ARM",
"BHS",
"BRA",
@@ -2905,7 +2905,7 @@ Country ABW ZWE 50 JSON [
"USA",
"VNM"
]
-Population 42 10500000 50 JSON [
+Population 42 10500000 50 JSON_HB [
"50699",
"90601",
"92583",
@@ -2957,7 +2957,7 @@ Population 42 10500000 50 JSON [
"1300977",
"2154376"
]
-Country ABW ZWE 50 JSON [
+Country ABW ZWE 50 JSON_HB [
"ALB",
"ASM",
"AZE",
@@ -3009,7 +3009,7 @@ Country ABW ZWE 50 JSON [
"VNM",
"ZAF"
]
-Language Abhyasi [South]Mande 50 JSON [
+Language Abhyasi [South]Mande 50 JSON_HB [
"Amhara",
"Arabic",
"Araucan",
@@ -3061,7 +3061,7 @@ Language Abhyasi [South]Mande 50 JSON [
"Urdu",
"Wolea"
]
-Percentage 0.0 99.9 50 JSON [
+Percentage 0.0 99.9 50 JSON_HB [
"0.0",
"0.0",
"0.0",
diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test
index 4bda3255b1c..404d1e11650 100644
--- a/mysql-test/main/statistics_json.test
+++ b/mysql-test/main/statistics_json.test
@@ -2,7 +2,7 @@
--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON
--echo #
-let $histogram_type_override='JSON';
+let $histogram_type_override='JSON_HB';
--source statistics.test
--source include/have_stat_tables.inc
@@ -27,7 +27,7 @@ select hex(histogram) from mysql.column_stats where table_name='t1_bin';
explain extended select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
analyze select * from t1_bin where a between 'a-3a' and 'zzzzzzzzz';
-set histogram_type=json;
+set histogram_type=json_hb;
create table t1_json (a varchar(255));
insert into t1_json select concat('a-', a) from ten;
analyze table t1_json persistent for all;
@@ -51,7 +51,7 @@ create table users (
set histogram_size=50;
insert into users select 'Moscow' from seq_1_to_99;
insert into users select 'Helsinki' from seq_1_to_2;
-set histogram_type=json;
+set histogram_type=json_hb;
analyze table users persistent for all;
explain extended select * from users where city = 'Moscow';
analyze select * from users where city = 'Moscow';
@@ -76,7 +76,7 @@ use world;
--enable_result_log
--enable_query_log
-set histogram_type='JSON';
+set histogram_type='JSON_HB';
set histogram_size=50;
--disable_result_log
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
@@ -91,4 +91,4 @@ analyze select * from Country where 'Code' < 'BBC';
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
-DROP SCHEMA world;
\ No newline at end of file
+DROP SCHEMA world;
diff --git a/mysql-test/main/system_mysql_db.result b/mysql-test/main/system_mysql_db.result
index b756dfcf45e..f9e37a6e616 100644
--- a/mysql-test/main/system_mysql_db.result
+++ b/mysql-test/main/system_mysql_db.result
@@ -234,7 +234,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/main/system_mysql_db_fix40123.result b/mysql-test/main/system_mysql_db_fix40123.result
index ec972058d54..613542caf42 100644
--- a/mysql-test/main/system_mysql_db_fix40123.result
+++ b/mysql-test/main/system_mysql_db_fix40123.result
@@ -272,7 +272,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/main/system_mysql_db_fix50030.result b/mysql-test/main/system_mysql_db_fix50030.result
index 4e038849cf6..b60253c42ad 100644
--- a/mysql-test/main/system_mysql_db_fix50030.result
+++ b/mysql-test/main/system_mysql_db_fix50030.result
@@ -276,7 +276,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/mysql-test/main/system_mysql_db_fix50117.result b/mysql-test/main/system_mysql_db_fix50117.result
index 7d540477d51..ebd8b8733c9 100644
--- a/mysql-test/main/system_mysql_db_fix50117.result
+++ b/mysql-test/main/system_mysql_db_fix50117.result
@@ -256,7 +256,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index e31f3372b5f..e5b824894a0 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -314,7 +314,7 @@ DROP TABLE tmp_proxies_priv;
CREATE TABLE IF NOT EXISTS table_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, cardinality bigint(21) unsigned DEFAULT NULL, PRIMARY KEY (db_name,table_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Tables';
-CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varbinary(255) DEFAULT NULL, max_value varbinary(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON'), histogram blob, PRIMARY KEY (db_name,table_name,column_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
+CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varbinary(255) DEFAULT NULL, max_value varbinary(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB'), histogram blob, PRIMARY KEY (db_name,table_name,column_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, index_name varchar(64) NOT NULL, prefix_arity int(11) unsigned NOT NULL, avg_frequency decimal(12,4) DEFAULT NULL, PRIMARY KEY (db_name,table_name,index_name,prefix_arity) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Indexes';
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 1f31de8fd52..9f406a7a1cf 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -496,7 +496,7 @@ String *Item_func_from_base64::val_str(String *str)
const char *histogram_types[] =
- {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON", 0};
+ {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON_HB", 0};
static TYPELIB histogram_types_typelib=
{ array_elements(histogram_types),
"histogram_types",
@@ -526,6 +526,13 @@ String *Item_func_decode_histogram::val_str(String *str)
null_value= 1;
return 0;
}
+
+ if (type == JSON_HB)
+ {
+ // It's a JSON histogram. Return it as-is.
+ return res;
+ }
+
if (type == DOUBLE_PREC_HB && res->length() % 2 != 0)
res->length(res->length() - 1); // one byte is unused
@@ -534,10 +541,6 @@ String *Item_func_decode_histogram::val_str(String *str)
str->length(0);
char numbuf[32];
const uchar *p= (uchar*)res->c_ptr_safe();
- if (type == JSON)
- {
- return res;
- }
for (i= 0; i < res->length(); i++)
{
double val;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 43cf4d7dbdd..fb89bf513c1 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -193,7 +193,7 @@ TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] =
},
{
{ STRING_WITH_LEN("hist_type") },
- { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON')") },
+ { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") },
{ STRING_WITH_LEN("utf8mb3") }
},
{
@@ -1234,7 +1234,7 @@ class Column_stat: public Stat_table
case DOUBLE_PREC_HB:
hist = new (mem_root) Histogram_binary();
break;
- case JSON:
+ case JSON_HB:
hist = new (mem_root) Histogram_json();
break;
default:
@@ -2046,7 +2046,7 @@ class Histogram_builder_json : public Histogram_builder
Histogram_base *create_histogram(Histogram_type hist_type)
{
// assumes the caller already checked for invalid histograms
- if (hist_type == JSON)
+ if (hist_type == JSON_HB)
return new Histogram_json;
else
return new Histogram_binary;
@@ -2207,7 +2207,7 @@ class Count_distinct_field: public Sql_alloc
*/
void walk_tree_with_histogram(ha_rows rows)
{
- if (table_field->collected_stats->histogram_->get_type() == JSON)
+ if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
{
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
tree->walk(table_field->table, json_histogram_build_walk,
@@ -2915,7 +2915,7 @@ Histogram_base * get_histogram_by_type(MEM_ROOT *mem_root, Histogram_type hist_t
case SINGLE_PREC_HB:
case DOUBLE_PREC_HB:
return new Histogram_binary();
- case JSON:
+ case JSON_HB:
return new Histogram_json();
default:
DBUG_ASSERT(0);
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 33e0430450a..f9fdd0a63bc 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -44,7 +44,7 @@ enum enum_histogram_type
{
SINGLE_PREC_HB,
DOUBLE_PREC_HB,
- JSON,
+ JSON_HB,
INVALID_HISTOGRAM
} Histogram_type;
@@ -374,7 +374,7 @@ class Histogram_json : public Histogram_base
Histogram_type get_type() override
{
- return JSON;
+ return JSON_HB;
}
void set_size (ulonglong sz) override {size = (uint8) sz; }
1
0
revision-id: ef70a780ac663f56412183ef47cfac2d4ae84738 (mariadb-10.6.0-413-gef70a780ac6)
parent(s): d809e220eb2a31f07016210186600fa9dfe56232
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-27 16:57:22 +0300
message:
Rename histogram_type=JSON to JSON_HB
---
mysql-test/main/statistics_json.result | 8 ++++----
mysql-test/main/statistics_json.test | 6 +++---
mysql-test/main/system_mysql_db.result | 3 +--
mysql-test/main/system_mysql_db_fix50030.result | 2 +-
scripts/mysql_system_tables.sql | 2 +-
sql/item_strfunc.cc | 13 ++++++++-----
sql/sql_statistics.cc | 10 +++++-----
sql/sql_statistics.h | 4 ++--
8 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result
index 5bf5c98a206..9eaa195c534 100644
--- a/mysql-test/main/statistics_json.result
+++ b/mysql-test/main/statistics_json.result
@@ -4,9 +4,9 @@
set @SINGLE_PREC_TYPE='single_prec_hb';
set @DOUBLE_PREC_TYPE='double_prec_hb';
set @DEFAULT_HIST_TYPE=@@histogram_type;
-set @SINGLE_PREC_TYPE='JSON';
-set @DOUBLE_PREC_TYPE='JSON';
-set @DEFAULT_HIST_TYPE='JSON';
+set @SINGLE_PREC_TYPE='JSON_HB';
+set @DOUBLE_PREC_TYPE='JSON_HB';
+set @DEFAULT_HIST_TYPE='JSON_HB';
drop table if exists t1,t2;
set @save_use_stat_tables=@@use_stat_tables;
set @save_histogram_size=@@global.histogram_size;
@@ -232,7 +232,7 @@ nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
-test t1 a 0 49 0.0000 1.0000 4 JSON 5B0A20202239222C0A2020223139222C0A2020223331222C0A2020223430220A5D
+test t1 a 0 49 0.0000 1.0000 4 JSON_HB 5B0A20202239222C0A2020223139222C0A2020223331222C0A2020223430220A5D
test t1 b vvvvvvvvvvvvv zzzzzzzzzzzzzzzzzz 0.2000 6.4000 4 JSON 5B0A20202276767676767676767676767676222C0A20202277777777777777777777777777777777777777777777777777777777222C0A202022797979222C0A2020227A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A7A220A5D
test t1 c aaaa dddddddd 0.1250 7.0000 4 JSON 5B0A20202261616161222C0A202022626262626262222C0A202022636363636363636363222C0A2020226464646464646464220A5D
test t1 d 1989-03-12 1999-07-23 0.1500 8.5000 4 JSON 5B0A202022313938392D30332D3132222C0A202022313939302D30352D3135222C0A202022313939302D30352D3135222C0A202022313939392D30372D3233220A5D
diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test
index 4bda3255b1c..1fc42155ece 100644
--- a/mysql-test/main/statistics_json.test
+++ b/mysql-test/main/statistics_json.test
@@ -2,7 +2,7 @@
--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON
--echo #
-let $histogram_type_override='JSON';
+let $histogram_type_override='JSON_HB';
--source statistics.test
--source include/have_stat_tables.inc
@@ -76,7 +76,7 @@ use world;
--enable_result_log
--enable_query_log
-set histogram_type='JSON';
+set histogram_type='JSON_HB';
set histogram_size=50;
--disable_result_log
ANALYZE TABLE Country, City, CountryLanguage persistent for all;
@@ -91,4 +91,4 @@ analyze select * from Country where 'Code' < 'BBC';
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
-DROP SCHEMA world;
\ No newline at end of file
+DROP SCHEMA world;
diff --git a/mysql-test/main/system_mysql_db.result b/mysql-test/main/system_mysql_db.result
index b756dfcf45e..cfcd738e7cc 100644
--- a/mysql-test/main/system_mysql_db.result
+++ b/mysql-test/main/system_mysql_db.result
@@ -234,8 +234,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
- `histogram` blob DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL, `histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
show create table index_stats;
diff --git a/mysql-test/main/system_mysql_db_fix50030.result b/mysql-test/main/system_mysql_db_fix50030.result
index 4e038849cf6..b60253c42ad 100644
--- a/mysql-test/main/system_mysql_db_fix50030.result
+++ b/mysql-test/main/system_mysql_db_fix50030.result
@@ -276,7 +276,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
- `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON') COLLATE utf8mb3_bin DEFAULT NULL,
+ `hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB') COLLATE utf8mb3_bin DEFAULT NULL,
`histogram` blob DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_bin PAGE_CHECKSUM=1 TRANSACTIONAL=0 COMMENT='Statistics on Columns'
diff --git a/scripts/mysql_system_tables.sql b/scripts/mysql_system_tables.sql
index e31f3372b5f..e5b824894a0 100644
--- a/scripts/mysql_system_tables.sql
+++ b/scripts/mysql_system_tables.sql
@@ -314,7 +314,7 @@ DROP TABLE tmp_proxies_priv;
CREATE TABLE IF NOT EXISTS table_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, cardinality bigint(21) unsigned DEFAULT NULL, PRIMARY KEY (db_name,table_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Tables';
-CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varbinary(255) DEFAULT NULL, max_value varbinary(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON'), histogram blob, PRIMARY KEY (db_name,table_name,column_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
+CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varbinary(255) DEFAULT NULL, max_value varbinary(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB'), histogram blob, PRIMARY KEY (db_name,table_name,column_name) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, index_name varchar(64) NOT NULL, prefix_arity int(11) unsigned NOT NULL, avg_frequency decimal(12,4) DEFAULT NULL, PRIMARY KEY (db_name,table_name,index_name,prefix_arity) ) engine=Aria transactional=0 CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Indexes';
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 1f31de8fd52..9f406a7a1cf 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -496,7 +496,7 @@ String *Item_func_from_base64::val_str(String *str)
const char *histogram_types[] =
- {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON", 0};
+ {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", "JSON_HB", 0};
static TYPELIB histogram_types_typelib=
{ array_elements(histogram_types),
"histogram_types",
@@ -526,6 +526,13 @@ String *Item_func_decode_histogram::val_str(String *str)
null_value= 1;
return 0;
}
+
+ if (type == JSON_HB)
+ {
+ // It's a JSON histogram. Return it as-is.
+ return res;
+ }
+
if (type == DOUBLE_PREC_HB && res->length() % 2 != 0)
res->length(res->length() - 1); // one byte is unused
@@ -534,10 +541,6 @@ String *Item_func_decode_histogram::val_str(String *str)
str->length(0);
char numbuf[32];
const uchar *p= (uchar*)res->c_ptr_safe();
- if (type == JSON)
- {
- return res;
- }
for (i= 0; i < res->length(); i++)
{
double val;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 43cf4d7dbdd..fb89bf513c1 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -193,7 +193,7 @@ TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] =
},
{
{ STRING_WITH_LEN("hist_type") },
- { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON')") },
+ { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") },
{ STRING_WITH_LEN("utf8mb3") }
},
{
@@ -1234,7 +1234,7 @@ class Column_stat: public Stat_table
case DOUBLE_PREC_HB:
hist = new (mem_root) Histogram_binary();
break;
- case JSON:
+ case JSON_HB:
hist = new (mem_root) Histogram_json();
break;
default:
@@ -2046,7 +2046,7 @@ class Histogram_builder_json : public Histogram_builder
Histogram_base *create_histogram(Histogram_type hist_type)
{
// assumes the caller already checked for invalid histograms
- if (hist_type == JSON)
+ if (hist_type == JSON_HB)
return new Histogram_json;
else
return new Histogram_binary;
@@ -2207,7 +2207,7 @@ class Count_distinct_field: public Sql_alloc
*/
void walk_tree_with_histogram(ha_rows rows)
{
- if (table_field->collected_stats->histogram_->get_type() == JSON)
+ if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
{
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
tree->walk(table_field->table, json_histogram_build_walk,
@@ -2915,7 +2915,7 @@ Histogram_base * get_histogram_by_type(MEM_ROOT *mem_root, Histogram_type hist_t
case SINGLE_PREC_HB:
case DOUBLE_PREC_HB:
return new Histogram_binary();
- case JSON:
+ case JSON_HB:
return new Histogram_json();
default:
DBUG_ASSERT(0);
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 33e0430450a..f9fdd0a63bc 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -44,7 +44,7 @@ enum enum_histogram_type
{
SINGLE_PREC_HB,
DOUBLE_PREC_HB,
- JSON,
+ JSON_HB,
INVALID_HISTOGRAM
} Histogram_type;
@@ -374,7 +374,7 @@ class Histogram_json : public Histogram_base
Histogram_type get_type() override
{
- return JSON;
+ return JSON_HB;
}
void set_size (ulonglong sz) override {size = (uint8) sz; }
1
0
revision-id: d809e220eb2a31f07016210186600fa9dfe56232 (mariadb-10.6.0-412-gd809e220eb2)
parent(s): aba2dcd0861bf6ed596c24a227522cb3c602874c
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-27 16:49:45 +0300
message:
Fix compile error and test failure:
- Don't use 'res' uninitialized
- multiply it by col_non_nulls before set_if_bigger(...) call.
---
sql/sql_statistics.cc | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index a518aa44958..43cf4d7dbdd 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -4369,8 +4369,8 @@ double get_column_range_cardinality(Field *field,
if (hist && hist->is_usable(thd))
{
sel= hist->range_selectivity(field, min_endp, max_endp);
- set_if_bigger(res, col_stats->get_avg_frequency());
- } else
+ }
+ else
{
double min_mp_pos, max_mp_pos;
if (min_endp && !(field->null_ptr && min_endp->key[0]))
@@ -4395,6 +4395,7 @@ double get_column_range_cardinality(Field *field,
sel = (max_mp_pos - min_mp_pos);
}
res= col_non_nulls * sel;
+ set_if_bigger(res, col_stats->get_avg_frequency());
}
else
res= col_non_nulls;
1
0

[Commits] 8c6c1b80a0b: Range Locking: Move locking secondary keys to right place
by psergey 23 Aug '21
by psergey 23 Aug '21
23 Aug '21
revision-id: 8c6c1b80a0b08b896de495272a41c33fa5dd406e (percona-202103-114-g8c6c1b80a0b)
parent(s): a234810d9a7510f2d27da922d9229ac946c3d99c
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-23 22:03:40 +0300
message:
Range Locking: Move locking secondary keys to right place
>From ha_rocksdb::update_write_sk into ha_rocksdb::check_and_lock_sk
---
storage/rocksdb/ha_rocksdb.cc | 70 ++++++++++++++++++++++++++++++++-----------
storage/rocksdb/ha_rocksdb.h | 4 +++
2 files changed, 57 insertions(+), 17 deletions(-)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index b685bc11eb5..4bda73ff3d2 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -10995,6 +10995,8 @@ int ha_rocksdb::check_and_lock_sk(
lock for this key.
*/
if (!(key_info->flags & HA_NOSAME)) {
+ if (rocksdb_use_range_locking)
+ return check_and_lock_non_unique_sk(key_id, row_info);
return HA_EXIT_SUCCESS;
}
@@ -11111,6 +11113,57 @@ int ha_rocksdb::check_and_lock_sk(
return rc;
}
+
+/**
+ @brief
+ Lock the non-unique sk for range locking
+*/
+int ha_rocksdb::check_and_lock_non_unique_sk(
+ const uint key_id, const struct update_row_info &row_info) {
+
+ DBUG_ASSERT(rocksdb_use_range_locking);
+ const Rdb_key_def &kd = *m_key_descr_arr[key_id];
+ bool store_row_debug_checksums = should_store_row_debug_checksums();
+
+ if (row_info.old_data != nullptr) {
+ rocksdb::Slice old_key_slice;
+ int old_packed_size;
+
+ old_packed_size = kd.pack_record(
+ table, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old,
+ &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0,
+ nullptr, m_ttl_bytes);
+
+ old_key_slice = rocksdb::Slice(
+ reinterpret_cast<const char *>(m_sk_packed_tuple_old), old_packed_size);
+
+ auto s= row_info.tx->lock_singlepoint_range(kd.get_cf(), old_key_slice);
+ if (!s.ok()) {
+ return (row_info.tx->set_status_error(table->in_use, s, kd,
+ m_tbl_def, m_table_handler));
+ }
+ }
+
+ int new_packed_size;
+ rocksdb::Slice new_key_slice;
+ rocksdb::Slice new_value_slice;
+ new_packed_size =
+ kd.pack_record(table, m_pack_buffer, row_info.new_data,
+ m_sk_packed_tuple, &m_sk_tails, 0,
+ row_info.hidden_pk_id, 0, nullptr, m_ttl_bytes);
+ new_key_slice = rocksdb::Slice(
+ reinterpret_cast<const char *>(m_sk_packed_tuple), new_packed_size);
+
+ auto s= row_info.tx->lock_singlepoint_range(kd.get_cf(), new_key_slice);
+ if (!s.ok()) {
+ return (row_info.tx->set_status_error(table->in_use, s, kd,
+ m_tbl_def, m_table_handler));
+ }
+
+ return HA_EXIT_SUCCESS;
+}
+
+
/**
Enumerate all keys to check their uniquess and also lock it
@@ -11428,15 +11481,6 @@ int ha_rocksdb::update_write_sk(const TABLE *const table_arg,
old_key_slice = rocksdb::Slice(
reinterpret_cast<const char *>(m_sk_packed_tuple_old), old_packed_size);
- /* Range locking: lock the index tuple being deleted */
- if (rocksdb_use_range_locking) {
- auto s= row_info.tx->lock_singlepoint_range(kd.get_cf(), old_key_slice);
- if (!s.ok()) {
- return (row_info.tx->set_status_error(table->in_use, s, kd,
- m_tbl_def, m_table_handler));
- }
- }
-
// TODO(mung) - If the new_data and old_data below to the same partial index
// group (ie. have the same prefix), we can make use of the read below to
// determine whether to issue SingleDelete or not.
@@ -11482,14 +11526,6 @@ int ha_rocksdb::update_write_sk(const TABLE *const table_arg,
if (bulk_load_sk && row_info.old_data == nullptr) {
rc = bulk_load_key(row_info.tx, kd, new_key_slice, new_value_slice, true);
} else {
- /* Range locking: lock the index tuple being inserted */
- if (rocksdb_use_range_locking) {
- auto s= row_info.tx->lock_singlepoint_range(kd.get_cf(), new_key_slice);
- if (!s.ok()) {
- return (row_info.tx->set_status_error(table->in_use, s, kd,
- m_tbl_def, m_table_handler));
- }
- }
row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice,
new_value_slice);
}
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index ebe14103de1..48ba50efd3b 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -758,6 +758,10 @@ class ha_rocksdb : public my_core::handler {
const struct update_row_info &row_info,
bool *const found, const bool skip_unique_check)
MY_ATTRIBUTE((__warn_unused_result__));
+
+ int check_and_lock_non_unique_sk(const uint key_id,
+ const struct update_row_info &row_info)
+ MY_ATTRIBUTE((__warn_unused_result__));
int check_uniqueness_and_lock(const struct update_row_info &row_info,
bool pk_changed, const bool skip_unique_check)
MY_ATTRIBUTE((__warn_unused_result__));
1
0

[Commits] 349fb057ab2: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
by psergey 08 Aug '21
by psergey 08 Aug '21
08 Aug '21
revision-id: 349fb057ab28118bafb4f60cb96e743967c9d748 (mariadb-10.5.4-837-g349fb057ab2)
parent(s): 4ba4c06038165726dad6a18d85c2b54446ea5ef8
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-09 00:16:34 +0300
message:
MDEV-26301: LATERAL DERIVED refills the temp. table too many times
Do remember and check LATERAL DERIVED table's "parameters". If the values
of parameters have not changed, do not refill the derived table.
Take this into account at optimization phase: compute the shortest join
prefix $SP that LATERAL DERIVED table depends on, then use $SP's output
cardinality as an upper bound of #of times the LATERAL DERIVED table will
be refilled.
The optimization is controlled by @@optimizer_switch flag named
split_materialized_is_lazy and is disabled by default.
---
mysql-test/main/derived_split_innodb.result | 347 +++++++++++++++++++++
mysql-test/main/derived_split_innodb.test | 168 ++++++++++
mysql-test/main/mysqld--help.result | 3 +-
mysql-test/main/mysqltest_tracking_info.result | 2 +-
mysql-test/main/opt_trace.result | 2 +-
.../suite/sys_vars/r/optimizer_switch_basic.result | 32 +-
.../sys_vars/r/sysvars_server_notembedded.result | 2 +-
sql/opt_split.cc | 192 +++++++++++-
sql/sql_derived.cc | 9 +
sql/sql_lex.cc | 1 +
sql/sql_lex.h | 3 +
sql/sql_priv.h | 1 +
sql/sql_select.cc | 5 +-
sql/sql_select.h | 28 +-
sql/sql_union.cc | 1 +
sql/sys_vars.cc | 1 +
16 files changed, 769 insertions(+), 28 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..5e54c0151f0 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,350 @@ set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
# End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "split_chosen": true
+ }
+]
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+# The important part in the below output is:
+# "lateral": 1,
+# "query_block": {
+# "select_id": 2,
+# "r_loops": 5, <-- must be 5, not 30.
+analyze format=json
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+ANALYZE
+{
+ "query_block": {
+ "select_id": 1,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "const_condition": "1",
+ "table": {
+ "table_name": "t1",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 5,
+ "r_rows": 5,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "table": {
+ "table_name": "t2",
+ "access_type": "ref",
+ "possible_keys": ["a"],
+ "key": "a",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.b"],
+ "r_loops": 5,
+ "rows": 2,
+ "r_rows": 2,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))"
+ },
+ "table": {
+ "table_name": "t3",
+ "access_type": "ref",
+ "possible_keys": ["a"],
+ "key": "a",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.b"],
+ "r_loops": 10,
+ "rows": 3,
+ "r_rows": 3,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))"
+ },
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["grp_id"],
+ "ref": ["test.t1.b"],
+ "r_loops": 30,
+ "rows": 10,
+ "r_rows": 1,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))",
+ "materialized": {
+ "lateral": 1,
+ "query_block": {
+ "select_id": 2,
+ "r_loops": 5,
+ "r_total_time_ms": "REPLACED",
+ "outer_ref_condition": "t1.b is not null",
+ "table": {
+ "table_name": "t10",
+ "access_type": "ref",
+ "possible_keys": ["grp_id"],
+ "key": "grp_id",
+ "key_length": "5",
+ "used_key_parts": ["grp_id"],
+ "ref": ["test.t1.b"],
+ "r_loops": 5,
+ "rows": 100,
+ "r_rows": 100,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "block-nl-join": {
+ "table": {
+ "table_name": "t11",
+ "access_type": "ALL",
+ "r_loops": 5,
+ "rows": 10,
+ "r_rows": 10,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "buffer_type": "flat",
+ "buffer_size": "1Kb",
+ "join_type": "BNL",
+ "attached_condition": "trigcond(t11.col1 = t10.col1)",
+ "r_filtered": 10
+ }
+ }
+ }
+ }
+ }
+}
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5 Using where
+1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index
+1 PRIMARY t2 ref a a 5 test.t1.b 2
+1 PRIMARY t3 ref a a 5 test.t1.b 3
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..1776a88ba05 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,171 @@ set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+--echo # The important part in the below output is:
+--echo # "lateral": 1,
+--echo # "query_block": {
+--echo # "select_id": 2,
+--echo # "r_loops": 5, <-- must be 5, not 30.
+--source include/analyze-format.inc
+analyze format=json
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+ t21, t22,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1 and t22.pk=2;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+explain
+select * from
+ t21,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+# And also add a non-const table
+
+create table t5 (
+ pk int primary key
+ );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+ t21,
+ (
+ (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 78616ae7c9d..6a38d720493 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -718,7 +718,8 @@ The following specify which files/extra groups are read (specified before remain
extended_keys, exists_to_in, orderby_uses_equalities,
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
- condition_pushdown_from_having, not_null_range_scan
+ condition_pushdown_from_having, not_null_range_scan,
+ split_materialized_is_lazy
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
diff --git a/mysql-test/main/mysqltest_tracking_info.result b/mysql-test/main/mysqltest_tracking_info.result
index 2a58ba430ca..3c91c956cfb 100644
--- a/mysql-test/main/mysqltest_tracking_info.result
+++ b/mysql-test/main/mysqltest_tracking_info.result
@@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch';
set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off';
-- Tracker : SESSION_TRACK_SYSTEM_VARIABLES
-- optimizer_switch
--- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
Warnings:
Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
],
"split_plan_choice":
{
+ "unsplit_cost": 25.72361682,
"split_cost": 2.488945919,
"record_count_for_split": 4,
- "unsplit_cost": 25.72361682,
"split_chosen": true
},
"chosen_lateral_derived":
diff --git a/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result b/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result
index 80bd2d7af5f..0d958018cf6 100644
--- a/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result
+++ b/mysql-test/suite/sys_vars/r/optimizer_switch_basic.result
@@ -1,60 +1,60 @@
set @@global.optimizer_switch=@@optimizer_switch;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
select @@session.optimizer_switch;
@@session.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
show global variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
show session variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
select * from information_schema.global_variables where variable_name='optimizer_switch';
VARIABLE_NAME VARIABLE_VALUE
-OPTIMIZER_SWITCH index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+OPTIMIZER_SWITCH index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
select * from information_schema.session_variables where variable_name='optimizer_switch';
VARIABLE_NAME VARIABLE_VALUE
-OPTIMIZER_SWITCH index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
+OPTIMIZER_SWITCH index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,split_materialized_is_lazy=off
set global optimizer_switch=4101;
set session optimizer_switch=2058;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
select @@session.optimizer_switch;
@@session.optimizer_switch
-index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
set global optimizer_switch="index_merge_sort_union=on";
set session optimizer_switch="index_merge=off";
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
select @@session.optimizer_switch;
@@session.optimizer_switch
-index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
show global variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+optimizer_switch index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
show session variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+optimizer_switch index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
select * from information_schema.global_variables where variable_name='optimizer_switch';
VARIABLE_NAME VARIABLE_VALUE
-OPTIMIZER_SWITCH index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+OPTIMIZER_SWITCH index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
select * from information_schema.session_variables where variable_name='optimizer_switch';
VARIABLE_NAME VARIABLE_VALUE
-OPTIMIZER_SWITCH index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+OPTIMIZER_SWITCH index_merge=off,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
set session optimizer_switch="default";
select @@session.optimizer_switch;
@@session.optimizer_switch
-index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off
+index_merge=on,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=off,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=off,in_to_exists=on,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=off,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=off,condition_pushdown_for_subquery=off,rowid_filter=off,condition_pushdown_from_having=off,not_null_range_scan=off,split_materialized_is_lazy=off
set optimizer_switch = replace(@@optimizer_switch, '=off', '=on');
Warnings:
Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=on,split_materialized_is_lazy=on
set global optimizer_switch=1.1;
ERROR 42000: Incorrect argument type to variable 'optimizer_switch'
set global optimizer_switch=1e1;
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index 03296aba336..cae2a8266fa 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -2439,7 +2439,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
-ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,default
+ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,split_materialized_is_lazy,default
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_TRACE
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..7157caa248e 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,11 +876,92 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
}
+
+/*
+ @brief
+ Find shortest join order sub-prefix that covers the specified table_map.
+
+ @detail
+ Given a join prefix and a bitmap of table(s) in table_map, find the
+ shortest prefix of the prefix that covers the tables in the table_map.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_pos, table_map map)
+{
+ map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+ table_map covered= 0;
+ uint i;
+
+ for (i=0; i < max_pos; i++)
+ {
+ covered |= positions[i].table->table->map;
+ if (!(map & ~covered))
+ break;
+ }
+ return i;
+}
+
+
+/*
+ @brief
+ Find the shortest join prefix that has a KEYUSE_EXT for a given key_part.
+
+ @detail
+ Given an array of KEYUSE objects for access to {table, key, key_part},
+ and a join order prefix (specified by array of POSITION structs),
+ find the shortest prefix for which there is a KEYUSE_EXT object describin
+ the access to they table.key.key_part
+
+ @note
+ KEYUSE_EXT objects are from inside the derived table, while the join
+ prefix is from the parent JOIN, so we need to use
+ KEYUSE_EXT::needed_in_prefix
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+ KEYUSE_EXT *keyuse)
+{
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint keypart= keyuse->keypart;
+ uint prefix_size= max_prefix_size;
+
+ while (keyuse->table == table && keyuse->key == key &&
+ keyuse->keypart == keypart)
+ {
+ uint cur_size= get_prefix_size(positions, max_prefix_size,
+ keyuse->needed_in_prefix);
+ if (cur_size < prefix_size)
+ prefix_size= cur_size;
+ keyuse++;
+ }
+ return prefix_size;
+}
+
+
+/*
+ Get fanout of of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+ double fanout = 1.0;
+ for (uint i=0; i <= prefix_size; i++)
+ {
+ if (positions[i].records_read > 1e-30)
+ fanout *= positions[i].records_read;
+ if (positions[i].cond_selectivity > 1e-30)
+ fanout *= positions[i].cond_selectivity;
+ }
+ return fanout;
+}
+
/**
@brief
Choose the best splitting to extend the evaluated partial join
@param
+ join_prefix Join order prefix built so far
+ prefix_size Length of the join prefix
record_count estimated cardinality of the extended partial join
remaining_tables tables not joined yet
@@ -906,7 +987,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
if the plan has been chosen, NULL - otherwise.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+ uint prefix_size,
+ double record_count,
table_map remaining_tables)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +1005,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
+ uint best_min_prefix_size;
Json_writer_object spl_trace(thd, "lateral_derived_choice");
Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1026,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ uint prefix_len= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1036,40 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (!(keyuse_ext->keypart_map & found_parts))
{
+ /*
+ Check for
+ 1. found_parts is empty and this is the first key part we've found
+ 2. or, we are looking at the first keyuse object for this keypart,
+ and found_parts has a bit set for the previous key part.
+ */
if ((!found_parts && !keyuse_ext->keypart) ||
(found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+ {
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ Find the shortest join prefix we'll need to get a lookup value
+ for this keypart read this key part
+ */
+ uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+ prefix_size,
+ keyuse_ext);
+ /*
+ The join prefix needed to read all keyparts is the longest
+ prefix of all key parts.
+ */
+ if (min_for_kp > prefix_len)
+ prefix_len= min_for_kp;
+ }
+
found_parts|= keyuse_ext->keypart_map;
+ }
else
{
+ /*
+ This is a new key part but it doesn't form a continuous index
+ prefix. Skip all KEYUSEs for this index.
+ */
do
{
keyuse_ext++;
@@ -981,6 +1095,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ best_min_prefix_size= prefix_len;
+ trace.add("min_prefix_len", (longlong)prefix_len);
+ }
}
keyuse_ext++;
}
@@ -1069,10 +1188,29 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (spl_plan)
{
Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
choice.add("split_cost", spl_plan->cost);
choice.add("record_count_for_split", record_count);
- choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+ double fanout= record_count;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ The LATERAL DERIVED table will only be refilled if the values it
+ refers to change.
+ The values refer only to the prefix of the join prefix so far.
+ Adjust the number of LATERAL DERIVED refills accordingly.
+ */
+ double prefix_fanout= get_join_prefix_fanout(join_prefix,
+ best_min_prefix_size);
+ if (prefix_fanout < fanout)
+ fanout= prefix_fanout;
+
+ choice.add("join_prefix_fanout_for_split", fanout);
+ }
+
+ if (fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
@@ -1112,6 +1250,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
@param
remaining_tables used to filter out the equalities that cannot
be pushed.
+ lparams if non-NULL, call lparams->add() to collect a list
+ of the "parameter" items for the lateral subquery.
+ The list will be used to reduce the number of times
+ the lateral derived table is refilled.
@details
This function is called by JOIN_TAB::fix_splitting that is used
@@ -1128,18 +1270,23 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
true on failure
*/
-bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
+bool JOIN::inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *lparams)
{
Item *inj_cond= 0;
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field;
+ table_map needed_tables= 0;
while ((added_key_field= li++))
{
if (remaining_tables & added_key_field->val->used_tables())
continue;
+ if (lparams && lparams->add_param(added_key_field->val))
+ return true;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true;
+ needed_tables |= added_key_field->val->used_tables();
}
DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list->elements) {
@@ -1230,8 +1377,15 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
memcpy((char *) md_join->best_positions,
(char *) spl_plan->best_positions,
sizeof(POSITION) * md_join->table_count);
- if (md_join->inject_best_splitting_cond(remaining_tables))
+
+ Lateral_derived_parameters *lparams= NULL;
+ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ lparams= new Lateral_derived_parameters;
+
+ if (md_join->inject_best_splitting_cond(remaining_tables, lparams))
return true;
+
+ table->pos_in_table_list->get_unit()->lateral_derived_params= lparams;
/*
This is called for a proper work of JOIN::get_best_combination()
called for the join that materializes T
@@ -1283,3 +1437,31 @@ bool JOIN::fix_all_splittings_in_plan()
}
return false;
}
+
+
+bool Lateral_derived_parameters::add_param(Item *item)
+{
+ Cached_item *tmp;
+ if (!(tmp= new_Cached_item(current_thd, item, FALSE)) ||
+ list.push_back(tmp))
+ return true;
+
+ return false;
+}
+
+
+/*
+ @brief
+ Test if LATERAL DERIVED table's parameters changed since last invocation
+
+ @return
+ false Parameter values have not changed since last invocation
+ true Parameter values have changed since last invocation. Current values
+ of the parameters are now saved.
+*/
+bool Lateral_derived_parameters::need_refill()
+{
+ if (test_if_group_changed(list) == -1)
+ return false; // not changed
+ return true;
+}
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index f8ee3475af8..25524c90da5 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1233,6 +1233,15 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
DBUG_RETURN(res);
}
+ /*
+ For LATERAL DERIVED tables, check if we actually need to refill it. If its
+ "parameters" (i.e. items it refers to) have the same values, we don't need
+ to refill.
+ */
+ if (unit->lateral_derived_params &&
+ !unit->lateral_derived_params->need_refill())
+ DBUG_RETURN(false);
+
if (unit->executed && !derived_is_recursive &&
(unit->uncacheable & UNCACHEABLE_DEPENDENT))
{
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index b9ca67182cf..5b3897b28fa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -2915,6 +2915,7 @@ void st_select_lex_unit::init_query()
columns_are_renamed= false;
with_wrapped_tvc= false;
have_except_all_or_intersect_all= false;
+ lateral_derived_params= NULL;
}
void st_select_lex::init_query()
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 45a3bf72fa4..278ee7d5276 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -845,6 +845,7 @@ class JOIN;
class select_unit;
class Procedure;
class Explain_query;
+class Lateral_derived_parameters;
void delete_explain_query(LEX *lex);
void create_explain_query(LEX *lex, MEM_ROOT *mem_root);
@@ -964,6 +965,8 @@ class st_select_lex_unit: public st_select_lex_node {
bool columns_are_renamed;
+ Lateral_derived_parameters *lateral_derived_params;
+
void init_query();
st_select_lex* outer_select();
const st_select_lex* first_select() const
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index 07f07a7150f..7cdb6a62d08 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -234,6 +234,7 @@
#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
+#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY (1ULL << 36)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 39e224ab024..85c3355cf08 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7460,7 +7460,10 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ {
+ spl_plan= s->choose_best_splitting(join_positions, idx, record_count,
+ remaining_tables);
+ }
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..2a9cb5464e6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -249,6 +249,7 @@ class AGGR_OP;
class Filesort;
struct SplM_plan_info;
class SplM_opt_info;
+class Lateral_derived_parameters;
typedef struct st_join_table {
TABLE *table;
@@ -682,7 +683,9 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
+ SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
@@ -1795,7 +1798,8 @@ class JOIN :public Sql_alloc
bool inject_cond_into_where(Item *injected_cond);
bool check_for_splittable_materialized();
void add_keyuses_for_splitting();
- bool inject_best_splitting_cond(table_map remaining_tables);
+ bool inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *params);
bool fix_all_splittings_in_plan();
void make_notnull_conds_for_range_scans();
@@ -2532,4 +2536,24 @@ void propagate_new_equalities(THD *thd, Item *cond,
COND_EQUAL *inherited,
bool *is_simplifiable_cond);
+/*
+ A list of parameters for a derived table that uses LATERAL DERIVED
+ optimization.
+
+ The last values of the parameters are saved, which one can use to check
+ whether
+ - they've changed and one needs to refill the LATERAL DERIVED table
+ - they are not changed and refill is not needed.
+*/
+class Lateral_derived_parameters : public Sql_alloc
+{
+ List<Cached_item> list;
+public:
+ // Construction interface
+ bool add_param(Item *item);
+
+ // Execution interface
+ bool need_refill();
+};
+
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b88d78c0db3..a0a89b1e401 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -2588,6 +2588,7 @@ bool st_select_lex_unit::cleanup()
}
columns_are_renamed= false;
cleaned= 1;
+ lateral_derived_params= NULL;
for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
error|= sl->cleanup();
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 2f7cf290dcd..725f407fc3b 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2717,6 +2717,7 @@ export const char *optimizer_switch_names[]=
"rowid_filter",
"condition_pushdown_from_having",
"not_null_range_scan",
+ "split_materialized_is_lazy",
"default",
NullS
};
1
0

[Commits] 44f4f3ebbd0: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
by psergey 08 Aug '21
by psergey 08 Aug '21
08 Aug '21
revision-id: 44f4f3ebbd0ab0208ff592f941f329173465e08c (mariadb-10.5.4-837-g44f4f3ebbd0)
parent(s): 4ba4c06038165726dad6a18d85c2b54446ea5ef8
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-08 19:56:05 +0300
message:
MDEV-26301: LATERAL DERIVED refills the temp. table too many times
Do remember and check LATERAL DERIVED table's "parameters". If the values
of parameters have not changed, do not refill the derived table.
Take this into account at optimization phase: compute the shortest join
prefix $SP that LATERAL DERIVED table depends on, then use $SP's output
cardinality as an upper bound of #of times the LATERAL DERIVED table will
be refilled.
The optimization is controlled by @@optimizer_switch flag named
split_materialized_is_lazy and is disabled by default.
---
mysql-test/main/derived_split_innodb.result | 347 ++++++++++++++++++++++++++++
mysql-test/main/derived_split_innodb.test | 168 ++++++++++++++
mysql-test/main/mysqld--help.result | 3 +-
mysql-test/main/opt_trace.result | 2 +-
sql/opt_split.cc | 192 ++++++++++++++-
sql/sql_derived.cc | 9 +
sql/sql_lex.cc | 1 +
sql/sql_lex.h | 3 +
sql/sql_priv.h | 1 +
sql/sql_select.cc | 5 +-
sql/sql_select.h | 28 ++-
sql/sql_union.cc | 1 +
sql/sys_vars.cc | 1 +
13 files changed, 751 insertions(+), 10 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..5e54c0151f0 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,350 @@ set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
# End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "split_chosen": true
+ }
+]
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+# The important part in the below output is:
+# "lateral": 1,
+# "query_block": {
+# "select_id": 2,
+# "r_loops": 5, <-- must be 5, not 30.
+analyze format=json
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+ANALYZE
+{
+ "query_block": {
+ "select_id": 1,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "const_condition": "1",
+ "table": {
+ "table_name": "t1",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 5,
+ "r_rows": 5,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "table": {
+ "table_name": "t2",
+ "access_type": "ref",
+ "possible_keys": ["a"],
+ "key": "a",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.b"],
+ "r_loops": 5,
+ "rows": 2,
+ "r_rows": 2,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))"
+ },
+ "table": {
+ "table_name": "t3",
+ "access_type": "ref",
+ "possible_keys": ["a"],
+ "key": "a",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.b"],
+ "r_loops": 10,
+ "rows": 3,
+ "r_rows": 3,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))"
+ },
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["grp_id"],
+ "ref": ["test.t1.b"],
+ "r_loops": 30,
+ "rows": 10,
+ "r_rows": 1,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "trigcond(trigcond(t1.b is not null))",
+ "materialized": {
+ "lateral": 1,
+ "query_block": {
+ "select_id": 2,
+ "r_loops": 5,
+ "r_total_time_ms": "REPLACED",
+ "outer_ref_condition": "t1.b is not null",
+ "table": {
+ "table_name": "t10",
+ "access_type": "ref",
+ "possible_keys": ["grp_id"],
+ "key": "grp_id",
+ "key_length": "5",
+ "used_key_parts": ["grp_id"],
+ "ref": ["test.t1.b"],
+ "r_loops": 5,
+ "rows": 100,
+ "r_rows": 100,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "block-nl-join": {
+ "table": {
+ "table_name": "t11",
+ "access_type": "ALL",
+ "r_loops": 5,
+ "rows": 10,
+ "r_rows": 10,
+ "r_table_time_ms": "REPLACED",
+ "r_other_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ },
+ "buffer_type": "flat",
+ "buffer_size": "1Kb",
+ "join_type": "BNL",
+ "attached_condition": "trigcond(t11.col1 = t10.col1)",
+ "r_filtered": 10
+ }
+ }
+ }
+ }
+ }
+}
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5 Using where
+1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index
+1 PRIMARY t2 ref a a 5 test.t1.b 2
+1 PRIMARY t3 ref a a 5 test.t1.b 3
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..1776a88ba05 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,171 @@ set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+--echo # The important part in the below output is:
+--echo # "lateral": 1,
+--echo # "query_block": {
+--echo # "select_id": 2,
+--echo # "r_loops": 5, <-- must be 5, not 30.
+--source include/analyze-format.inc
+analyze format=json
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+ t21, t22,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1 and t22.pk=2;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+explain
+select * from
+ t21,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+# And also add a non-const table
+
+create table t5 (
+ pk int primary key
+ );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+ t21,
+ (
+ (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 78616ae7c9d..6a38d720493 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -718,7 +718,8 @@ The following specify which files/extra groups are read (specified before remain
extended_keys, exists_to_in, orderby_uses_equalities,
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
- condition_pushdown_from_having, not_null_range_scan
+ condition_pushdown_from_having, not_null_range_scan,
+ split_materialized_is_lazy
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
],
"split_plan_choice":
{
+ "unsplit_cost": 25.72361682,
"split_cost": 2.488945919,
"record_count_for_split": 4,
- "unsplit_cost": 25.72361682,
"split_chosen": true
},
"chosen_lateral_derived":
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..7157caa248e 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,11 +876,92 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
}
+
+/*
+ @brief
+ Find shortest join order sub-prefix that covers the specified table_map.
+
+ @detail
+ Given a join prefix and a bitmap of table(s) in table_map, find the
+ shortest prefix of the prefix that covers the tables in the table_map.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_pos, table_map map)
+{
+ map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+ table_map covered= 0;
+ uint i;
+
+ for (i=0; i < max_pos; i++)
+ {
+ covered |= positions[i].table->table->map;
+ if (!(map & ~covered))
+ break;
+ }
+ return i;
+}
+
+
+/*
+ @brief
+ Find the shortest join prefix that has a KEYUSE_EXT for a given key_part.
+
+ @detail
+ Given an array of KEYUSE objects for access to {table, key, key_part},
+ and a join order prefix (specified by array of POSITION structs),
+ find the shortest prefix for which there is a KEYUSE_EXT object describin
+ the access to they table.key.key_part
+
+ @note
+ KEYUSE_EXT objects are from inside the derived table, while the join
+ prefix is from the parent JOIN, so we need to use
+ KEYUSE_EXT::needed_in_prefix
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+ KEYUSE_EXT *keyuse)
+{
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint keypart= keyuse->keypart;
+ uint prefix_size= max_prefix_size;
+
+ while (keyuse->table == table && keyuse->key == key &&
+ keyuse->keypart == keypart)
+ {
+ uint cur_size= get_prefix_size(positions, max_prefix_size,
+ keyuse->needed_in_prefix);
+ if (cur_size < prefix_size)
+ prefix_size= cur_size;
+ keyuse++;
+ }
+ return prefix_size;
+}
+
+
+/*
+ Get fanout of of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+ double fanout = 1.0;
+ for (uint i=0; i <= prefix_size; i++)
+ {
+ if (positions[i].records_read > 1e-30)
+ fanout *= positions[i].records_read;
+ if (positions[i].cond_selectivity > 1e-30)
+ fanout *= positions[i].cond_selectivity;
+ }
+ return fanout;
+}
+
/**
@brief
Choose the best splitting to extend the evaluated partial join
@param
+ join_prefix Join order prefix built so far
+ prefix_size Length of the join prefix
record_count estimated cardinality of the extended partial join
remaining_tables tables not joined yet
@@ -906,7 +987,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
if the plan has been chosen, NULL - otherwise.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+ uint prefix_size,
+ double record_count,
table_map remaining_tables)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +1005,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
+ uint best_min_prefix_size;
Json_writer_object spl_trace(thd, "lateral_derived_choice");
Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1026,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ uint prefix_len= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1036,40 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (!(keyuse_ext->keypart_map & found_parts))
{
+ /*
+ Check for
+ 1. found_parts is empty and this is the first key part we've found
+ 2. or, we are looking at the first keyuse object for this keypart,
+ and found_parts has a bit set for the previous key part.
+ */
if ((!found_parts && !keyuse_ext->keypart) ||
(found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+ {
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ Find the shortest join prefix we'll need to get a lookup value
+ for this keypart read this key part
+ */
+ uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+ prefix_size,
+ keyuse_ext);
+ /*
+ The join prefix needed to read all keyparts is the longest
+ prefix of all key parts.
+ */
+ if (min_for_kp > prefix_len)
+ prefix_len= min_for_kp;
+ }
+
found_parts|= keyuse_ext->keypart_map;
+ }
else
{
+ /*
+ This is a new key part but it doesn't form a continuous index
+ prefix. Skip all KEYUSEs for this index.
+ */
do
{
keyuse_ext++;
@@ -981,6 +1095,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ best_min_prefix_size= prefix_len;
+ trace.add("min_prefix_len", (longlong)prefix_len);
+ }
}
keyuse_ext++;
}
@@ -1069,10 +1188,29 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (spl_plan)
{
Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
choice.add("split_cost", spl_plan->cost);
choice.add("record_count_for_split", record_count);
- choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+ double fanout= record_count;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ The LATERAL DERIVED table will only be refilled if the values it
+ refers to change.
+ The values refer only to the prefix of the join prefix so far.
+ Adjust the number of LATERAL DERIVED refills accordingly.
+ */
+ double prefix_fanout= get_join_prefix_fanout(join_prefix,
+ best_min_prefix_size);
+ if (prefix_fanout < fanout)
+ fanout= prefix_fanout;
+
+ choice.add("join_prefix_fanout_for_split", fanout);
+ }
+
+ if (fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
@@ -1112,6 +1250,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
@param
remaining_tables used to filter out the equalities that cannot
be pushed.
+ lparams if non-NULL, call lparams->add() to collect a list
+ of the "parameter" items for the lateral subquery.
+ The list will be used to reduce the number of times
+ the lateral derived table is refilled.
@details
This function is called by JOIN_TAB::fix_splitting that is used
@@ -1128,18 +1270,23 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
true on failure
*/
-bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
+bool JOIN::inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *lparams)
{
Item *inj_cond= 0;
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field;
+ table_map needed_tables= 0;
while ((added_key_field= li++))
{
if (remaining_tables & added_key_field->val->used_tables())
continue;
+ if (lparams && lparams->add_param(added_key_field->val))
+ return true;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true;
+ needed_tables |= added_key_field->val->used_tables();
}
DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list->elements) {
@@ -1230,8 +1377,15 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
memcpy((char *) md_join->best_positions,
(char *) spl_plan->best_positions,
sizeof(POSITION) * md_join->table_count);
- if (md_join->inject_best_splitting_cond(remaining_tables))
+
+ Lateral_derived_parameters *lparams= NULL;
+ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ lparams= new Lateral_derived_parameters;
+
+ if (md_join->inject_best_splitting_cond(remaining_tables, lparams))
return true;
+
+ table->pos_in_table_list->get_unit()->lateral_derived_params= lparams;
/*
This is called for a proper work of JOIN::get_best_combination()
called for the join that materializes T
@@ -1283,3 +1437,31 @@ bool JOIN::fix_all_splittings_in_plan()
}
return false;
}
+
+
+bool Lateral_derived_parameters::add_param(Item *item)
+{
+ Cached_item *tmp;
+ if (!(tmp= new_Cached_item(current_thd, item, FALSE)) ||
+ list.push_back(tmp))
+ return true;
+
+ return false;
+}
+
+
+/*
+ @brief
+ Test if LATERAL DERIVED table's parameters changed since last invocation
+
+ @return
+ false Parameter values have not changed since last invocation
+ true Parameter values have changed since last invocation. Current values
+ of the parameters are now saved.
+*/
+bool Lateral_derived_parameters::need_refill()
+{
+ if (test_if_group_changed(list) == -1)
+ return false; // not changed
+ return true;
+}
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index f8ee3475af8..25524c90da5 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1233,6 +1233,15 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
DBUG_RETURN(res);
}
+ /*
+ For LATERAL DERIVED tables, check if we actually need to refill it. If its
+ "parameters" (i.e. items it refers to) have the same values, we don't need
+ to refill.
+ */
+ if (unit->lateral_derived_params &&
+ !unit->lateral_derived_params->need_refill())
+ DBUG_RETURN(false);
+
if (unit->executed && !derived_is_recursive &&
(unit->uncacheable & UNCACHEABLE_DEPENDENT))
{
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index b9ca67182cf..5b3897b28fa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -2915,6 +2915,7 @@ void st_select_lex_unit::init_query()
columns_are_renamed= false;
with_wrapped_tvc= false;
have_except_all_or_intersect_all= false;
+ lateral_derived_params= NULL;
}
void st_select_lex::init_query()
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 45a3bf72fa4..278ee7d5276 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -845,6 +845,7 @@ class JOIN;
class select_unit;
class Procedure;
class Explain_query;
+class Lateral_derived_parameters;
void delete_explain_query(LEX *lex);
void create_explain_query(LEX *lex, MEM_ROOT *mem_root);
@@ -964,6 +965,8 @@ class st_select_lex_unit: public st_select_lex_node {
bool columns_are_renamed;
+ Lateral_derived_parameters *lateral_derived_params;
+
void init_query();
st_select_lex* outer_select();
const st_select_lex* first_select() const
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index 07f07a7150f..7cdb6a62d08 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -234,6 +234,7 @@
#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
+#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY (1ULL << 36)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 39e224ab024..85c3355cf08 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7460,7 +7460,10 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ {
+ spl_plan= s->choose_best_splitting(join_positions, idx, record_count,
+ remaining_tables);
+ }
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..2a9cb5464e6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -249,6 +249,7 @@ class AGGR_OP;
class Filesort;
struct SplM_plan_info;
class SplM_opt_info;
+class Lateral_derived_parameters;
typedef struct st_join_table {
TABLE *table;
@@ -682,7 +683,9 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
+ SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
@@ -1795,7 +1798,8 @@ class JOIN :public Sql_alloc
bool inject_cond_into_where(Item *injected_cond);
bool check_for_splittable_materialized();
void add_keyuses_for_splitting();
- bool inject_best_splitting_cond(table_map remaining_tables);
+ bool inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *params);
bool fix_all_splittings_in_plan();
void make_notnull_conds_for_range_scans();
@@ -2532,4 +2536,24 @@ void propagate_new_equalities(THD *thd, Item *cond,
COND_EQUAL *inherited,
bool *is_simplifiable_cond);
+/*
+ A list of parameters for a derived table that uses LATERAL DERIVED
+ optimization.
+
+ The last values of the parameters are saved, which one can use to check
+ whether
+ - they've changed and one needs to refill the LATERAL DERIVED table
+ - they are not changed and refill is not needed.
+*/
+class Lateral_derived_parameters : public Sql_alloc
+{
+ List<Cached_item> list;
+public:
+ // Construction interface
+ bool add_param(Item *item);
+
+ // Execution interface
+ bool need_refill();
+};
+
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b88d78c0db3..a0a89b1e401 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -2588,6 +2588,7 @@ bool st_select_lex_unit::cleanup()
}
columns_are_renamed= false;
cleaned= 1;
+ lateral_derived_params= NULL;
for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
error|= sl->cleanup();
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 2f7cf290dcd..725f407fc3b 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2717,6 +2717,7 @@ export const char *optimizer_switch_names[]=
"rowid_filter",
"condition_pushdown_from_having",
"not_null_range_scan",
+ "split_materialized_is_lazy",
"default",
NullS
};
1
0

[Commits] 9f8c928a939: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
by psergey 08 Aug '21
by psergey 08 Aug '21
08 Aug '21
revision-id: 9f8c928a939e5b92aaf5e64aab468f01d656af68 (mariadb-10.5.4-837-g9f8c928a939)
parent(s): 4ba4c06038165726dad6a18d85c2b54446ea5ef8
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-08 19:41:35 +0300
message:
MDEV-26301: LATERAL DERIVED refills the temp. table too many times
---
mysql-test/main/derived_split_innodb.result | 220 ++++++++++++++++++++++++++++
mysql-test/main/derived_split_innodb.test | 153 +++++++++++++++++++
mysql-test/main/mysqld--help.result | 3 +-
mysql-test/main/opt_trace.result | 2 +-
sql/opt_split.cc | 192 +++++++++++++++++++++++-
sql/sql_derived.cc | 9 ++
sql/sql_lex.cc | 1 +
sql/sql_lex.h | 3 +
sql/sql_priv.h | 1 +
sql/sql_select.cc | 5 +-
sql/sql_select.h | 28 +++-
sql/sql_union.cc | 1 +
sql/sys_vars.cc | 1 +
13 files changed, 609 insertions(+), 10 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..d37339b8393 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,223 @@ set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
# End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "split_chosen": true
+ }
+]
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5 Using where
+1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index
+1 PRIMARY t2 ref a a 5 test.t1.b 2
+1 PRIMARY t3 ref a a 5 test.t1.b 3
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..1369018f532 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,156 @@ set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+ t21, t22,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1 and t22.pk=2;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+explain
+select * from
+ t21,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+# And also add a non-const table
+
+create table t5 (
+ pk int primary key
+ );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+ t21,
+ (
+ (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 78616ae7c9d..6a38d720493 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -718,7 +718,8 @@ The following specify which files/extra groups are read (specified before remain
extended_keys, exists_to_in, orderby_uses_equalities,
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
- condition_pushdown_from_having, not_null_range_scan
+ condition_pushdown_from_having, not_null_range_scan,
+ split_materialized_is_lazy
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
],
"split_plan_choice":
{
+ "unsplit_cost": 25.72361682,
"split_cost": 2.488945919,
"record_count_for_split": 4,
- "unsplit_cost": 25.72361682,
"split_chosen": true
},
"chosen_lateral_derived":
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..7157caa248e 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,11 +876,92 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
}
+
+/*
+ @brief
+ Find shortest join order sub-prefix that covers the specified table_map.
+
+ @detail
+ Given a join prefix and a bitmap of table(s) in table_map, find the
+ shortest prefix of the prefix that covers the tables in the table_map.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_pos, table_map map)
+{
+ map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+ table_map covered= 0;
+ uint i;
+
+ for (i=0; i < max_pos; i++)
+ {
+ covered |= positions[i].table->table->map;
+ if (!(map & ~covered))
+ break;
+ }
+ return i;
+}
+
+
+/*
+ @brief
+ Find the shortest join prefix that has a KEYUSE_EXT for a given key_part.
+
+ @detail
+ Given an array of KEYUSE objects for access to {table, key, key_part},
+ and a join order prefix (specified by array of POSITION structs),
+ find the shortest prefix for which there is a KEYUSE_EXT object describin
+ the access to they table.key.key_part
+
+ @note
+ KEYUSE_EXT objects are from inside the derived table, while the join
+ prefix is from the parent JOIN, so we need to use
+ KEYUSE_EXT::needed_in_prefix
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+ KEYUSE_EXT *keyuse)
+{
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint keypart= keyuse->keypart;
+ uint prefix_size= max_prefix_size;
+
+ while (keyuse->table == table && keyuse->key == key &&
+ keyuse->keypart == keypart)
+ {
+ uint cur_size= get_prefix_size(positions, max_prefix_size,
+ keyuse->needed_in_prefix);
+ if (cur_size < prefix_size)
+ prefix_size= cur_size;
+ keyuse++;
+ }
+ return prefix_size;
+}
+
+
+/*
+ Get fanout of of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+ double fanout = 1.0;
+ for (uint i=0; i <= prefix_size; i++)
+ {
+ if (positions[i].records_read > 1e-30)
+ fanout *= positions[i].records_read;
+ if (positions[i].cond_selectivity > 1e-30)
+ fanout *= positions[i].cond_selectivity;
+ }
+ return fanout;
+}
+
/**
@brief
Choose the best splitting to extend the evaluated partial join
@param
+ join_prefix Join order prefix built so far
+ prefix_size Length of the join prefix
record_count estimated cardinality of the extended partial join
remaining_tables tables not joined yet
@@ -906,7 +987,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
if the plan has been chosen, NULL - otherwise.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+ uint prefix_size,
+ double record_count,
table_map remaining_tables)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +1005,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
+ uint best_min_prefix_size;
Json_writer_object spl_trace(thd, "lateral_derived_choice");
Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1026,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ uint prefix_len= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1036,40 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (!(keyuse_ext->keypart_map & found_parts))
{
+ /*
+ Check for
+ 1. found_parts is empty and this is the first key part we've found
+ 2. or, we are looking at the first keyuse object for this keypart,
+ and found_parts has a bit set for the previous key part.
+ */
if ((!found_parts && !keyuse_ext->keypart) ||
(found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+ {
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ Find the shortest join prefix we'll need to get a lookup value
+ for this keypart read this key part
+ */
+ uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+ prefix_size,
+ keyuse_ext);
+ /*
+ The join prefix needed to read all keyparts is the longest
+ prefix of all key parts.
+ */
+ if (min_for_kp > prefix_len)
+ prefix_len= min_for_kp;
+ }
+
found_parts|= keyuse_ext->keypart_map;
+ }
else
{
+ /*
+ This is a new key part but it doesn't form a continuous index
+ prefix. Skip all KEYUSEs for this index.
+ */
do
{
keyuse_ext++;
@@ -981,6 +1095,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ best_min_prefix_size= prefix_len;
+ trace.add("min_prefix_len", (longlong)prefix_len);
+ }
}
keyuse_ext++;
}
@@ -1069,10 +1188,29 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (spl_plan)
{
Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
choice.add("split_cost", spl_plan->cost);
choice.add("record_count_for_split", record_count);
- choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+ double fanout= record_count;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ The LATERAL DERIVED table will only be refilled if the values it
+ refers to change.
+ The values refer only to the prefix of the join prefix so far.
+ Adjust the number of LATERAL DERIVED refills accordingly.
+ */
+ double prefix_fanout= get_join_prefix_fanout(join_prefix,
+ best_min_prefix_size);
+ if (prefix_fanout < fanout)
+ fanout= prefix_fanout;
+
+ choice.add("join_prefix_fanout_for_split", fanout);
+ }
+
+ if (fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
@@ -1112,6 +1250,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
@param
remaining_tables used to filter out the equalities that cannot
be pushed.
+ lparams if non-NULL, call lparams->add() to collect a list
+ of the "parameter" items for the lateral subquery.
+ The list will be used to reduce the number of times
+ the lateral derived table is refilled.
@details
This function is called by JOIN_TAB::fix_splitting that is used
@@ -1128,18 +1270,23 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
true on failure
*/
-bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
+bool JOIN::inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *lparams)
{
Item *inj_cond= 0;
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field;
+ table_map needed_tables= 0;
while ((added_key_field= li++))
{
if (remaining_tables & added_key_field->val->used_tables())
continue;
+ if (lparams && lparams->add_param(added_key_field->val))
+ return true;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true;
+ needed_tables |= added_key_field->val->used_tables();
}
DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list->elements) {
@@ -1230,8 +1377,15 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
memcpy((char *) md_join->best_positions,
(char *) spl_plan->best_positions,
sizeof(POSITION) * md_join->table_count);
- if (md_join->inject_best_splitting_cond(remaining_tables))
+
+ Lateral_derived_parameters *lparams= NULL;
+ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ lparams= new Lateral_derived_parameters;
+
+ if (md_join->inject_best_splitting_cond(remaining_tables, lparams))
return true;
+
+ table->pos_in_table_list->get_unit()->lateral_derived_params= lparams;
/*
This is called for a proper work of JOIN::get_best_combination()
called for the join that materializes T
@@ -1283,3 +1437,31 @@ bool JOIN::fix_all_splittings_in_plan()
}
return false;
}
+
+
+bool Lateral_derived_parameters::add_param(Item *item)
+{
+ Cached_item *tmp;
+ if (!(tmp= new_Cached_item(current_thd, item, FALSE)) ||
+ list.push_back(tmp))
+ return true;
+
+ return false;
+}
+
+
+/*
+ @brief
+ Test if LATERAL DERIVED table's parameters changed since last invocation
+
+ @return
+ false Parameter values have not changed since last invocation
+ true Parameter values have changed since last invocation. Current values
+ of the parameters are now saved.
+*/
+bool Lateral_derived_parameters::need_refill()
+{
+ if (test_if_group_changed(list) == -1)
+ return false; // not changed
+ return true;
+}
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index f8ee3475af8..25524c90da5 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1233,6 +1233,15 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
DBUG_RETURN(res);
}
+ /*
+ For LATERAL DERIVED tables, check if we actually need to refill it. If its
+ "parameters" (i.e. items it refers to) have the same values, we don't need
+ to refill.
+ */
+ if (unit->lateral_derived_params &&
+ !unit->lateral_derived_params->need_refill())
+ DBUG_RETURN(false);
+
if (unit->executed && !derived_is_recursive &&
(unit->uncacheable & UNCACHEABLE_DEPENDENT))
{
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index b9ca67182cf..5b3897b28fa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -2915,6 +2915,7 @@ void st_select_lex_unit::init_query()
columns_are_renamed= false;
with_wrapped_tvc= false;
have_except_all_or_intersect_all= false;
+ lateral_derived_params= NULL;
}
void st_select_lex::init_query()
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 45a3bf72fa4..278ee7d5276 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -845,6 +845,7 @@ class JOIN;
class select_unit;
class Procedure;
class Explain_query;
+class Lateral_derived_parameters;
void delete_explain_query(LEX *lex);
void create_explain_query(LEX *lex, MEM_ROOT *mem_root);
@@ -964,6 +965,8 @@ class st_select_lex_unit: public st_select_lex_node {
bool columns_are_renamed;
+ Lateral_derived_parameters *lateral_derived_params;
+
void init_query();
st_select_lex* outer_select();
const st_select_lex* first_select() const
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index 07f07a7150f..7cdb6a62d08 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -234,6 +234,7 @@
#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
+#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY (1ULL << 36)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 39e224ab024..85c3355cf08 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7460,7 +7460,10 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ {
+ spl_plan= s->choose_best_splitting(join_positions, idx, record_count,
+ remaining_tables);
+ }
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..2a9cb5464e6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -249,6 +249,7 @@ class AGGR_OP;
class Filesort;
struct SplM_plan_info;
class SplM_opt_info;
+class Lateral_derived_parameters;
typedef struct st_join_table {
TABLE *table;
@@ -682,7 +683,9 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
+ SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
@@ -1795,7 +1798,8 @@ class JOIN :public Sql_alloc
bool inject_cond_into_where(Item *injected_cond);
bool check_for_splittable_materialized();
void add_keyuses_for_splitting();
- bool inject_best_splitting_cond(table_map remaining_tables);
+ bool inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *params);
bool fix_all_splittings_in_plan();
void make_notnull_conds_for_range_scans();
@@ -2532,4 +2536,24 @@ void propagate_new_equalities(THD *thd, Item *cond,
COND_EQUAL *inherited,
bool *is_simplifiable_cond);
+/*
+ A list of parameters for a derived table that uses LATERAL DERIVED
+ optimization.
+
+ The last values of the parameters are saved, which one can use to check
+ whether
+ - they've changed and one needs to refill the LATERAL DERIVED table
+ - they are not changed and refill is not needed.
+*/
+class Lateral_derived_parameters : public Sql_alloc
+{
+ List<Cached_item> list;
+public:
+ // Construction interface
+ bool add_param(Item *item);
+
+ // Execution interface
+ bool need_refill();
+};
+
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b88d78c0db3..a0a89b1e401 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -2588,6 +2588,7 @@ bool st_select_lex_unit::cleanup()
}
columns_are_renamed= false;
cleaned= 1;
+ lateral_derived_params= NULL;
for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
error|= sl->cleanup();
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 2f7cf290dcd..725f407fc3b 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2717,6 +2717,7 @@ export const char *optimizer_switch_names[]=
"rowid_filter",
"condition_pushdown_from_having",
"not_null_range_scan",
+ "split_materialized_is_lazy",
"default",
NullS
};
1
0

[Commits] 9f6066441c1: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
by psergey 08 Aug '21
by psergey 08 Aug '21
08 Aug '21
revision-id: 9f6066441c1f014c48770e52164cf2fab93ca078 (mariadb-10.5.4-837-g9f6066441c1)
parent(s): 4ba4c06038165726dad6a18d85c2b54446ea5ef8
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-08 19:15:45 +0300
message:
MDEV-26301: LATERAL DERIVED refills the temp. table too many times
---
mysql-test/main/derived_split_innodb.result | 220 ++++++++++++++++++++++++++++
mysql-test/main/derived_split_innodb.test | 153 +++++++++++++++++++
mysql-test/main/mysqld--help.result | 3 +-
mysql-test/main/opt_trace.result | 2 +-
sql/opt_split.cc | 192 +++++++++++++++++++++++-
sql/sql_derived.cc | 9 ++
sql/sql_lex.cc | 1 +
sql/sql_lex.h | 3 +
sql/sql_priv.h | 1 +
sql/sql_select.cc | 5 +-
sql/sql_select.h | 28 +++-
sql/sql_union.cc | 1 +
sql/sys_vars.cc | 1 +
13 files changed, 609 insertions(+), 10 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..d37339b8393 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,223 @@ set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
# End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "split_chosen": true
+ }
+]
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5 Using where
+1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index
+1 PRIMARY t2 ref a a 5 test.t1.b 2
+1 PRIMARY t3 ref a a 5 test.t1.b 3
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..1369018f532 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,156 @@ set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+ t21, t22,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1 and t22.pk=2;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+explain
+select * from
+ t21,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+# And also add a non-const table
+
+create table t5 (
+ pk int primary key
+ );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+ t21,
+ (
+ (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 78616ae7c9d..6a38d720493 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -718,7 +718,8 @@ The following specify which files/extra groups are read (specified before remain
extended_keys, exists_to_in, orderby_uses_equalities,
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
- condition_pushdown_from_having, not_null_range_scan
+ condition_pushdown_from_having, not_null_range_scan,
+ split_materialized_is_lazy
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
],
"split_plan_choice":
{
+ "unsplit_cost": 25.72361682,
"split_cost": 2.488945919,
"record_count_for_split": 4,
- "unsplit_cost": 25.72361682,
"split_chosen": true
},
"chosen_lateral_derived":
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..2d124a7fe2e 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,11 +876,92 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
}
+
+/*
+ @brief
+ Find shortest join order sub-prefix that covers the specified table_map.
+
+ @detail
+ Given a join prefix and a bitmap of table(s) in table_map, find the
+ shortest prefix of the prefix that covers the tables in the table_map.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_pos, table_map map)
+{
+ map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+ table_map covered= 0;
+ uint i;
+
+ for (i=0; i < max_pos; i++)
+ {
+ covered |= positions[i].table->table->map;
+ if (!(map & ~covered))
+ break;
+ }
+ return i;
+}
+
+
+/*
+ @brief
+ Find the shortest join prefix that has a KEYUSE_EXT for a given key_part.
+
+ @detail
+ Given an array of KEYUSE objects for access to {table, key, key_part},
+ and a join order prefix (specified by array of POSITION structs),
+ find the shortest prefix for which there is a KEYUSE_EXT object describin
+ the access to they table.key.key_part
+
+ @note
+ KEYUSE_EXT objects are from inside the derived table, while the join
+ prefix is from the parent JOIN, so we need to use
+ KEYUSE_EXT::needed_in_prefix
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+ KEYUSE_EXT *keyuse)
+{
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint keypart= keyuse->keypart;
+ uint prefix_size= max_prefix_size;
+
+ while (keyuse->table == table && keyuse->key == key &&
+ keyuse->keypart == keypart)
+ {
+ uint cur_size= get_prefix_size(positions, max_prefix_size,
+ keyuse->needed_in_prefix);
+ if (cur_size < prefix_size)
+ prefix_size= cur_size;
+ keyuse++;
+ }
+ return prefix_size;
+}
+
+
+/*
+ Get fanout of of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+ double fanout = 1.0;
+ for (uint i=0; i <= prefix_size; i++)
+ {
+ if (positions[i].records_read > 1e-30)
+ fanout *= positions[i].records_read;
+ if (positions[i].cond_selectivity > 1e-30)
+ fanout *= positions[i].cond_selectivity;
+ }
+ return fanout;
+}
+
/**
@brief
Choose the best splitting to extend the evaluated partial join
@param
+ join_prefix Join order prefix built so far
+ prefix_size Length of the join prefix
record_count estimated cardinality of the extended partial join
remaining_tables tables not joined yet
@@ -906,7 +987,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
if the plan has been chosen, NULL - otherwise.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+ uint prefix_size,
+ double record_count,
table_map remaining_tables)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +1005,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
+ uint best_min_prefix_size;
Json_writer_object spl_trace(thd, "lateral_derived_choice");
Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1026,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ uint prefix_len= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1036,40 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (!(keyuse_ext->keypart_map & found_parts))
{
+ /*
+ Check for
+ 1. found_parts is empty and this is the first key part we've found
+ 2. or, we are looking at the first keyuse object for this keypart,
+ and found_parts has a bit set for the previous key part.
+ */
if ((!found_parts && !keyuse_ext->keypart) ||
(found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+ {
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ Find the shortest join prefix we'll need to get a lookup value
+ for this keypart read this key part
+ */
+ uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+ prefix_size,
+ keyuse_ext);
+ /*
+ The join prefix needed to read all keyparts is the longest
+ prefix of all key parts.
+ */
+ if (min_for_kp > prefix_len)
+ prefix_len= min_for_kp;
+ }
+
found_parts|= keyuse_ext->keypart_map;
+ }
else
{
+ /*
+ This is a new key part but it doesn't form a continuous index
+ prefix. Skip all KEYUSEs for this index.
+ */
do
{
keyuse_ext++;
@@ -981,6 +1095,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ best_min_prefix_size= prefix_len;
+ trace.add("min_prefix_len", (longlong)prefix_len);
+ }
}
keyuse_ext++;
}
@@ -1069,10 +1188,29 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (spl_plan)
{
Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
choice.add("split_cost", spl_plan->cost);
choice.add("record_count_for_split", record_count);
- choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+ double fanout= record_count;
+ if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ {
+ /*
+ The LATERAL DERIVED table will only be refilled if the values it
+ refers to change.
+ The values refer only to the prefix of the join prefix so far.
+ Adjust the number of LATERAL DERIVED refills accordingly.
+ */
+ double prefix_fanout= get_join_prefix_fanout(join_prefix,
+ best_min_prefix_size);
+ if (prefix_fanout < record_count)
+ prefix_fanout= record_count;
+
+ choice.add("join_prefix_fanout_for_split", fanout);
+ }
+
+ if (fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
@@ -1112,6 +1250,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
@param
remaining_tables used to filter out the equalities that cannot
be pushed.
+ lparams if non-NULL, call lparams->add() to collect a list
+ of the "parameter" items for the lateral subquery.
+ The list will be used to reduce the number of times
+ the lateral derived table is refilled.
@details
This function is called by JOIN_TAB::fix_splitting that is used
@@ -1128,18 +1270,23 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
true on failure
*/
-bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
+bool JOIN::inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *lparams)
{
Item *inj_cond= 0;
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field;
+ table_map needed_tables= 0;
while ((added_key_field= li++))
{
if (remaining_tables & added_key_field->val->used_tables())
continue;
+ if (lparams && lparams->add_param(added_key_field->val))
+ return true;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true;
+ needed_tables |= added_key_field->val->used_tables();
}
DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list->elements) {
@@ -1230,8 +1377,15 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
memcpy((char *) md_join->best_positions,
(char *) spl_plan->best_positions,
sizeof(POSITION) * md_join->table_count);
- if (md_join->inject_best_splitting_cond(remaining_tables))
+
+ Lateral_derived_parameters *lparams= NULL;
+ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+ lparams= new Lateral_derived_parameters;
+
+ if (md_join->inject_best_splitting_cond(remaining_tables, lparams))
return true;
+
+ table->pos_in_table_list->get_unit()->lateral_derived_params= lparams;
/*
This is called for a proper work of JOIN::get_best_combination()
called for the join that materializes T
@@ -1283,3 +1437,31 @@ bool JOIN::fix_all_splittings_in_plan()
}
return false;
}
+
+
+bool Lateral_derived_parameters::add_param(Item *item)
+{
+ Cached_item *tmp;
+ if (!(tmp= new_Cached_item(current_thd, item, FALSE)) ||
+ list.push_back(tmp))
+ return true;
+
+ return false;
+}
+
+
+/*
+ @brief
+ Test if LATERAL DERIVED table's parameters changed since last invocation
+
+ @return
+ false Parameter values have not changed since last invocation
+ true Parameter values have changed since last invocation. Current values
+ of the parameters are now saved.
+*/
+bool Lateral_derived_parameters::need_refill()
+{
+ if (test_if_group_changed(list) == -1)
+ return false; // not changed
+ return true;
+}
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index f8ee3475af8..25524c90da5 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1233,6 +1233,15 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
DBUG_RETURN(res);
}
+ /*
+ For LATERAL DERIVED tables, check if we actually need to refill it. If its
+ "parameters" (i.e. items it refers to) have the same values, we don't need
+ to refill.
+ */
+ if (unit->lateral_derived_params &&
+ !unit->lateral_derived_params->need_refill())
+ DBUG_RETURN(false);
+
if (unit->executed && !derived_is_recursive &&
(unit->uncacheable & UNCACHEABLE_DEPENDENT))
{
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index b9ca67182cf..5b3897b28fa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -2915,6 +2915,7 @@ void st_select_lex_unit::init_query()
columns_are_renamed= false;
with_wrapped_tvc= false;
have_except_all_or_intersect_all= false;
+ lateral_derived_params= NULL;
}
void st_select_lex::init_query()
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 45a3bf72fa4..278ee7d5276 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -845,6 +845,7 @@ class JOIN;
class select_unit;
class Procedure;
class Explain_query;
+class Lateral_derived_parameters;
void delete_explain_query(LEX *lex);
void create_explain_query(LEX *lex, MEM_ROOT *mem_root);
@@ -964,6 +965,8 @@ class st_select_lex_unit: public st_select_lex_node {
bool columns_are_renamed;
+ Lateral_derived_parameters *lateral_derived_params;
+
void init_query();
st_select_lex* outer_select();
const st_select_lex* first_select() const
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index 07f07a7150f..7cdb6a62d08 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -234,6 +234,7 @@
#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
+#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY (1ULL << 36)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 39e224ab024..85c3355cf08 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7460,7 +7460,10 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ {
+ spl_plan= s->choose_best_splitting(join_positions, idx, record_count,
+ remaining_tables);
+ }
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..2a9cb5464e6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -249,6 +249,7 @@ class AGGR_OP;
class Filesort;
struct SplM_plan_info;
class SplM_opt_info;
+class Lateral_derived_parameters;
typedef struct st_join_table {
TABLE *table;
@@ -682,7 +683,9 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
+ SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
@@ -1795,7 +1798,8 @@ class JOIN :public Sql_alloc
bool inject_cond_into_where(Item *injected_cond);
bool check_for_splittable_materialized();
void add_keyuses_for_splitting();
- bool inject_best_splitting_cond(table_map remaining_tables);
+ bool inject_best_splitting_cond(table_map remaining_tables,
+ Lateral_derived_parameters *params);
bool fix_all_splittings_in_plan();
void make_notnull_conds_for_range_scans();
@@ -2532,4 +2536,24 @@ void propagate_new_equalities(THD *thd, Item *cond,
COND_EQUAL *inherited,
bool *is_simplifiable_cond);
+/*
+ A list of parameters for a derived table that uses LATERAL DERIVED
+ optimization.
+
+ The last values of the parameters are saved, which one can use to check
+ whether
+ - they've changed and one needs to refill the LATERAL DERIVED table
+ - they are not changed and refill is not needed.
+*/
+class Lateral_derived_parameters : public Sql_alloc
+{
+ List<Cached_item> list;
+public:
+ // Construction interface
+ bool add_param(Item *item);
+
+ // Execution interface
+ bool need_refill();
+};
+
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b88d78c0db3..a0a89b1e401 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -2588,6 +2588,7 @@ bool st_select_lex_unit::cleanup()
}
columns_are_renamed= false;
cleaned= 1;
+ lateral_derived_params= NULL;
for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
error|= sl->cleanup();
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 2f7cf290dcd..725f407fc3b 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2717,6 +2717,7 @@ export const char *optimizer_switch_names[]=
"rowid_filter",
"condition_pushdown_from_having",
"not_null_range_scan",
+ "split_materialized_is_lazy",
"default",
NullS
};
1
0

[Commits] 4d3c4340286: MDEV-21130: Histograms: use JSON as on-disk format
by Sergei Petrunia 06 Aug '21
by Sergei Petrunia 06 Aug '21
06 Aug '21
revision-id: 4d3c434028649a0f14471ed4a1a4c10b97b78eb8 (mariadb-10.6.3-32-g4d3c4340286)
parent(s): 44f88fc96104365877b9ad8cc7106d48fc1d5448
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-06 20:08:16 +0300
message:
MDEV-21130: Histograms: use JSON as on-disk format
A demo of how to use in-memory data structure for histogram.
The patch shows how to
* convert string form of data to binary form
* compare two values in binary form
* compute a fraction for val in [X, Y] range.
grep for GSOC-TODO for notes.
---
sql/field.h | 3 +
sql/sql_statistics.cc | 256 +++++++++++++++++++++++++++++++++++++++++++++++++-
sql/sql_statistics.h | 40 +++++++-
3 files changed, 289 insertions(+), 10 deletions(-)
diff --git a/sql/field.h b/sql/field.h
index 4b64742b7b3..a24c6e8a300 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -1852,6 +1852,7 @@ class Field: public Value_source
{
return (double) 0.5;
}
+ virtual bool pos_through_val_str() { return false;}
/*
Check if comparison between the field and an item unambiguously
@@ -2137,6 +2138,8 @@ class Field_str :public Field {
{
return pos_in_interval_val_str(min, max, length_size());
}
+ bool pos_through_val_str() override {return true;}
+
bool test_if_equality_guarantees_uniqueness(const Item *const_item) const
override;
SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param, KEY_PART *key_part,
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 818d2b8d492..eed22d7ed77 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1240,7 +1240,8 @@ class Column_stat: public Stat_table
default:
return NULL;
}
- if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk,
+ if (!hist->parse(mem_root, table_field,
+ table_field->read_stats->histogram_type_on_disk,
(const uchar*)val.ptr(), val.length()))
{
table_field->read_stats->histogram_= hist;
@@ -1253,7 +1254,7 @@ class Column_stat: public Stat_table
}
};
-bool Histogram_binary::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg)
+bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg)
{
// Just copy the data
size = (uint8) size_arg;
@@ -1288,21 +1289,260 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp
size = (uint8) size_arg;
}
-bool Histogram_json::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr, uint size_arg)
+bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg)
{
DBUG_ENTER("Histogram_json::parse");
type = type_arg;
const char *json = (char *)ptr;
int vt;
- bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets);
+ bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text);
if (!result)
{
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), vt);
DBUG_RETURN(true);
}
+ size= hist_buckets_text.size();
+
+ /*
+ Convert the text based array into a data structure that allows lookups and
+ estimates
+ */
+ for (auto &s : hist_buckets_text)
+ {
+ field->store_text(s.data(), s.size(), &my_charset_bin);
+
+ // Get the value in "truncated key tuple format" here:
+ uchar buf[MAX_KEY_LENGTH];
+ uint len_to_copy= field->key_length();
+ uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
+ histogram_bounds.push_back(std::string((char*)buf, bytes));
+ }
+
DBUG_RETURN(false);
}
+
+static
+void store_key_image_to_rec_no_null(Field *field, uchar *ptr) {
+ MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
+ &field->table->write_set);
+ field->set_key_image(ptr, field->key_length());
+ dbug_tmp_restore_column_map(&field->table->write_set, old_map);
+}
+
+/*
+ GSOC-TODO:
+ This is our replacement for Field::pos_in_interval_val_real
+
+ We take midpoint_val and an interval [min_val, max_val], and return
+ a number between 0.0 and 1.0 which specifies how close midpoint_val is
+ to one of the bounds.
+
+ @param field Field object. We don't care about the field's current value
+ (actually, we overwrite it). We need it for its virtual
+ functions.
+
+*/
+double pos_in_interval_through_val_real(Field *field,
+ uchar* min_val,
+ uchar *max_val,
+ uchar *midpoint_val)
+{
+
+ // For each passed value: unpack it into Field's current value. Then, we can
+ // get the value as double.
+
+ store_key_image_to_rec_no_null(field, min_val);
+ double min_val_real= field->val_real();
+
+ store_key_image_to_rec_no_null(field, max_val);
+ double max_val_real= field->val_real();
+
+ store_key_image_to_rec_no_null(field, midpoint_val);
+ double midpoint_val_real= field->val_real();
+
+ // The code below is a copy of logic from Field::pos_in_interval_val_real:
+ double n, d;
+ n= midpoint_val_real - min_val_real;
+ if (n < 0)
+ return 0.0;
+ d= max_val_real - min_val_real;
+ if (d <= 0)
+ return 1.0;
+ return MY_MIN(n/d, 1.0);
+}
+
+// Copy-paste:
+static
+inline ulonglong char_prefix_to_ulonglong(uchar *src)
+{
+ uint sz= sizeof(ulonglong);
+ for (uint i= 0; i < sz/2; i++)
+ {
+ uchar tmp= src[i];
+ src[i]= src[sz-1-i];
+ src[sz-1-i]= tmp;
+ }
+ return uint8korr(src);
+}
+
+// copy-paste:
+static inline double safe_substract(ulonglong a, ulonglong b)
+{
+ return (a > b)? double(a - b) : -double(b - a);
+}
+
+/*
+ GSOC-TODO:
+ This is our replacement for Field::pos_in_interval_val_str
+
+ We take midpoint_val and an interval [min_val, max_val], and return
+ a number between 0.0 and 1.0 which specifies how close midpoint_val is
+ to one of the bounds.
+
+ @param field Field object. We don't care about the field's current value
+ (actually, we overwrite it). We need it for its virtual
+ functions.
+
+ @TODO
+ Instead of copying the pos_in_interval_val_str(), we should do better:
+ if all three passed values have a common prefix, skip it.
+ This will make the returned value more precise.
+
+*/
+
+double pos_in_interval_through_strxfrm(Field *field,
+ uchar *min_val,
+ uchar *max_val,
+ uchar *midpoint_val)
+{
+ // The code below is a copy of logic from Field::pos_in_interval_val_str
+ uchar mp_prefix[sizeof(ulonglong)];
+ uchar minp_prefix[sizeof(ulonglong)];
+ uchar maxp_prefix[sizeof(ulonglong)];
+ ulonglong mp, minp, maxp;
+
+ uint min_len= uint2korr(min_val);
+ uint max_len= uint2korr(max_val);
+ uint midpoint_len= uint2korr(midpoint_val);
+
+ auto cset= field->charset();
+
+ cset->strnxfrm(mp_prefix, sizeof(mp),
+ midpoint_val + HA_KEY_BLOB_LENGTH,
+ midpoint_len);
+ cset->strnxfrm(minp_prefix, sizeof(minp),
+ min_val + HA_KEY_BLOB_LENGTH,
+ min_len);
+ cset->strnxfrm(maxp_prefix, sizeof(maxp),
+ max_val + HA_KEY_BLOB_LENGTH,
+ max_len);
+ mp= char_prefix_to_ulonglong(mp_prefix);
+ minp= char_prefix_to_ulonglong(minp_prefix);
+ maxp= char_prefix_to_ulonglong(maxp_prefix);
+ double n, d;
+ n= safe_substract(mp, minp);
+ if (n < 0)
+ return 0.0;
+ d= safe_substract(maxp, minp);
+ if (d <= 0)
+ return 1.0;
+ return MY_MIN(n/d, 1.0);
+}
+
+
+/*
+ GSOC-TODO:
+ This is how range selectivity function should look like.
+
+ @param field The table field histogram is for. We don't care about the
+ field's current value, we only need its virtual functions to
+ perform various operations
+
+ @param min_endp, max_endp - this specifies the range.
+*/
+double Histogram_json::range_selectivity_new(Field *field, key_range *min_endp,
+ key_range *max_endp)
+{
+ fprintf(stderr, "Histogram_json::range_selectivity_new\n");
+
+
+ /*
+ GSOC-TODO:
+ The code below is NOT what this function have.
+
+ == WHAT THIS CODE DOES ==
+ At the moment it does a linear walk through histogram_bounds and compares
+ min_endp to each of histogram bucket's min and max.
+ ATTENTION: This is a demo of how key_cmp() is used to compare the values.
+
+ When it finds the bucket such that BUCKET_START < min_endp < BUCKET_END,
+ it computes a position of min_endp within the bucket.
+ ATTENTION: calls to pos_in_interval_.... are a demo of how to compute
+ position of a value within a [min,max] range.
+
+ == WHAT THIS CODE SHOULD DO ==
+ * Use binary search to locate the range [MIN_BUCKET; MAX_BUCKET] - the
+ set of buckets that overlaps with the search interval {min_endp, max_endp}.
+
+ * If the search interval covers MIN_BUCKET only partially, compute a
+ position of min_endp within the bucket.
+
+ * The same for max_endp.
+
+ * Compute the final selectivity and return it.
+ */
+ std::string prev_s;
+ bool have_prev_s=false;
+ for (auto &s : histogram_bounds)
+ {
+ if (!have_prev_s)
+ {
+ prev_s = s;
+ have_prev_s= true;
+ continue;
+ }
+
+ // It's a test code, so we only process min_endp.
+ if (min_endp)
+ {
+ const uchar *min_key= min_endp->key;
+ // TODO: also, properly handle SQL NULLs.
+ // in this test patch, we just assume the values are not SQL NULLs.
+ if (field->real_maybe_null())
+ min_key++;
+
+ int res1= field->key_cmp((uchar*)prev_s.data(), min_key);
+ const char *str1="<";
+ if (res1>0) str1=">";
+ if (res1==0) str1="=";
+
+ int res2= field->key_cmp(min_key, (uchar*)s.data());
+ const char *str2="<";
+ if (res2>0) str2=">";
+ if (res2==0) str2="=";
+ fprintf(stderr, "prev_bound %s min_key %s bound\n", str1, str2);
+
+ if (res1<0 && res2 < 0)
+ {
+ double sel;
+ if (field->pos_through_val_str())
+ sel= pos_in_interval_through_strxfrm(field, (uchar*)prev_s.data(),
+ (uchar*)s.data(), (uchar*)min_key);
+ else
+ sel= pos_in_interval_through_val_real(field, (uchar*)prev_s.data(),
+ (uchar*)s.data(), (uchar*)min_key);
+
+ fprintf(stderr, " pos_in_interval=%g\n", sel);
+ }
+
+ prev_s= s;
+ }
+ }
+ fprintf(stderr, "Histogram_json::range_selectivity_new ends\n");
+ return 0.5;
+}
+
void Histogram_json::serialize(Field *field)
{
field->store((char*)values, strlen((char*)values),
@@ -4107,8 +4347,14 @@ double get_column_range_cardinality(Field *field,
max_mp_pos= 1.0;
Histogram_base *hist = col_stats->histogram_;
- if (hist && hist->is_usable(thd))
+ if (hist && hist->is_usable(thd)) {
+ /*
+ GSOC-TODO: for now, we just call range_selectivity_new here.
+ */
+ sel= hist->range_selectivity_new(field, min_endp, max_endp);
+
sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
+ }
else
sel= (max_mp_pos - min_mp_pos);
res= col_non_nulls * sel;
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 2673b36d050..6c498e17ac6 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -149,7 +149,7 @@ bool is_eits_usable(Field* field);
class Histogram_base : public Sql_alloc
{
public:
- virtual bool parse(MEM_ROOT *mem_root, Histogram_type type_arg,
+ virtual bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
const uchar *ptr, uint size)= 0;
virtual void serialize(Field *to_field)= 0;
@@ -173,13 +173,19 @@ class Histogram_base : public Sql_alloc
virtual double point_selectivity(double pos, double avg_selection)=0;
+ virtual double range_selectivity_new(Field *field, key_range *min_endp,
+ key_range *max_endp)
+ {
+ return 1.0;
+ };
+
virtual ~Histogram_base(){}
};
class Histogram_binary : public Histogram_base
{
public:
- bool parse(MEM_ROOT *mem_root, Histogram_type type_arg,
+ bool parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg,
const uchar *ptr_arg, uint size_arg) override;
void serialize(Field *to_field) override;
@@ -341,11 +347,28 @@ class Histogram_json : public Histogram_base
private:
Histogram_type type;
uint8 size; /* Number of elements in the histogram*/
+
+ /*
+ GSOC-TODO: This is used for storing collected JSON text. Rename it
+ accordingly.
+ */
uchar *values;
- std::vector<std::string> hist_buckets;
+
+ // List of values in string form.
+ /*
+ GSOC-TODO: We don't need to save this. It can be a local variable in
+ parse().
+ Eventually we should get rid of this at all, as we can convert the
+ endpoints and add them to histogram_bounds as soon as we've read them.
+ */
+ std::vector<std::string> hist_buckets_text;
+
+ // Array of histogram bucket endpoints in KeyTupleFormat.
+ std::vector<std::string> histogram_bounds;
public:
- bool parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr, uint size) override;
+ bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
+ const uchar *ptr, uint size) override;
void serialize(Field *field) override;
@@ -364,7 +387,7 @@ class Histogram_json : public Histogram_base
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
- bool is_available() override {return get_width() > 0 && get_values(); }
+ bool is_available() override {return get_width() > 0 /*&& get_values()*/; }
bool is_usable(THD *thd) override
{
@@ -379,6 +402,13 @@ class Histogram_json : public Histogram_base
double range_selectivity(double min_pos, double max_pos) override {return 0.1;}
double point_selectivity(double pos, double avg_selection) override {return 0.5;}
+
+ /*
+ GSOC-TODO: This function should eventually replace both range_selectivity()
+ and point_selectivity(). See its code for more details.
+ */
+ double range_selectivity_new(Field *field, key_range *min_endp,
+ key_range *max_endp) override;
};
class Columns_statistics;
1
0