[Maria-developers] Rev 2794: cleanup in file:///home/bell/maria/bzr/work-maria-5.3-scache/
by sanja@askmonty.org 27 May '10
At file:///home/bell/maria/bzr/work-maria-5.3-scache/
------------------------------------------------------------
revno: 2794
revision-id: sanja@askmonty.org-20100527182744-1tu96cgyiaodzs32
parent: sanja@askmonty.org-20100527174138-7c3guoyiyjh7dhrk
committer: sanja@askmonty.org
branch nick: work-maria-5.3-scache
timestamp: Thu 2010-05-27 21:27:44 +0300
message:
cleanup
=== modified file 'sql/sql_subquery_cache.cc'
--- a/sql/sql_subquery_cache.cc 2010-05-27 17:41:38 +0000
+++ b/sql/sql_subquery_cache.cc 2010-05-27 18:27:44 +0000
@@ -148,7 +148,6 @@
void Subquery_cache_tmptable::init()
{
- ulonglong keymap;
List_iterator_fast<Item*> li(*list);
List_iterator_fast<Item> li_items(items);
Item **item;
@@ -198,14 +197,6 @@
goto error;
}
- /* makes all bits set for keys */
- keymap= 1 << (items.elements); /* + 1 - 1 */
- if (!keymap)
- keymap= ULONGLONG_MAX;
- else
- keymap--;
- keymap&=~1;
-
li_items++;
field_counter=1;
if (cache_table->alloc_keys(1) ||
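For readers skimming the hunk above: the deleted block built an "all key fields
except field 0" bitmap for the cache table. The bitmap became dead code once
Rev 2793 (the next thread below) switched TABLE::add_tmp_key() to an enumerator
callback, hence this cleanup. A minimal standalone sketch of the removed
arithmetic, reconstructed from the diff (not MariaDB code), worked here for
three items:

#include <cstdio>

typedef unsigned long long ulonglong;

int main()
{
  unsigned elements= 3;                /* items.elements in the original */
  ulonglong keymap= 1ULL << elements;  /* 0b1000 */
  if (!keymap)                         /* original's guard for >= 64 items */
    keymap= ~0ULL;                     /* ULONGLONG_MAX */
  else
    keymap--;                          /* 0b0111: bits 0..elements-1 set */
  keymap&= ~1ULL;                      /* 0b0110: clear bit 0 */
  printf("keymap=0x%llx\n", keymap);   /* prints keymap=0x6 */
  return 0;
}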

[Maria-developers] Rev 2793: Fixed some of the tests. in file:///home/bell/maria/bzr/work-maria-5.3-scache/
by sanja@askmonty.org 27 May '10
At file:///home/bell/maria/bzr/work-maria-5.3-scache/
------------------------------------------------------------
revno: 2793
revision-id: sanja@askmonty.org-20100527174138-7c3guoyiyjh7dhrk
parent: sanja@askmonty.org-20100525182914-z3zeviggq9026x1n
committer: sanja@askmonty.org
branch nick: work-maria-5.3-scache
timestamp: Thu 2010-05-27 20:41:38 +0300
message:
Fixed some of the tests.
Fixed a problem with saving a reference in a field.
Added Igor's routines.
=== modified file 'mysql-test/r/index_merge_myisam.result'
--- a/mysql-test/r/index_merge_myisam.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/index_merge_myisam.result 2010-05-27 17:41:38 +0000
@@ -1419,19 +1419,19 @@
#
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='index_merge=off,index_merge_union=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='index_merge_union=on';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,index_merge_sort_union=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch=4;
ERROR 42000: Variable 'optimizer_switch' can't be set to the value of '4'
set optimizer_switch=NULL;
@@ -1458,21 +1458,21 @@
set optimizer_switch='index_merge=off,index_merge_union=off,default';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch=default;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set @@global.optimizer_switch=default;
select @@global.optimizer_switch;
@@global.optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
#
# Check index_merge's @@optimizer_switch flags
#
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (a int, b int, c int, filler char(100),
@@ -1582,5 +1582,5 @@
set optimizer_switch=default;
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
drop table t0, t1;
=== modified file 'mysql-test/r/subquery_cache.result'
--- a/mysql-test/r/subquery_cache.result 2010-05-25 12:54:57 +0000
+++ b/mysql-test/r/subquery_cache.result 2010-05-27 17:41:38 +0000
@@ -587,4 +587,5 @@
Variable_name Value
Subquery_cache_hit 0
Subquery_cache_miss 4
+drop table t1;
set optimizer_switch='subquery_cache=default';
=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result 2010-03-29 14:04:35 +0000
+++ b/mysql-test/r/subselect3.result 2010-05-27 17:41:38 +0000
@@ -105,6 +105,7 @@
Handler_read_rnd_next 5
delete from t2;
insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);
+set optimizer_switch='subquery_cache=off';
flush status;
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
oref a Z
@@ -123,6 +124,7 @@
select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z;
Z
No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.
+set @@optimizer_switch=@save_optimizer_switch;
drop table t1, t2;
create table t1 (a int, b int, primary key (a));
insert into t1 values (1,1), (3,1),(100,1);
=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- a/mysql-test/r/subselect3_jcl6.result 2010-03-29 14:04:35 +0000
+++ b/mysql-test/r/subselect3_jcl6.result 2010-05-27 17:41:38 +0000
@@ -109,6 +109,7 @@
Handler_read_rnd_next 5
delete from t2;
insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);
+set optimizer_switch='subquery_cache=off';
flush status;
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
oref a Z
@@ -127,6 +128,7 @@
select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z;
Z
No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.
+set @@optimizer_switch=@save_optimizer_switch;
drop table t1, t2;
create table t1 (a int, b int, primary key (a));
insert into t1 values (1,1), (3,1),(100,1);
=== modified file 'mysql-test/r/subselect_no_mat.result'
--- a/mysql-test/r/subselect_no_mat.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect_no_mat.result 2010-05-27 17:41:38 +0000
@@ -1,6 +1,6 @@
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='materialization=off';
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12;
set @save_optimizer_switch=@@optimizer_switch;
@@ -4826,4 +4826,4 @@
set optimizer_switch=default;
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
=== modified file 'mysql-test/r/subselect_no_opts.result'
--- a/mysql-test/r/subselect_no_opts.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect_no_opts.result 2010-05-27 17:41:38 +0000
@@ -1,6 +1,6 @@
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='materialization=off,semijoin=off';
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12;
set @save_optimizer_switch=@@optimizer_switch;
@@ -4826,4 +4826,4 @@
set optimizer_switch=default;
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
=== modified file 'mysql-test/r/subselect_no_semijoin.result'
--- a/mysql-test/r/subselect_no_semijoin.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect_no_semijoin.result 2010-05-27 17:41:38 +0000
@@ -1,6 +1,6 @@
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='semijoin=off';
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12;
set @save_optimizer_switch=@@optimizer_switch;
@@ -4826,4 +4826,4 @@
set optimizer_switch=default;
show variables like 'optimizer_switch';
Variable_name Value
-optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result 2010-03-29 14:04:35 +0000
+++ b/mysql-test/r/subselect_sj.result 2010-05-27 17:41:38 +0000
@@ -202,39 +202,39 @@
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,materialization=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off,materialization=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,materialization=off,semijoin=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off,materialization=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,materialization=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch=default;
drop table t0, t1, t2;
drop table t10, t11, t12;
=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result 2010-03-29 14:04:35 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result 2010-05-27 17:41:38 +0000
@@ -206,39 +206,39 @@
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,materialization=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off,materialization=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,materialization=off,semijoin=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off,materialization=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,semijoin=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch='default,materialization=off,loosescan=off';
select @@optimizer_switch;
@@optimizer_switch
-index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on
+index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on
set optimizer_switch=default;
drop table t0, t1, t2;
drop table t10, t11, t12;
=== modified file 'mysql-test/t/subquery_cache.test'
--- a/mysql-test/t/subquery_cache.test 2010-05-25 12:54:57 +0000
+++ b/mysql-test/t/subquery_cache.test 2010-05-27 17:41:38 +0000
@@ -199,5 +199,6 @@
show status like "subquery_cache%";
select a, a in (select a from t1 where -1 < benchmark(a,100)) from t1 as ext;
show status like "subquery_cache%";
+drop table t1;
set optimizer_switch='subquery_cache=default';
=== modified file 'mysql-test/t/subselect3.test'
--- a/mysql-test/t/subselect3.test 2010-03-20 12:01:47 +0000
+++ b/mysql-test/t/subselect3.test 2010-05-27 17:41:38 +0000
@@ -98,10 +98,12 @@
delete from t2;
insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);
+set optimizer_switch='subquery_cache=off';
flush status;
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
show status like '%Handler_read%';
select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z;
+set @@optimizer_switch=@save_optimizer_switch;
drop table t1, t2;
=== modified file 'sql/item.cc'
--- a/sql/item.cc 2010-05-24 17:29:56 +0000
+++ b/sql/item.cc 2010-05-27 17:41:38 +0000
@@ -28,6 +28,9 @@
const String my_null_string("NULL", 4, default_charset_info);
+static int save_field_in_field(Field *from,my_bool * null_value,
+ Field *to, bool no_conversions);
+
/****************************************************************************/
/* Hybrid_type_traits {_real} */
@@ -5107,40 +5110,36 @@
}
+static int save_field_in_field(Field *from, my_bool *null_value,
+ Field *to, bool no_conversions)
+{
+ int res;
+ if (from->is_null())
+ {
+ (*null_value)= 1;
+ res= set_field_to_null_with_conversions(to, no_conversions);
+ }
+ else
+ {
+ to->set_notnull();
+ res= field_conv(to, from);
+ (*null_value)= 0;
+ }
+ return res;
+}
+
/**
Set a field's value from a item.
*/
void Item_field::save_org_in_field(Field *to)
{
- if (field->is_null())
- {
- null_value=1;
- set_field_to_null_with_conversions(to, 1);
- }
- else
- {
- to->set_notnull();
- field_conv(to,field);
- null_value=0;
- }
+ save_field_in_field(field, &null_value, to, TRUE);
}
int Item_field::save_in_field(Field *to, bool no_conversions)
{
- int res;
- if (result_field->is_null())
- {
- null_value=1;
- res= set_field_to_null_with_conversions(to, no_conversions);
- }
- else
- {
- to->set_notnull();
- res= field_conv(to,result_field);
- null_value=0;
- }
- return res;
+ return save_field_in_field(result_field, &null_value, to, no_conversions);
}
@@ -6347,7 +6346,8 @@
int Item_ref::save_in_field(Field *to, bool no_conversions)
{
int res;
- DBUG_ASSERT(!result_field);
+ if (result_field)
+ return save_field_in_field(result_field, &null_value, to, no_conversions);
res= (*ref)->save_in_field(to, no_conversions);
null_value= (*ref)->null_value;
return res;
=== modified file 'sql/item.h'
--- a/sql/item.h 2010-05-24 17:29:56 +0000
+++ b/sql/item.h 2010-05-27 17:41:38 +0000
@@ -1143,11 +1143,6 @@
{ return Field::GEOM_GEOMETRY; };
String *check_well_formed_result(String *str, bool send_error= 0);
bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs);
-
- /**
- Used to get reference on real item (not Item_ref)
- */
- virtual Item **unref(Item **my_ref) { return my_ref; };
};
@@ -2507,11 +2502,6 @@
{
return trace_unsupported_by_check_vcol_func_processor("ref");
}
-
- /**
- Used to get reference on real item (not Item_ref)
- */
- virtual Item **unref(Item **my_ref) {return (*ref)->unref(ref); };
};
=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc 2010-05-24 17:29:56 +0000
+++ b/sql/sql_lex.cc 2010-05-27 17:41:38 +0000
@@ -1847,7 +1847,6 @@
DBUG_ENTER("st_select_lex::register_dependency_item");
DBUG_ASSERT(this != last);
DBUG_ASSERT(*dependency);
- dependency= (*dependency)->unref(dependency);
do
{
/* check duplicates */
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-05-24 17:29:56 +0000
+++ b/sql/sql_select.cc 2010-05-27 17:41:38 +0000
@@ -13390,7 +13390,8 @@
Moving away from the current record. Unlock the row
in the handler if it did not match the partial WHERE.
*/
- if (table_ref->has_record && table_ref->use_count == 0)
+ if (table_ref->has_record )
+ if (table_ref->use_count == 0)
{
tab->read_record.file->unlock_row();
table_ref->has_record= FALSE;
=== modified file 'sql/sql_subquery_cache.cc'
--- a/sql/sql_subquery_cache.cc 2010-05-25 10:45:36 +0000
+++ b/sql/sql_subquery_cache.cc 2010-05-27 17:41:38 +0000
@@ -139,12 +139,20 @@
DBUG_RETURN(equalities->fix_fields(table_thd, &equalities));
}
+
+static uint field_enumerator(uchar *arg)
+{
+ return ((uint*)arg)[0]++;
+}
+
+
void Subquery_cache_tmptable::init()
{
ulonglong keymap;
List_iterator_fast<Item*> li(*list);
List_iterator_fast<Item> li_items(items);
Item **item;
+ uint field_counter;
DBUG_ENTER("Subquery_cache_tmptable::init");
DBUG_ASSERT(!inited);
inited= TRUE;
@@ -199,8 +207,11 @@
keymap&=~1;
li_items++;
+ field_counter=1;
if (cache_table->alloc_keys(1) ||
- (cache_table->add_tmp_key(keymap, "cache-table-key") < 0) ||
+ (cache_table->add_tmp_key(0, items.elements - 1,
+ &field_enumerator,
+ (uchar*)&field_counter) < 0) ||
createtmp_table_search_structures(table_thd, cache_table, li_items,
&tab_ref) ||
!(tab= create_index_lookup_join_tab(cache_table)))
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-05-24 17:29:56 +0000
+++ b/sql/table.cc 2010-05-27 17:41:38 +0000
@@ -5114,7 +5114,7 @@
bool TABLE::alloc_keys(uint key_count)
{
DBUG_ASSERT(!s->keys);
- key_info= s->key_info= (KEY*) my_malloc(sizeof(KEY)*key_count, MYF(0));
+ key_info= s->key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*key_count);
max_keys= key_count;
return !(key_info);
}
@@ -5123,52 +5123,51 @@
/**
@brief Adds one key to a temporary table.
- @param key_parts bitmap of fields that take a part in the key.
- @param key_name name of the key
-
- @details
- Creates a key for this table from fields which corresponds the bits set to 1
- in the 'key_parts' bitmap. The 'key_name' name is given to the newly created
- key.
+ @param key key number.
+ @param key_parts number of fields in the key
+ @param next_field_no function which returns field numbers which
+ should be included in the key
+ @param arg argument for the above function
@return <0 an error occur.
@return >=0 number of newly added key.
*/
-int TABLE::add_tmp_key(ulonglong key_parts, const char *key_name)
+bool TABLE::add_tmp_key(uint key, uint key_parts,
+ uint (*next_field_no) (uchar *), uchar *arg)
{
- DBUG_ASSERT(s->keys< max_keys);
+ DBUG_ASSERT(key < max_keys);
+ char buf[NAME_CHAR_LEN];
KEY* keyinfo;
Field **reg_field;
uint i;
bool key_start= TRUE;
- uint key_part_count= my_count_bits(key_parts);
KEY_PART_INFO* key_part_info=
- (KEY_PART_INFO*) my_malloc(sizeof(KEY_PART_INFO)* key_part_count, MYF(0));
+ (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts);
if (!key_part_info)
- return -1;
- keyinfo= key_info + s->keys;
- keyinfo->key_part=key_part_info;
- keyinfo->usable_key_parts=keyinfo->key_parts= key_part_count;
+ return TRUE;
+ keyinfo= key_info + key;
+ keyinfo->key_part= key_part_info;
+ keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
keyinfo->key_length=0;
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
- keyinfo->name= (char *)key_name;
keyinfo->flags= HA_GENERATED_KEY;
- keyinfo->rec_per_key= (ulong*)my_malloc(sizeof(ulong)*key_part_count, MYF(0));
+ sprintf(buf, "key%i", key);
+ if (!(keyinfo->name= strdup_root(&mem_root, buf)))
+ return TRUE;
+ keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root,
+ sizeof(ulong)*key_parts);
if (!keyinfo->rec_per_key)
- return -1;
- bzero(keyinfo->rec_per_key, sizeof(ulong)*key_part_count);
- for (i= 0, reg_field=field ;
- *reg_field;
- i++, reg_field++)
+ return TRUE;
+ bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
+ for (i= 0; i < key_parts; i++)
{
- if (!(key_parts & (1 << i)))
- continue;
+ reg_field= field + next_field_no(arg);
if (key_start)
- (*reg_field)->key_start.set_bit(s->keys);
+ (*reg_field)->key_start.set_bit(key);
key_start= FALSE;
- (*reg_field)->part_of_key.set_bit(s->keys);
+ (*reg_field)->part_of_key.set_bit(key);
(*reg_field)->flags|= PART_KEY_FLAG;
key_part_info->null_bit= (*reg_field)->null_bit;
key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
@@ -5202,7 +5201,8 @@
key_part_info++;
}
set_if_bigger(s->max_key_length, keyinfo->key_length);
- return ++s->keys - 1;
+ s->keys++;
+ return FALSE;
}
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-05-24 17:29:56 +0000
+++ b/sql/table.h 2010-05-27 17:41:38 +0000
@@ -914,9 +914,10 @@
*/
inline bool needs_reopen_or_name_lock()
{ return s->version != refresh_version; }
+ bool alloc_keys(uint key_count);
+ bool add_tmp_key(uint key, uint key_parts,
+ uint (*next_field_no) (uchar *), uchar *arg);
bool is_children_attached(void);
- bool alloc_keys(uint key_count);
- int add_tmp_key(ulonglong key_parts, const char *key_name);
};
enum enum_schema_table_state
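A note on the new TABLE::add_tmp_key() interface above: instead of a ulonglong
bitmap, it now pulls one field number per key part from a callback. In the
sql_subquery_cache.cc hunk, field_counter starts at 1 and field_enumerator
post-increments it, so the key covers fields 1..items.elements-1 and skips
field 0, matching the old keymap&=~1. A standalone sketch of the callback
pattern (the demo_* names are mine, not MariaDB's):

#include <cstdio>

typedef unsigned char uchar;

/* Yields the next field number on each call, like field_enumerator. */
static unsigned next_field(uchar *arg)
{
  return ((unsigned*)arg)[0]++;
}

/* Stand-in for TABLE::add_tmp_key(): one callback query per key part. */
static void demo_add_key(unsigned key_parts,
                         unsigned (*next_field_no)(uchar *), uchar *arg)
{
  for (unsigned i= 0; i < key_parts; i++)
    printf("key part %u uses field %u\n", i, next_field_no(arg));
}

int main()
{
  unsigned field_counter= 1;   /* skip field 0, as the patch does */
  demo_add_key(3, next_field, (uchar*)&field_counter);
  return 0;
}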

[Maria-developers] bzr commit into file:///home/tsk/mprog/src/5.3-mwl89/ branch (timour:2792)
by timour@askmonty.org 27 May '10
#At file:///home/tsk/mprog/src/5.3-mwl89/ based on revid:psergey@askmonty.org-20100503154606-z7v6errebcv9gax1
2792 timour@askmonty.org 2010-05-27
MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation
Phase 1: Implement recursive bottom-up optimization of subqueries instead of
lazy optimization.
The patch implements a preparatory phase for MWL#89, which is a prerequisite
for implementing a cost-based choice between the two strategies. The patch
passes the complete regression test suite.
The main change is implemented by the method:
JOIN::optimize_materialized_in_subqueries().
All other changes were required to correct problems resulting from changing the
order of optimization. Most of these problems followed the same pattern - there are
some shared structures between a subquery and its parent query. Depending on which
one is optimized first (parent or child query), these shared structures may get
different values, thus resulting in an inconsistent query plan. (A minimal
sketch of this bottom-up ordering follows the file list below.)
modified:
mysql-test/r/subselect_mat.result
sql/item_subselect.cc
sql/item_subselect.h
sql/sql_class.cc
sql/sql_class.h
sql/sql_select.cc
sql/sql_select.h
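Before the diffs, the promised sketch of the bottom-up ordering the message
describes: each child subquery chosen for materialization is optimized before
its parent, so shared structures already hold their final values when the
parent's plan is chosen. The demo types are hypothetical; the real traversal is
JOIN::optimize_materialized_in_subqueries() in the sql_select.cc hunk below.

#include <cstdio>
#include <vector>

/* Hypothetical stand-in for a query block with nested subqueries. */
struct DemoJoin
{
  const char *name;
  std::vector<DemoJoin*> children;  /* immediate inner subqueries */

  int optimize()
  {
    /* Optimize children first, mirroring the patch's recursion. */
    for (DemoJoin *child : children)
      if (int res= child->optimize())
        return res;
    printf("optimized: %s\n", name); /* the parent is printed last */
    return 0;
  }
};

int main()
{
  DemoJoin inner= { "IN subquery", {} };
  DemoJoin outer= { "parent query", { &inner } };
  return outer.optimize();  /* prints the subquery, then the parent */
}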
=== modified file 'mysql-test/r/subselect_mat.result'
--- a/mysql-test/r/subselect_mat.result 2010-04-05 21:15:15 +0000
+++ b/mysql-test/r/subselect_mat.result 2010-05-27 13:13:47 +0000
@@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
-2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
+2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1)
set @@optimizer_switch='default,materialization=off';
@@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
-2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
+2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2);
min(a1)
set @@optimizer_switch='default,materialization=off';
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-04-02 14:27:06 +0000
+++ b/sql/item_subselect.cc 2010-05-27 13:13:47 +0000
@@ -2110,7 +2110,7 @@ bool Item_in_subselect::setup_engine()
if (!(new_engine= new subselect_hash_sj_engine(thd, this,
old_engine)) ||
- new_engine->init_permanent(unit->get_unit_column_types()))
+ new_engine->init_permanent(&old_engine->join->fields_list))
{
Item_subselect::trans_res trans_res;
/*
@@ -2126,6 +2126,15 @@ bool Item_in_subselect::setup_engine()
&eq_creator);
else
trans_res= row_value_in_to_exists_transformer(old_engine->join);
+ /*
+ The IN=>EXISTS transformation above injects new predicates into the
+ WHERE and HAVING clauses. Since the subquery was already optimized,
+ below we force its reoptimization with the new injected conditions
+ by the first call to subselect_single_select_engine::exec().
+ This is the only case of lazy subquery optimization in the server.
+ */
+ DBUG_ASSERT(old_engine->join->optimized);
+ old_engine->join->optimized= false;
res= (trans_res != Item_subselect::RES_OK);
}
if (new_engine)
@@ -3673,6 +3682,7 @@ bitmap_init_memroot(MY_BITMAP *map, uint
bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
{
+ select_union *result_sink;
/* Options to create_tmp_table. */
ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS;
/* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
@@ -3706,15 +3716,16 @@ bool subselect_hash_sj_engine::init_perm
DBUG_RETURN(TRUE);
}
*/
- if (!(result= new select_materialize_with_stats))
+ if (!(result_sink= new select_materialize_with_stats))
DBUG_RETURN(TRUE);
- if (((select_union*) result)->create_result_table(
- thd, tmp_columns, TRUE, tmp_create_options,
- "materialized subselect", TRUE))
+ if (result_sink->create_result_table(thd, tmp_columns, TRUE,
+ tmp_create_options,
+ "materialized subselect", TRUE))
DBUG_RETURN(TRUE);
- tmp_table= ((select_union*) result)->table;
+ tmp_table= result_sink->table;
+ result= result_sink;
/*
If the subquery has blobs, or the total key lenght is bigger than
@@ -3882,7 +3893,6 @@ subselect_hash_sj_engine::make_unique_en
cur_ref_buff + test(maybe_null), we could
use that information instead.
*/
-
cur_ref_buff + null_count,
null_count ? cur_ref_buff : 0,
cur_key_part->length, tab->ref.items[i]);
@@ -3908,11 +3918,6 @@ subselect_hash_sj_engine::make_unique_en
bool subselect_hash_sj_engine::init_runtime()
{
/*
- Create and optimize the JOIN that will be used to materialize
- the subquery if not yet created.
- */
- materialize_engine->prepare();
- /*
Repeat name resolution for 'cond' since cond is not part of any
clause of the query, and it is not 'fixed' during JOIN::prepare.
*/
@@ -3935,6 +3940,16 @@ subselect_hash_sj_engine::~subselect_has
}
+int subselect_hash_sj_engine::prepare()
+{
+ /*
+ Create and optimize the JOIN that will be used to materialize
+ the subquery if not yet created.
+ */
+ return materialize_engine->prepare();
+}
+
+
/**
Cleanup performed after each PS execution.
@@ -3996,9 +4011,8 @@ int subselect_hash_sj_engine::exec()
the subquery predicate.
*/
thd->lex->current_select= materialize_engine->select_lex;
- if ((res= materialize_join->optimize()))
- goto err; /* purecov: inspected */
- DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
+ /* The subquery should be optimized, and materialized only once. */
+ DBUG_ASSERT(materialize_join->optimized && !is_materialized);
materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err;
@@ -4909,7 +4923,7 @@ bool subselect_rowid_merge_engine::parti
/* If there is a non-NULL key, it must be the first key in the keys array. */
DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
- /* The prioryty queue for keys must be empty. */
+ /* The priority queue for keys must be empty. */
DBUG_ASSERT(!pq.elements);
/* All data accesses during execution are via handler::ha_rnd_pos() */
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-03-29 14:04:35 +0000
+++ b/sql/item_subselect.h 2010-05-27 13:13:47 +0000
@@ -805,7 +805,7 @@ public:
bool init_permanent(List<Item> *tmp_columns);
bool init_runtime();
void cleanup();
- int prepare() { return 0; } /* Override virtual function in base class. */
+ int prepare();
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2010-04-05 21:15:15 +0000
+++ b/sql/sql_class.cc 2010-05-27 13:13:47 +0000
@@ -2948,8 +2948,15 @@ create_result_table(THD *thd_arg, List<I
const char *table_alias, bool bit_fields_as_long)
{
DBUG_ASSERT(table == 0);
+ tmp_table_param.init();
tmp_table_param.field_count= column_types->elements;
tmp_table_param.bit_fields_as_long= bit_fields_as_long;
+ /*
+ TIMOUR:
+ Setting this parameter here limits the use of this class only for
+ materialized subqueries.
+ */
+ tmp_table_param.materialized_subquery= true;
if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
(ORDER*) 0, is_union_distinct, 1,
@@ -3034,6 +3041,7 @@ void TMP_TABLE_PARAM::init()
table_charset= 0;
precomputed_group_by= 0;
bit_fields_as_long= 0;
+ materialized_subquery= 0;
DBUG_VOID_RETURN;
}
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-04-05 21:15:15 +0000
+++ b/sql/sql_class.h 2010-05-27 13:13:47 +0000
@@ -2772,6 +2772,7 @@ public:
uint convert_blob_length;
CHARSET_INFO *table_charset;
bool schema_table;
+ bool materialized_subquery;
/*
True if GROUP BY and its aggregate functions are already computed
by a table access method (e.g. by loose index scan). In this case
@@ -2790,8 +2791,8 @@ public:
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
- schema_table(0), precomputed_group_by(0), force_copy_fields(0),
- bit_fields_as_long(0)
+ schema_table(0), materialized_subquery(0), precomputed_group_by(0),
+ force_copy_fields(0), bit_fields_as_long(0)
{}
~TMP_TABLE_PARAM()
{
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-05-03 15:46:06 +0000
+++ b/sql/sql_select.cc 2010-05-27 13:13:47 +0000
@@ -714,6 +714,7 @@ JOIN::optimize()
{
ulonglong select_opts_for_readinfo;
uint no_jbuf_after;
+ int res;
DBUG_ENTER("JOIN::optimize");
// to prevent double initialization on EXPLAIN
@@ -723,6 +724,10 @@ JOIN::optimize()
thd_proc_info(thd, "optimizing");
+ /* Optimize recursively all IN subqueries of this query. */
+ if ((res= optimize_materialized_in_subqueries()))
+ DBUG_RETURN(res);
+
/* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
if (convert_join_subqueries_to_semijoins(this))
DBUG_RETURN(1); /* purecov: inspected */
@@ -848,7 +853,6 @@ JOIN::optimize()
*/
if (tables_list && implicit_grouping)
{
- int res;
/*
opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
to the WHERE conditions,
@@ -1277,7 +1281,6 @@ JOIN::optimize()
if (setup_subquery_materialization())
DBUG_RETURN(1);
- int res;
if ((res= rewrite_to_index_subquery_engine(this)) != -1)
DBUG_RETURN(res);
/*
@@ -2413,8 +2416,9 @@ err:
Setup for execution all subqueries of a query, for which the optimizer
chose hash semi-join.
- @details Iterate over all subqueries of the query, and if they are under an
- IN predicate, and the optimizer chose to compute it via hash semi-join:
+ @details Iterate over all immediate child subqueries of the query, and if
+ they are under an IN predicate, and the optimizer chose to compute it via
+ hash semi-join:
- try to initialize all data structures needed for the materialized execution
of the IN predicate,
- if this fails, then perform the IN=>EXISTS transformation which was
@@ -2454,6 +2458,51 @@ bool JOIN::setup_subquery_materializatio
}
+/**
+ Optimize all immediate children IN subqueries of this join.
+
+ @note
+ This method must be called in the very beginning of JOIN::optimize().
+ As a result all children subqueries are optimized recursively before
+ their parent.
+*/
+
+int
+JOIN::optimize_materialized_in_subqueries()
+{
+ int res;
+ for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un;
+ un= un->next_unit())
+ {
+ for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select())
+ {
+ Item_subselect *subquery_predicate= sl->master_unit()->item;
+ if (subquery_predicate &&
+ subquery_predicate->substype() == Item_subselect::IN_SUBS &&
+ ((Item_in_subselect*) subquery_predicate)->exec_method ==
+ Item_in_subselect::MATERIALIZATION
+ // @todo TIMOUR:
+ // Think also how to pre-optimize for IN_TO_EXISTS because we still
+ // call the optimizer in subselect_single_select_engine::exec()
+ )
+ {
+ JOIN *subquery_join= sl->join;
+ if (subquery_join)
+ {
+ SELECT_LEX *save_select= thd->lex->current_select;
+ thd->lex->current_select= subquery_predicate->get_select_lex();
+ res= subquery_join->optimize();
+ thd->lex->current_select= save_select;
+ if (res)
+ return res;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+
/*****************************************************************************
Create JOIN_TABS, make a guess about the table types,
Approximate how many records will be used in each table
@@ -11142,7 +11191,27 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
continue; // Some kindf of const item
}
if (type == Item::SUM_FUNC_ITEM)
- ((Item_sum *) item)->result_field= new_field;
+ {
+ Item_sum *agg_item= (Item_sum *) item;
+ /*
+ Update the result field only if it has never been set, or if the
+ created temporary table is not to be used for subquery
+ materialization.
+
+ The reason is that for subqueries that require materialization as part
+ of their plan, we create the 'external' temporary table needed for IN
+ execution, after the 'internal' temporary table needed for grouping.
+ Since both the external and the internal temporary tables are created
+ for the same list of SELECT fields of the subquery, setting
+ 'result_field' for each temp table creation overrides the previous
+ value of result field.
+
+ The condition below prevents the creation of the external temp table
+ to override the 'result_field' that was set for the internal temp table.
+ */
+ if (!agg_item->result_field || !param->materialized_subquery)
+ agg_item->result_field= new_field;
+ }
tmp_from_field++;
reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG))
@@ -18881,6 +18950,8 @@ bool JOIN::change_result(select_result *
{
DBUG_ENTER("JOIN::change_result");
result= res;
+ if (tmp_join)
+ tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2()))
{
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-03-20 12:01:47 +0000
+++ b/sql/sql_select.h 2010-05-27 13:13:47 +0000
@@ -1717,6 +1717,7 @@ private:
*/
bool implicit_grouping;
bool make_simple_join(JOIN *join, TABLE *tmp_table);
+ int optimize_materialized_in_subqueries();
};

[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2788)
by Igor Babaev 26 May '10
#At lp:maria/5.2 based on revid:psergey@askmonty.org-20100329200940-9ikx6gpww0gtsx00
2788 Igor Babaev 2010-05-26
MWL#106: Backport optimizations for derived tables and views.
The main consolidated patch.
added:
mysql-test/r/derived_view.result
mysql-test/t/derived_view.test
modified:
mysql-test/r/derived.result
mysql-test/r/explain.result
mysql-test/r/func_str.result
mysql-test/r/index_merge_myisam.result
mysql-test/r/information_schema.result
mysql-test/r/innodb_lock_wait_timeout_1.result
mysql-test/r/innodb_mysql.result
mysql-test/r/lock_multi_bug38499.result
mysql-test/r/myisam_mrr.result
mysql-test/r/ps.result
mysql-test/r/ps_ddl.result
mysql-test/r/strict.result
mysql-test/r/subselect.result
mysql-test/r/subselect3.result
mysql-test/r/subselect3_jcl6.result
mysql-test/r/subselect_no_mat.result
mysql-test/r/subselect_no_opts.result
mysql-test/r/subselect_no_semijoin.result
mysql-test/r/table_elim.result
mysql-test/r/view.result
mysql-test/r/view_grant.result
mysql-test/t/lock_multi_bug38499.test
sql/field.cc
sql/field.h
sql/handler.cc
sql/item.cc
sql/item.h
sql/item_cmpfunc.cc
sql/item_cmpfunc.h
sql/item_func.cc
sql/item_func.h
sql/item_subselect.cc
sql/item_subselect.h
sql/mysql_priv.h
sql/opt_range.cc
sql/opt_subselect.cc
sql/opt_sum.cc
sql/records.cc
sql/sp_head.cc
sql/sql_acl.cc
sql/sql_base.cc
sql/sql_bitmap.h
sql/sql_cache.cc
sql/sql_class.cc
sql/sql_class.h
sql/sql_cursor.cc
sql/sql_delete.cc
sql/sql_derived.cc
sql/sql_help.cc
sql/sql_insert.cc
sql/sql_join_cache.cc
sql/sql_lex.cc
sql/sql_lex.h
sql/sql_list.h
sql/sql_load.cc
sql/sql_olap.cc
sql/sql_parse.cc
sql/sql_prepare.cc
sql/sql_select.cc
sql/sql_select.h
sql/sql_show.cc
sql/sql_table.cc
sql/sql_union.cc
sql/sql_update.cc
sql/sql_view.cc
sql/sql_yacc.yy
sql/table.cc
sql/table.h
=== modified file 'mysql-test/r/derived.result'
--- a/mysql-test/r/derived.result 2009-07-11 18:44:29 +0000
+++ b/mysql-test/r/derived.result 2010-05-26 20:18:18 +0000
@@ -57,9 +57,8 @@ a b a b
3 c 3 c
explain select * from t1 as x1, (select * from t1) as x2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY x1 ALL NULL NULL NULL NULL 4
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 Using join buffer
-2 DERIVED t1 ALL NULL NULL NULL NULL 4
+1 SIMPLE x1 ALL NULL NULL NULL NULL 4
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using join buffer
drop table if exists t2,t3;
select * from (select 1) as a;
1
@@ -91,7 +90,7 @@ a b
2 b
explain select * from (select * from t1 union select * from t1) a;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 3
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 8
2 DERIVED t1 ALL NULL NULL NULL NULL 4
3 UNION t1 ALL NULL NULL NULL NULL 4
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
@@ -113,9 +112,8 @@ a b
3 c
explain select * from (select t1.*, t2.a as t2a from t1,t2 where t1.a=t2.a) t1;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t2 system NULL NULL NULL NULL 1
-2 DERIVED t1 ALL NULL NULL NULL NULL 4 Using where
+1 SIMPLE t2 system NULL NULL NULL NULL 1
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using where
drop table t1, t2;
create table t1(a int not null, t char(8), index(a));
SELECT * FROM (SELECT * FROM t1) as b ORDER BY a ASC LIMIT 0,20;
@@ -142,8 +140,7 @@ a t
20 20
explain select count(*) from t1 as tt1, (select * from t1) as tt2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
-2 DERIVED t1 ALL NULL NULL NULL NULL 10000
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
drop table t1;
SELECT * FROM (SELECT (SELECT * FROM (SELECT 1 as a) as a )) as b;
(SELECT * FROM (SELECT 1 as a) as a )
@@ -172,30 +169,30 @@ insert into t1 values (NULL, 'a', 1), (N
insert into t2 values (1, 100), (1, 101), (1, 102), (2, 100), (2, 103), (2, 104), (3, 101), (3, 102), (3, 105);
SELECT STRAIGHT_JOIN d.pla_id, m2.mat_id FROM t1 m2 INNER JOIN (SELECT mp.pla_id, MIN(m1.matintnum) AS matintnum FROM t2 mp INNER JOIN t1 m1 ON mp.mat_id=m1.mat_id GROUP BY mp.pla_id) d ON d.matintnum=m2.matintnum;
pla_id mat_id
-100 1
-101 1
102 1
-103 2
+101 1
+100 1
104 2
+103 2
105 3
SELECT STRAIGHT_JOIN d.pla_id, m2.test FROM t1 m2 INNER JOIN (SELECT mp.pla_id, MIN(m1.matintnum) AS matintnum FROM t2 mp INNER JOIN t1 m1 ON mp.mat_id=m1.mat_id GROUP BY mp.pla_id) d ON d.matintnum=m2.matintnum;
pla_id test
-100 1
-101 1
102 1
-103 2
+101 1
+100 1
104 2
+103 2
105 3
explain SELECT STRAIGHT_JOIN d.pla_id, m2.mat_id FROM t1 m2 INNER JOIN (SELECT mp.pla_id, MIN(m1.matintnum) AS matintnum FROM t2 mp INNER JOIN t1 m1 ON mp.mat_id=m1.mat_id GROUP BY mp.pla_id) d ON d.matintnum=m2.matintnum;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY m2 ALL NULL NULL NULL NULL 9
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 Using where; Using join buffer
+1 PRIMARY <derived2> ref key0 key0 7 test.m2.matintnum 2 Using where
2 DERIVED mp ALL NULL NULL NULL NULL 9 Using temporary; Using filesort
2 DERIVED m1 eq_ref PRIMARY PRIMARY 3 test.mp.mat_id 1
explain SELECT STRAIGHT_JOIN d.pla_id, m2.test FROM t1 m2 INNER JOIN (SELECT mp.pla_id, MIN(m1.matintnum) AS matintnum FROM t2 mp INNER JOIN t1 m1 ON mp.mat_id=m1.mat_id GROUP BY mp.pla_id) d ON d.matintnum=m2.matintnum;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY m2 ALL NULL NULL NULL NULL 9
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 Using where; Using join buffer
+1 PRIMARY <derived2> ref key0 key0 7 test.m2.matintnum 2 Using where
2 DERIVED mp ALL NULL NULL NULL NULL 9 Using temporary; Using filesort
2 DERIVED m1 eq_ref PRIMARY PRIMARY 3 test.mp.mat_id 1
drop table t1,t2;
@@ -230,9 +227,8 @@ count(*)
2
explain select count(*) from t1 INNER JOIN (SELECT A.E1, A.E2, A.E3 FROM t1 AS A WHERE A.E3 = (SELECT MAX(B.E3) FROM t1 AS B WHERE A.E2 = B.E2)) AS THEMAX ON t1.E1 = THEMAX.E2 AND t1.E1 = t1.E2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
-1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 THEMAX.E2 1 Using where
-2 DERIVED A ALL NULL NULL NULL NULL 2 Using where
+1 SIMPLE A ALL NULL NULL NULL NULL 2 Using where
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 4 test.A.E2 1 Using where
3 DEPENDENT SUBQUERY B ALL NULL NULL NULL NULL 2 Using where
drop table t1;
create table t1 (a int);
@@ -245,8 +241,8 @@ a a
2 2
explain select * from ( select * from t1 union select * from t1) a,(select * from t1 union select * from t1) b;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
-1 PRIMARY <derived4> ALL NULL NULL NULL NULL 2 Using join buffer
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4
+1 PRIMARY <derived4> ALL NULL NULL NULL NULL 4 Using join buffer
4 DERIVED t1 ALL NULL NULL NULL NULL 2
5 UNION t1 ALL NULL NULL NULL NULL 2
NULL UNION RESULT <union4,5> ALL NULL NULL NULL NULL NULL
@@ -311,7 +307,7 @@ a 7.0000
b 3.5000
explain SELECT s.name, AVG(s.val) AS median FROM (SELECT x.name, x.val FROM t1 x, t1 y WHERE x.name=y.name GROUP BY x.name, x.val HAVING SUM(y.val <= x.val) >= COUNT(*)/2 AND SUM(y.val >= x.val) >= COUNT(*)/2) AS s GROUP BY s.name;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 3 Using temporary; Using filesort
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 289 Using temporary; Using filesort
2 DERIVED x ALL NULL NULL NULL NULL 17 Using temporary; Using filesort
2 DERIVED y ALL NULL NULL NULL NULL 17 Using where; Using join buffer
drop table t1;
@@ -322,8 +318,7 @@ id select_type table type possible_keys
1 SIMPLE t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index
explain select a from (select a from t2 where a>1) tt;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index
+1 SIMPLE t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index
drop table t2;
CREATE TABLE `t1` ( `itemid` int(11) NOT NULL default '0', `grpid` varchar(15) NOT NULL default '', `vendor` int(11) NOT NULL default '0', `date_` date NOT NULL default '0000-00-00', `price` decimal(12,2) NOT NULL default '0.00', PRIMARY KEY (`itemid`,`grpid`,`vendor`,`date_`), KEY `itemid` (`itemid`,`vendor`), KEY `itemid_2` (`itemid`,`date_`));
insert into t1 values (128, 'rozn', 2, curdate(), 10),
=== added file 'mysql-test/r/derived_view.result'
--- a/mysql-test/r/derived_view.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/r/derived_view.result 2010-05-26 20:18:18 +0000
@@ -0,0 +1,570 @@
+drop table if exists t1,t2;
+drop view if exists v1,v2,v3,v4;
+create table t1(f1 int, f11 int);
+create table t2(f2 int, f22 int);
+insert into t1 values(1,1),(2,2),(3,3),(5,5),(9,9),(7,7);
+insert into t1 values(17,17),(13,13),(11,11),(15,15),(19,19);
+insert into t2 values(1,1),(3,3),(2,2),(4,4),(8,8),(6,6);
+insert into t2 values(12,12),(14,14),(10,10),(18,18),(16,16);
+Tests:
+for merged derived tables
+explain for simple derived
+explain select * from (select * from t1) tt;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11
+select * from (select * from t1) tt;
+f1 f11
+1 1
+2 2
+3 3
+5 5
+9 9
+7 7
+17 17
+13 13
+11 11
+15 15
+19 19
+explain for multitable derived
+explain extended select * from (select * from t1 join t2 on f1=f2) tt;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00
+1 SIMPLE t2 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t1` join `test`.`t2` where (`test`.`t2`.`f2` = `test`.`t1`.`f1`)
+select * from (select * from t1 join t2 on f1=f2) tt;
+f1 f11 f2 f22
+1 1 1 1
+3 3 3 3
+2 2 2 2
+explain for derived with where
+explain extended
+select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where ((`test`.`t1`.`f11` = 2) and (`test`.`t1`.`f1` in (2,3)))
+select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+f1 f11
+2 2
+join of derived
+explain extended
+select * from (select * from t1 where f1 in (2,3)) tt join
+(select * from t1 where f1 in (1,2)) aa on tt.f1=aa.f1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` join `test`.`t1` where ((`test`.`t1`.`f1` = `test`.`t1`.`f1`) and (`test`.`t1`.`f1` in (1,2)) and (`test`.`t1`.`f1` in (2,3)))
+select * from (select * from t1 where f1 in (2,3)) tt join
+(select * from t1 where f1 in (1,2)) aa on tt.f1=aa.f1;
+f1 f11 f1 f11
+2 2 2 2
+flush status;
+explain extended
+select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where ((`test`.`t1`.`f11` = 2) and (`test`.`t1`.`f1` in (2,3)))
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 0
+flush status;
+select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+f1 f11
+2 2
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 12
+for merged views
+create view v1 as select * from t1;
+create view v2 as select * from t1 join t2 on f1=f2;
+create view v3 as select * from t1 where f1 in (2,3);
+create view v4 as select * from t2 where f2 in (2,3);
+explain for simple views
+explain extended select * from v1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1`
+select * from v1;
+f1 f11
+1 1
+2 2
+3 3
+5 5
+9 9
+7 7
+17 17
+13 13
+11 11
+15 15
+19 19
+explain for multitable views
+explain extended select * from v2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00
+1 SIMPLE t2 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t1` join `test`.`t2` where (`test`.`t2`.`f2` = `test`.`t1`.`f1`)
+select * from v2;
+f1 f11 f2 f22
+1 1 1 1
+3 3 3 3
+2 2 2 2
+explain for views with where
+explain extended select * from v3 where f11 in (1,3);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where ((`test`.`t1`.`f11` in (1,3)) and (`test`.`t1`.`f1` in (2,3)))
+select * from v3 where f11 in (1,3);
+f1 f11
+3 3
+explain for joined views
+explain extended
+select * from v3 join v4 on f1=f2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t1` join `test`.`t2` where ((`test`.`t2`.`f2` = `test`.`t1`.`f1`) and (`test`.`t1`.`f1` in (2,3)) and (`test`.`t1`.`f1` in (2,3)))
+select * from v3 join v4 on f1=f2;
+f1 f11 f2 f22
+3 3 3 3
+2 2 2 2
+flush status;
+explain extended select * from v4 where f2 in (1,3);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 11 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t2` where ((`test`.`t2`.`f2` in (1,3)) and (`test`.`t2`.`f2` in (2,3)))
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 0
+flush status;
+select * from v4 where f2 in (1,3);
+f2 f22
+3 3
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 12
+for materialized derived tables
+explain for simple derived
+explain extended select * from (select * from t1 group by f1) tt;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` group by `test`.`t1`.`f1`) `tt`
+select * from (select * from t1 having f1=f1) tt;
+f1 f11
+1 1
+2 2
+3 3
+5 5
+9 9
+7 7
+17 17
+13 13
+11 11
+15 15
+19 19
+explain showing created indexes
+explain extended
+select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 11 100.00
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 2 100.00 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`tt`.`f2` AS `f2`,`tt`.`f22` AS `f22` from `test`.`t1` join (select `test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t2` group by `test`.`t2`.`f2`) `tt` where (`tt`.`f2` = `test`.`t1`.`f1`)
+select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+f1 f11 f2 f22
+1 1 1 1
+2 2 2 2
+3 3 3 3
+explain showing late materialization
+flush status;
+explain select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 11
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 2 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 11 Using temporary; Using filesort
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 0
+flush status;
+select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+f1 f11 f2 f22
+1 1 1 1
+2 2 2 2
+3 3 3 3
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 11
+Handler_read_next 3
+Handler_read_prev 0
+Handler_read_rnd 11
+Handler_read_rnd_next 36
+for materialized views
+drop view v1,v2,v3;
+create view v1 as select * from t1 group by f1;
+create view v2 as select * from t2 group by f2;
+create view v3 as select t1.f1,t1.f11 from t1 join t1 as t11 where t1.f1=t11.f1
+having t1.f1<100;
+explain for simple derived
+explain extended select * from v1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `v1`.`f1` AS `f1`,`v1`.`f11` AS `f11` from `test`.`v1`
+select * from v1;
+f1 f11
+1 1
+2 2
+3 3
+5 5
+7 7
+9 9
+11 11
+13 13
+15 15
+17 17
+19 19
+explain showing created indexes
+explain extended select * from t1 join v2 on f1=f2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 11 100.00
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 2 100.00 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`v2`.`f2` AS `f2`,`v2`.`f22` AS `f22` from `test`.`t1` join `test`.`v2` where (`v2`.`f2` = `test`.`t1`.`f1`)
+select * from t1 join v2 on f1=f2;
+f1 f11 f2 f22
+1 1 1 1
+2 2 2 2
+3 3 3 3
+explain extended
+select * from t1,v3 as v31,v3 where t1.f1=v31.f1 and t1.f1=v3.f1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 11 100.00
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 10 100.00 Using where
+1 PRIMARY <derived3> ref key0 key0 5 test.t1.f1 10 100.00 Using where
+3 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00
+3 DERIVED t11 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+2 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED t11 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`v31`.`f1` AS `f1`,`v31`.`f11` AS `f11`,`v3`.`f1` AS `f1`,`v3`.`f11` AS `f11` from `test`.`t1` join `test`.`v3` `v31` join `test`.`v3` where ((`v31`.`f1` = `test`.`t1`.`f1`) and (`v3`.`f1` = `test`.`t1`.`f1`))
+flush status;
+select * from t1,v3 as v31,v3 where t1.f1=v31.f1 and t1.f1=v3.f1;
+f1 f11 f1 f11 f1 f11
+1 1 1 1 1 1
+2 2 2 2 2 2
+3 3 3 3 3 3
+5 5 5 5 5 5
+9 9 9 9 9 9
+7 7 7 7 7 7
+17 17 17 17 17 17
+13 13 13 13 13 13
+11 11 11 11 11 11
+15 15 15 15 15 15
+19 19 19 19 19 19
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 22
+Handler_read_next 22
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 60
+explain showing late materialization
+flush status;
+explain select * from t1 join v2 on f1=f2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 11
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 2 Using where
+2 DERIVED t2 ALL NULL NULL NULL NULL 11 Using temporary; Using filesort
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 0
+Handler_read_next 0
+Handler_read_prev 0
+Handler_read_rnd 0
+Handler_read_rnd_next 0
+flush status;
+select * from t1 join v2 on f1=f2;
+f1 f11 f2 f22
+1 1 1 1
+2 2 2 2
+3 3 3 3
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 11
+Handler_read_next 3
+Handler_read_prev 0
+Handler_read_rnd 11
+Handler_read_rnd_next 36
+explain extended select * from v1 join v4 on f1=f2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t2 ALL NULL NULL NULL NULL 11 100.00 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t2.f2 2 100.00 Using where
+2 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `v1`.`f1` AS `f1`,`v1`.`f11` AS `f11`,`test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`v1` join `test`.`t2` where ((`v1`.`f1` = `test`.`t2`.`f2`) and (`test`.`t2`.`f2` in (2,3)))
+select * from v1 join v4 on f1=f2;
+f1 f11 f2 f22
+3 3 3 3
+2 2 2 2
+merged derived in merged derived
+explain extended select * from (select * from
+(select * from t1 where f1 < 7) tt where f1 > 2) zz;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where ((`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7))
+select * from (select * from
+(select * from t1 where f1 < 7) tt where f1 > 2) zz;
+f1 f11
+3 3
+5 5
+materialized derived in merged derived
+explain extended select * from (select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2) zz;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE <derived3> ALL NULL NULL NULL NULL 11 100.00 Using where
+3 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+Warnings:
+Note 1003 select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where (`test`.`t1`.`f1` < 7) group by `test`.`t1`.`f1`) `tt` where (`tt`.`f1` > 2)
+select * from (select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2) zz;
+f1 f11
+3 3
+5 5
+merged derived in materialized derived
+explain extended select * from (select * from
+(select * from t1 where f1 < 7) tt where f1 > 2 group by f1) zz;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+Warnings:
+Note 1003 select `zz`.`f1` AS `f1`,`zz`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where ((`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7)) group by `test`.`t1`.`f1`) `zz`
+select * from (select * from
+(select * from t1 where f1 < 7) tt where f1 > 2 group by f1) zz;
+f1 f11
+3 3
+5 5
+materialized derived in materialized derived
+explain extended select * from (select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) zz;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED <derived3> ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+3 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+Warnings:
+Note 1003 select `zz`.`f1` AS `f1`,`zz`.`f11` AS `f11` from (select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where (`test`.`t1`.`f1` < 7) group by `test`.`t1`.`f1`) `tt` where (`tt`.`f1` > 2) group by `tt`.`f1`) `zz`
+select * from (select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) zz;
+f1 f11
+3 3
+5 5
+mat in merged derived join mat in merged derived
+explain extended select * from
+(select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) x
+join
+(select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) z
+on x.f1 = z.f1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE <derived3> ALL key0 NULL NULL NULL 11 100.00 Using where
+1 SIMPLE <derived5> ref key0 key0 5 tt.f1 2 100.00 Using where
+5 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+3 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+Warnings:
+Note 1003 select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11`,`tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where (`test`.`t1`.`f1` < 7) group by `test`.`t1`.`f1`) `tt` join (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where (`test`.`t1`.`f1` < 7) group by `test`.`t1`.`f1`) `tt` where ((`tt`.`f1` = `tt`.`f1`) and (`tt`.`f1` > 2) and (`tt`.`f1` > 2))
+flush status;
+select * from
+(select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) x
+join
+(select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) z
+on x.f1 = z.f1;
+f1 f11 f1 f11
+3 3 3 3
+5 5 5 5
+show status like 'Handler_read%';
+Variable_name Value
+Handler_read_first 0
+Handler_read_key 2
+Handler_read_next 2
+Handler_read_prev 0
+Handler_read_rnd 8
+Handler_read_rnd_next 39
+flush status;
+merged in merged derived join merged in merged derived
+explain extended select * from
+(select * from
+(select * from t1 where f1 < 7 ) tt where f1 > 2 ) x
+join
+(select * from
+(select * from t1 where f1 < 7 ) tt where f1 > 2 ) z
+on x.f1 = z.f1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` join `test`.`t1` where ((`test`.`t1`.`f1` = `test`.`t1`.`f1`) and (`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7) and (`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7))
+select * from
+(select * from
+(select * from t1 where f1 < 7 ) tt where f1 > 2 ) x
+join
+(select * from
+(select * from t1 where f1 < 7 ) tt where f1 > 2 ) z
+on x.f1 = z.f1;
+f1 f11 f1 f11
+3 3 3 3
+5 5 5 5
+materialized in materialized derived join
+materialized in materialized derived
+explain extended select * from
+(select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) x
+join
+(select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) z
+on x.f1 = z.f1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL key0 NULL NULL NULL 11 100.00
+1 PRIMARY <derived4> ref key0 key0 5 x.f1 2 100.00 Using where
+4 DERIVED <derived5> ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+5 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+2 DERIVED <derived3> ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+3 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+Warnings:
+Note 1003 select `x`.`f1` AS `f1`,`x`.`f11` AS `f11`,`z`.`f1` AS `f1`,`z`.`f11` AS `f11` from (select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where (`test`.`t1`.`f1` < 7) group by `test`.`t1`.`f1`) `tt` where (`tt`.`f1` > 2) group by `tt`.`f1`) `x` join (select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` where (`test`.`t1`.`f1` < 7) group by `test`.`t1`.`f1`) `tt` where (`tt`.`f1` > 2) group by `tt`.`f1`) `z` where (`z`.`f1` = `x`.`f1`)
+select * from
+(select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) x
+join
+(select * from
+(select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) z
+on x.f1 = z.f1;
+f1 f11 f1 f11
+3 3 3 3
+5 5 5 5
+merged view in materialized derived
+explain extended
+select * from (select * from v4 group by 1) tt;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED t2 ALL NULL NULL NULL NULL 11 100.00 Using where; Using temporary; Using filesort
+Warnings:
+Note 1003 select `tt`.`f2` AS `f2`,`tt`.`f22` AS `f22` from (select `test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t2` where (`test`.`t2`.`f2` in (2,3)) group by 1) `tt`
+select * from (select * from v4 group by 1) tt;
+f2 f22
+2 2
+3 3
+materialized view in merged derived
+explain extended
+select * from ( select * from v1 where f1 < 7) tt;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE <derived3> ALL NULL NULL NULL NULL 11 100.00 Using where
+3 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `v1`.`f1` AS `f1`,`v1`.`f11` AS `f11` from `test`.`v1` where (`v1`.`f1` < 7)
+select * from ( select * from v1 where f1 < 7) tt;
+f1 f11
+1 1
+2 2
+3 3
+5 5
+merged view in a merged view in a merged derived
+create view v6 as select * from v4 where f2 < 7;
+explain extended select * from (select * from v6) tt;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 11 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22` from `test`.`t2` where ((`test`.`t2`.`f2` < 7) and (`test`.`t2`.`f2` in (2,3)))
+select * from (select * from v6) tt;
+f2 f22
+3 3
+2 2
+materialized view in a merged view in a materialized derived
+create view v7 as select * from v1;
+explain extended select * from (select * from v7 group by 1) tt;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11 100.00
+2 DERIVED <derived4> ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+4 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `tt`.`f1` AS `f1`,`tt`.`f11` AS `f11` from (select `v1`.`f1` AS `f1`,`v1`.`f11` AS `f11` from `test`.`v1` group by 1) `tt`
+select * from (select * from v7 group by 1) tt;
+f1 f11
+1 1
+2 2
+3 3
+5 5
+7 7
+9 9
+11 11
+13 13
+15 15
+17 17
+19 19
+join of above two
+explain extended select * from v6 join v7 on f2=f1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 11 100.00 Using where
+1 SIMPLE <derived5> ref key0 key0 5 test.t2.f2 2 100.00 Using where
+5 DERIVED t1 ALL NULL NULL NULL NULL 11 100.00 Using temporary; Using filesort
+Warnings:
+Note 1003 select `test`.`t2`.`f2` AS `f2`,`test`.`t2`.`f22` AS `f22`,`v1`.`f1` AS `f1`,`v1`.`f11` AS `f11` from `test`.`t2` join `test`.`v1` where ((`v1`.`f1` = `test`.`t2`.`f2`) and (`test`.`t2`.`f2` < 7) and (`test`.`t2`.`f2` in (2,3)))
+select * from v6 join v7 on f2=f1;
+f2 f22 f1 f11
+3 3 3 3
+2 2 2 2
+test two keys
+explain select * from t1 join (select * from t2 group by f2) tt on t1.f1=tt.f2 join t1 xx on tt.f22=xx.f1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 11
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 2 Using where
+1 PRIMARY xx ALL NULL NULL NULL NULL 11 Using where; Using join buffer
+2 DERIVED t2 ALL NULL NULL NULL NULL 11 Using temporary; Using filesort
+select * from t1 join (select * from t2 group by f2) tt on t1.f1=tt.f2 join t1 xx on tt.f22=xx.f1;
+f1 f11 f2 f22 f1 f11
+1 1 1 1 1 1
+2 2 2 2 2 2
+3 3 3 3 3 3
+TODO: Add a test with a mergeable view over 64 tables to check the fall
+back to materialization when a merge would exceed MAX_TABLES.
+drop table t1,t2;
+drop view v1,v2,v3,v4,v6,v7;
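
The new derived_view test above exercises the central rule of this work: a
derived table or view that needs no temporary table is merged into the outer
select and shows up as select_type SIMPLE, while one that does (grouping,
aggregation and similar constructs) is materialized (select_type DERIVED) and
the optimizer may build an index over the result, reported as key0. A minimal
sketch of the two cases; the tables d1 and d2 are illustrative, not taken
from the patch:

  create table d1 (f1 int, f11 int);
  create table d2 (f2 int, f22 int);
  # merged: the derived table folds into the outer query, so EXPLAIN
  # shows one SIMPLE select reading d1 directly
  explain select * from (select * from d1) tt;
  # materialized: GROUP BY forces a temporary table; the join can then
  # do ref lookups through the auto-generated key (key0) on it
  explain select * from d1 join (select * from d2 group by f2) tt on d1.f1 = tt.f2;
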
=== modified file 'mysql-test/r/explain.result'
--- a/mysql-test/r/explain.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/explain.result 2010-05-26 20:18:18 +0000
@@ -102,7 +102,7 @@ INSERT INTO t2 VALUES (),(),();
EXPLAIN SELECT 1 FROM
(SELECT 1 FROM t2,t1 WHERE b < c GROUP BY 1 LIMIT 1) AS d2;
id select_type table type possible_keys key key_len ref rows Extra
-X X X X X X X X X const row not found
+X X X X X X X X X
X X X X X X X X X
X X X X X X X X X Range checked for each record (index map: 0xFFFFFFFFFF)
DROP TABLE t2;
@@ -114,7 +114,7 @@ INSERT INTO t2 VALUES (1),(2);
EXPLAIN EXTENDED SELECT 1
FROM (SELECT COUNT(DISTINCT t1.a) FROM t1,t2 GROUP BY t1.a) AS s1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 100.00
2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort
2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using join buffer
Warnings:
@@ -122,7 +122,7 @@ Note 1003 select 1 AS `1` from (select c
EXPLAIN EXTENDED SELECT 1
FROM (SELECT COUNT(DISTINCT t1.a) FROM t1,t2 GROUP BY t1.a) AS s1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 100.00
2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort
2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using join buffer
Warnings:
@@ -132,7 +132,7 @@ prepare s1 from
FROM (SELECT COUNT(DISTINCT t1.a) FROM t1,t2 GROUP BY t1.a) AS s1';
execute s1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 100.00
2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort
2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using join buffer
Warnings:
@@ -142,14 +142,14 @@ prepare s1 from
FROM (SELECT COUNT(DISTINCT t1.a) FROM t1,t2 GROUP BY t1.a) AS s1';
execute s1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 100.00
2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort
2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using join buffer
Warnings:
Note 1003 select 1 AS `1` from (select count(distinct `test`.`t1`.`a`) AS `COUNT(DISTINCT t1.a)` from `test`.`t1` join `test`.`t2` group by `test`.`t1`.`a`) `s1`
execute s1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4 100.00
2 DERIVED t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort
2 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using join buffer
Warnings:
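
In the explain.result hunks above the row estimate for <derived2> doubles
from 2 to 4. A plausible reading, inferred from the plans rather than stated
in the patch: with materialization now deferred, EXPLAIN can no longer report
the actual number of groups and falls back to the derived query's join-output
estimate (2 rows of t1 times 2 rows of t2). The statement involved, as in the
test:

  EXPLAIN EXTENDED SELECT 1
  FROM (SELECT COUNT(DISTINCT t1.a) FROM t1,t2 GROUP BY t1.a) AS s1;
  # before: rows=2 for <derived2>, taken from the already-filled temp table
  # after:  rows=4, the upper-bound estimate available before materialization
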
=== modified file 'mysql-test/r/func_str.result'
--- a/mysql-test/r/func_str.result 2009-12-04 15:36:58 +0000
+++ b/mysql-test/r/func_str.result 2010-05-26 20:18:18 +0000
@@ -2549,14 +2549,12 @@ create table t1(f1 tinyint default null)
insert into t1 values (-1),(null);
explain select 1 as a from t1,(select decode(f1,f1) as b from t1) a;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 2
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 Using join buffer
-2 DERIVED t1 ALL NULL NULL NULL NULL 2
+1 SIMPLE t1 ALL NULL NULL NULL NULL 2
+1 SIMPLE t1 ALL NULL NULL NULL NULL 2 Using join buffer
explain select 1 as a from t1,(select encode(f1,f1) as b from t1) a;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 2
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 Using join buffer
-2 DERIVED t1 ALL NULL NULL NULL NULL 2
+1 SIMPLE t1 ALL NULL NULL NULL NULL 2
+1 SIMPLE t1 ALL NULL NULL NULL NULL 2 Using join buffer
drop table t1;
#
# Bug#49141: Encode function is significantly slower in 5.1 compared to 5.0
=== modified file 'mysql-test/r/index_merge_myisam.result'
--- a/mysql-test/r/index_merge_myisam.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/index_merge_myisam.result 2010-05-26 20:18:18 +0000
@@ -285,8 +285,7 @@ id select_type table type possible_keys
NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL
explain select * from (select * from t1 where key1 = 3 or key2 =3) as Z where key8 >5;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index_merge i1,i2 i1,i2 4,4 NULL 2 Using union(i1,i2); Using where; Using index
+1 SIMPLE t1 ALL i1,i2,i8 NULL NULL NULL 1024 Using where
create table t3 like t0;
insert into t3 select * from t0;
alter table t3 add key9 int not null, add index i9(key9);
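
The index_merge_myisam change is the same merge seen from the cost side: once
the derived table is folded in, its OR condition is optimized together with
the outer WHERE, so i1, i2 and i8 all appear as possible keys of one merged
select (here the optimizer still settles on a full scan) instead of the outer
query scanning a one-row materialized result. Schematically, with key1, key2,
key8 being the indexed columns of the test's t1:

  select * from (select * from t1 where key1 = 3 or key2 = 3) as Z where key8 > 5;
  # after merging, this is planned as the single select
  select * from t1 where (key1 = 3 or key2 = 3) and key8 > 5;
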
=== modified file 'mysql-test/r/information_schema.result'
--- a/mysql-test/r/information_schema.result 2010-03-15 11:51:23 +0000
+++ b/mysql-test/r/information_schema.result 2010-05-26 20:18:18 +0000
@@ -1285,8 +1285,7 @@ id select_type table type possible_keys
1 SIMPLE tables ALL NULL NULL NULL NULL NULL Open_frm_only; Scanned all databases; Using filesort
explain select * from (select table_name from information_schema.tables) as a;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 0 const row not found
-2 DERIVED tables ALL NULL NULL NULL NULL NULL Skip_open_table; Scanned all databases
+1 SIMPLE tables ALL NULL NULL NULL NULL NULL Skip_open_table; Scanned all databases
drop view v1;
create table t1 (f1 int(11));
create table t2 (f1 int(11), f2 int(11));
=== modified file 'mysql-test/r/innodb_lock_wait_timeout_1.result'
--- a/mysql-test/r/innodb_lock_wait_timeout_1.result 2009-11-12 11:43:33 +0000
+++ b/mysql-test/r/innodb_lock_wait_timeout_1.result 2010-05-26 20:18:18 +0000
@@ -104,7 +104,7 @@ id 1
select_type PRIMARY
table <derived2>
type ALL
-possible_keys NULL
+possible_keys key0
key NULL
key_len NULL
ref NULL
@@ -308,7 +308,7 @@ id 1
select_type PRIMARY
table <derived2>
type ALL
-possible_keys NULL
+possible_keys key0
key NULL
key_len NULL
ref NULL
=== modified file 'mysql-test/r/innodb_mysql.result'
--- a/mysql-test/r/innodb_mysql.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/innodb_mysql.result 2010-05-26 20:18:18 +0000
@@ -1731,8 +1731,8 @@ EXPLAIN
SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index c3,c2 c2 10 NULL 5
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
+2 DERIVED t1 index_merge c3,c2 c3,c2 5,10 NULL 1 Using intersect(c3,c2); Using where; Using filesort
DROP TABLE t1;
CREATE TABLE t1 (c1 REAL, c2 REAL, c3 REAL, KEY (c3), KEY (c2, c3))
ENGINE=InnoDB;
@@ -1745,8 +1745,8 @@ EXPLAIN
SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index c3,c2 c2 18 NULL 5
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
+2 DERIVED t1 index_merge c3,c2 c3,c2 9,18 NULL 1 Using intersect(c3,c2); Using where; Using filesort
DROP TABLE t1;
CREATE TABLE t1 (c1 DECIMAL(12,2), c2 DECIMAL(12,2), c3 DECIMAL(12,2),
KEY (c3), KEY (c2, c3))
@@ -1760,8 +1760,8 @@ EXPLAIN
SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1
-2 DERIVED t1 index c3,c2 c2 14 NULL 5
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
+2 DERIVED t1 index_merge c3,c2 c3,c2 7,14 NULL 1 Using intersect(c3,c2); Using where; Using filesort
DROP TABLE t1;
End of 5.1 tests
drop table if exists t1, t2, t3;
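
In the innodb_mysql plans above the derived table is no longer pre-evaluated
into a const/system row during optimization; it stays a regular materialized
table (rows is now the estimate 2), and the subquery's own plan is chosen
independently, here switching to an intersect index_merge. The query shape,
copied from the test:

  SELECT 1 FROM (SELECT COUNT(DISTINCT c1)
  FROM t1 WHERE c2 IN (1, 1) AND c3 = 2 GROUP BY c2) x;
  # before: <derived2> was executed early and reduced to a single system row
  # after:  it is materialized lazily, so EXPLAIN shows an ordinary table
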
=== modified file 'mysql-test/r/lock_multi_bug38499.result'
--- a/mysql-test/r/lock_multi_bug38499.result 2009-08-28 21:49:16 +0000
+++ b/mysql-test/r/lock_multi_bug38499.result 2010-05-26 20:18:18 +0000
@@ -2,7 +2,9 @@ SET @odl_sync_frm = @@global.sync_frm;
SET @@global.sync_frm = OFF;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1( a INT, b INT );
+CREATE TABLE t2( a INT, b INT );
INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
# 1. test regular tables
# 1.1. test altering of columns that multiupdate doesn't use
# 1.1.1. normal mode
@@ -18,5 +20,5 @@ ALTER TABLE t1 ADD COLUMN a INT;
# 2.2. test altering of columns that multiupdate uses
# 2.2.1. normal mode
# 2.2.2. PS mode
-DROP TABLE t1;
+DROP TABLE t1,t2;
SET @@global.sync_frm = @odl_sync_frm;
=== modified file 'mysql-test/r/myisam_mrr.result'
--- a/mysql-test/r/myisam_mrr.result 2010-03-11 21:43:31 +0000
+++ b/mysql-test/r/myisam_mrr.result 2010-05-26 20:18:18 +0000
@@ -347,7 +347,7 @@ GROUP BY t2.pk
);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE
-2 SUBQUERY t2 ALL int_key int_key 5 3 33.33 Using index condition; Using filesort
+2 SUBQUERY t2 ALL int_key int_key 5 const 3 33.33 Using index condition; Using filesort
Warnings:
Note 1003 select min(`test`.`t1`.`pk`) AS `MIN(t1.pk)` from `test`.`t1` where 0
DROP TABLE t1, t2;
=== modified file 'mysql-test/r/ps.result'
--- a/mysql-test/r/ps.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/ps.result 2010-05-26 20:18:18 +0000
@@ -156,7 +156,6 @@ prepare stmt1 from @stmt ;
execute stmt1 ;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
-6 DERIVED NULL NULL NULL NULL NULL NULL NULL no matching row in const table
5 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
4 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
@@ -164,7 +163,6 @@ id select_type table type possible_keys
execute stmt1 ;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
-6 DERIVED NULL NULL NULL NULL NULL NULL NULL no matching row in const table
5 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
4 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
@@ -172,7 +170,6 @@ id select_type table type possible_keys
explain SELECT (SELECT SUM(c1 + c12 + 0.0) FROM t2 where (t1.c2 - 0e-3) = t2.c2 GROUP BY t1.c15 LIMIT 1) as scalar_s, exists (select 1.0e+0 from t2 where t2.c3 * 9.0000000000 = t1.c4) as exists_s, c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s, (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s FROM t1, (select c25 x, c32 y from t2) tt WHERE x * 1 = c25;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
-6 DERIVED NULL NULL NULL NULL NULL NULL NULL no matching row in const table
5 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
4 DEPENDENT SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
=== modified file 'mysql-test/r/ps_ddl.result'
--- a/mysql-test/r/ps_ddl.result 2010-01-16 07:44:24 +0000
+++ b/mysql-test/r/ps_ddl.result 2010-05-26 20:18:18 +0000
@@ -1507,12 +1507,12 @@ create view v_27690_1 as select A.a, A.b
execute stmt;
a b a b
1 1 1 1
-2 2 1 1
-1 1 1 1
-2 2 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
+1 1 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
call p_verify_reprepare_count(1);
SUCCESS
@@ -1520,12 +1520,12 @@ SUCCESS
execute stmt;
a b a b
1 1 1 1
-2 2 1 1
-1 1 1 1
-2 2 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
+1 1 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
call p_verify_reprepare_count(0);
SUCCESS
=== modified file 'mysql-test/r/strict.result'
--- a/mysql-test/r/strict.result 2009-06-11 16:21:32 +0000
+++ b/mysql-test/r/strict.result 2010-05-26 20:18:18 +0000
@@ -1107,6 +1107,8 @@ Warnings:
Error 1411 Incorrect datetime value: '2004.12.12 10:22:61' for function str_to_date
Error 1411 Incorrect datetime value: '2004.12.12 10:22:61' for function str_to_date
Error 1411 Incorrect datetime value: '2004.12.12 10:22:61' for function str_to_date
+Error 1411 Incorrect datetime value: '2004.12.12 10:22:61' for function str_to_date
+Error 1411 Incorrect datetime value: '2004.12.12 10:22:61' for function str_to_date
drop table t1;
create table t1 (col1 char(3), col2 integer);
insert into t1 (col1) values (cast(1000 as char(3)));
=== modified file 'mysql-test/r/subselect.result'
--- a/mysql-test/r/subselect.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect.result 2010-05-26 20:18:18 +0000
@@ -46,13 +46,13 @@ SELECT (SELECT a) as a;
ERROR 42S22: Reference 'a' not supported (forward reference in item list)
EXPLAIN EXTENDED SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
2 DERIVED NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
-Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select '1' AS `a`) = 1)
+Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select `b`.`a` AS `a`) = 1)
SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
1
1
@@ -201,11 +201,10 @@ select (select t3.a from t3 where a<8 or
explain extended select (select t3.a from t3 where a<8 order by 1 desc limit 1), a from
(select * from t2 where a>1) as tt;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived3> system NULL NULL NULL NULL 1 100.00
-3 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where
+1 PRIMARY t2 ALL NULL NULL NULL NULL 2 100.00 Using where
2 SUBQUERY t3 ALL NULL NULL NULL NULL 3 100.00 Using where; Using filesort
Warnings:
-Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,'2' AS `a` from (select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`a` > 1)) `tt`
+Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,`test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a` > 1)
select * from t1 where t1.a=(select t2.a from t2 where t2.b=(select max(a) from t3) order by 1 desc limit 1);
a
2
@@ -365,9 +364,9 @@ INSERT INTO t8 (pseudo,email) VALUES ('2
EXPLAIN EXTENDED SELECT pseudo,(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce')) FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
-4 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+4 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
2 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00
-3 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+3 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
Warnings:
Note 1003 select 'joce' AS `pseudo`,(select 'test' AS `email` from `test`.`t8` where 1) AS `(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce'))` from `test`.`t8` where 1
SELECT pseudo FROM t8 WHERE pseudo=(SELECT pseudo,email FROM
@@ -1339,7 +1338,7 @@ a
explain extended select * from t2 where t2.a in (select a from t1);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
-1 PRIMARY t1 ref a a 5 test.t2.a 101 100.00 Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 5 test.t2.a 100 100.00 Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) where (`test`.`t1`.`a` = `test`.`t2`.`a`)
select * from t2 where t2.a in (select a from t1 where t1.b <> 30);
@@ -1349,7 +1348,7 @@ a
explain extended select * from t2 where t2.a in (select a from t1 where t1.b <> 30);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
-1 PRIMARY t1 ref a a 5 test.t2.a 101 100.00 Using where; Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 5 test.t2.a 100 100.00 Using where; Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` <> 30))
select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a);
@@ -1360,7 +1359,7 @@ explain extended select * from t2 where
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
1 PRIMARY t3 index a a 5 NULL 3 100.00 Using index
-1 PRIMARY t1 ref a a 10 test.t2.a,test.t3.a 116 100.61 Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 10 test.t2.a,test.t3.a 11 100.00 Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1` join `test`.`t3`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` = `test`.`t3`.`a`))
insert into t1 values (3,31);
@@ -1376,7 +1375,7 @@ a
explain extended select * from t2 where t2.a in (select a from t1 where t1.b <> 30);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
-1 PRIMARY t1 ref a a 5 test.t2.a 101 100.00 Using where; Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 5 test.t2.a 100 100.00 Using where; Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` <> 30))
drop table t0, t1, t2, t3;
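
One recurring change in subselect.result (repeated in the subselect_no_*
variants below) is that the rewritten query in Note 1003 now prints a real
column reference instead of an inlined constant, because a derived table with
known single-row content is no longer collapsed to constants at optimization
time:

  EXPLAIN EXTENDED SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
  # before: <derived2> was a system table, so `a` was printed as the constant '1'
  # after:  <derived2> is a plain materialized table (rows=2), so `a` prints as `b`.`a`
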
=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result 2010-03-29 14:04:35 +0000
+++ b/mysql-test/r/subselect3.result 2010-05-26 20:18:18 +0000
@@ -879,7 +879,7 @@ Level Code Message
Note 1276 Field or reference 'test.t1.a' of SELECT #3 was resolved in SELECT #2
Note 1276 Field or reference 'test.t1.c' of SELECT #3 was resolved in SELECT #2
Error 1054 Unknown column 'c' in 'field list'
-Note 1003 select `c` AS `c` from (select (select count(`test`.`t1`.`a`) AS `COUNT(a)` from (select count(`test`.`t1`.`b`) AS `COUNT(b)` from `test`.`t1`) `x` group by `t1`.`c`) AS `(SELECT COUNT(a) FROM
+Note 1003 select `c` AS `c` from (select (select count(`test`.`t1`.`a`) AS `COUNT(a)` from (select count(`test`.`t1`.`b`) AS `COUNT(b)` from `test`.`t1`) `x` group by `test`.`t1`.`c`) AS `(SELECT COUNT(a) FROM
(SELECT COUNT(b) FROM t1) AS x GROUP BY c
)` from `test`.`t1` group by `test`.`t1`.`b`) `y`
DROP TABLE t1;
@@ -1105,9 +1105,8 @@ a
set @@optimizer_switch=default;
explain select * from (select a from t0) X where a in (select a from t1);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11
-1 PRIMARY t1 ALL NULL NULL NULL NULL 20 Using where; FirstMatch(<derived2>)
-2 DERIVED t0 ALL NULL NULL NULL NULL 11
+1 PRIMARY t0 ALL NULL NULL NULL NULL 11
+1 PRIMARY t1 ALL NULL NULL NULL NULL 20 Using where; FirstMatch(t0)
drop table t0, t1;
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
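
The subselect3 change shows the merge interacting with semi-join: once the
derived table X is merged away, the FirstMatch strategy refers to the base
table t0 directly and the separate DERIVED row disappears. The test
statement:

  explain select * from (select a from t0) X where a in (select a from t1);
  # before: t1 was FirstMatch(<derived2>), with an extra DERIVED select over t0
  # after:  a single PRIMARY select in which t1 is FirstMatch(t0)
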
=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- a/mysql-test/r/subselect3_jcl6.result 2010-03-29 14:04:35 +0000
+++ b/mysql-test/r/subselect3_jcl6.result 2010-05-26 20:18:18 +0000
@@ -883,7 +883,7 @@ Level Code Message
Note 1276 Field or reference 'test.t1.a' of SELECT #3 was resolved in SELECT #2
Note 1276 Field or reference 'test.t1.c' of SELECT #3 was resolved in SELECT #2
Error 1054 Unknown column 'c' in 'field list'
-Note 1003 select `c` AS `c` from (select (select count(`test`.`t1`.`a`) AS `COUNT(a)` from (select count(`test`.`t1`.`b`) AS `COUNT(b)` from `test`.`t1`) `x` group by `t1`.`c`) AS `(SELECT COUNT(a) FROM
+Note 1003 select `c` AS `c` from (select (select count(`test`.`t1`.`a`) AS `COUNT(a)` from (select count(`test`.`t1`.`b`) AS `COUNT(b)` from `test`.`t1`) `x` group by `test`.`t1`.`c`) AS `(SELECT COUNT(a) FROM
(SELECT COUNT(b) FROM t1) AS x GROUP BY c
)` from `test`.`t1` group by `test`.`t1`.`b`) `y`
DROP TABLE t1;
@@ -1110,9 +1110,8 @@ a
set @@optimizer_switch=default;
explain select * from (select a from t0) X where a in (select a from t1);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 11
-1 PRIMARY t1 ALL NULL NULL NULL NULL 20 Using where; FirstMatch(<derived2>); Using join buffer
-2 DERIVED t0 ALL NULL NULL NULL NULL 11
+1 PRIMARY t0 ALL NULL NULL NULL NULL 11
+1 PRIMARY t1 ALL NULL NULL NULL NULL 20 Using where; FirstMatch(t0); Using join buffer
drop table t0, t1;
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
=== modified file 'mysql-test/r/subselect_no_mat.result'
--- a/mysql-test/r/subselect_no_mat.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect_no_mat.result 2010-05-26 20:18:18 +0000
@@ -50,13 +50,13 @@ SELECT (SELECT a) as a;
ERROR 42S22: Reference 'a' not supported (forward reference in item list)
EXPLAIN EXTENDED SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
2 DERIVED NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
-Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select '1' AS `a`) = 1)
+Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select `b`.`a` AS `a`) = 1)
SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
1
1
@@ -205,11 +205,10 @@ select (select t3.a from t3 where a<8 or
explain extended select (select t3.a from t3 where a<8 order by 1 desc limit 1), a from
(select * from t2 where a>1) as tt;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived3> system NULL NULL NULL NULL 1 100.00
-3 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where
+1 PRIMARY t2 ALL NULL NULL NULL NULL 2 100.00 Using where
2 SUBQUERY t3 ALL NULL NULL NULL NULL 3 100.00 Using where; Using filesort
Warnings:
-Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,'2' AS `a` from (select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`a` > 1)) `tt`
+Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,`test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a` > 1)
select * from t1 where t1.a=(select t2.a from t2 where t2.b=(select max(a) from t3) order by 1 desc limit 1);
a
2
@@ -369,9 +368,9 @@ INSERT INTO t8 (pseudo,email) VALUES ('2
EXPLAIN EXTENDED SELECT pseudo,(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce')) FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
-4 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+4 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
2 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00
-3 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+3 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
Warnings:
Note 1003 select 'joce' AS `pseudo`,(select 'test' AS `email` from `test`.`t8` where 1) AS `(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce'))` from `test`.`t8` where 1
SELECT pseudo FROM t8 WHERE pseudo=(SELECT pseudo,email FROM
@@ -1343,7 +1342,7 @@ a
explain extended select * from t2 where t2.a in (select a from t1);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
-1 PRIMARY t1 ref a a 5 test.t2.a 101 100.00 Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 5 test.t2.a 100 100.00 Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) where (`test`.`t1`.`a` = `test`.`t2`.`a`)
select * from t2 where t2.a in (select a from t1 where t1.b <> 30);
@@ -1353,7 +1352,7 @@ a
explain extended select * from t2 where t2.a in (select a from t1 where t1.b <> 30);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
-1 PRIMARY t1 ref a a 5 test.t2.a 101 100.00 Using where; Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 5 test.t2.a 100 100.00 Using where; Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` <> 30))
select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a);
@@ -1364,7 +1363,7 @@ explain extended select * from t2 where
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
1 PRIMARY t3 index a a 5 NULL 3 100.00 Using index
-1 PRIMARY t1 ref a a 10 test.t2.a,test.t3.a 116 100.61 Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 10 test.t2.a,test.t3.a 11 100.00 Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1` join `test`.`t3`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` = `test`.`t3`.`a`))
insert into t1 values (3,31);
@@ -1380,7 +1379,7 @@ a
explain extended select * from t2 where t2.a in (select a from t1 where t1.b <> 30);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 index a a 5 NULL 4 100.00 Using index
-1 PRIMARY t1 ref a a 5 test.t2.a 101 100.00 Using where; Using index; FirstMatch(t2)
+1 PRIMARY t1 ref a a 5 test.t2.a 100 100.00 Using where; Using index; FirstMatch(t2)
Warnings:
Note 1003 select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` <> 30))
drop table t0, t1, t2, t3;
=== modified file 'mysql-test/r/subselect_no_opts.result'
--- a/mysql-test/r/subselect_no_opts.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect_no_opts.result 2010-05-26 20:18:18 +0000
@@ -50,13 +50,13 @@ SELECT (SELECT a) as a;
ERROR 42S22: Reference 'a' not supported (forward reference in item list)
EXPLAIN EXTENDED SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
2 DERIVED NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
-Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select '1' AS `a`) = 1)
+Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select `b`.`a` AS `a`) = 1)
SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
1
1
@@ -205,11 +205,10 @@ select (select t3.a from t3 where a<8 or
explain extended select (select t3.a from t3 where a<8 order by 1 desc limit 1), a from
(select * from t2 where a>1) as tt;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived3> system NULL NULL NULL NULL 1 100.00
-3 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where
+1 PRIMARY t2 ALL NULL NULL NULL NULL 2 100.00 Using where
2 SUBQUERY t3 ALL NULL NULL NULL NULL 3 100.00 Using where; Using filesort
Warnings:
-Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,'2' AS `a` from (select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`a` > 1)) `tt`
+Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,`test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a` > 1)
select * from t1 where t1.a=(select t2.a from t2 where t2.b=(select max(a) from t3) order by 1 desc limit 1);
a
2
@@ -369,9 +368,9 @@ INSERT INTO t8 (pseudo,email) VALUES ('2
EXPLAIN EXTENDED SELECT pseudo,(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce')) FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
-4 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+4 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
2 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00
-3 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+3 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
Warnings:
Note 1003 select 'joce' AS `pseudo`,(select 'test' AS `email` from `test`.`t8` where 1) AS `(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce'))` from `test`.`t8` where 1
SELECT pseudo FROM t8 WHERE pseudo=(SELECT pseudo,email FROM
=== modified file 'mysql-test/r/subselect_no_semijoin.result'
--- a/mysql-test/r/subselect_no_semijoin.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/subselect_no_semijoin.result 2010-05-26 20:18:18 +0000
@@ -50,13 +50,13 @@ SELECT (SELECT a) as a;
ERROR 42S22: Reference 'a' not supported (forward reference in item list)
EXPLAIN EXTENDED SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 1 100.00
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 100.00
3 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
2 DERIVED NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
Note 1276 Field or reference 'b.a' of SELECT #3 was resolved in SELECT #1
-Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select '1' AS `a`) = 1)
+Note 1003 select 1 AS `1` from (select 1 AS `a`) `b` having ((select `b`.`a` AS `a`) = 1)
SELECT 1 FROM (SELECT 1 as a) as b HAVING (SELECT a)=1;
1
1
@@ -205,11 +205,10 @@ select (select t3.a from t3 where a<8 or
explain extended select (select t3.a from t3 where a<8 order by 1 desc limit 1), a from
(select * from t2 where a>1) as tt;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived3> system NULL NULL NULL NULL 1 100.00
-3 DERIVED t2 ALL NULL NULL NULL NULL 2 100.00 Using where
+1 PRIMARY t2 ALL NULL NULL NULL NULL 2 100.00 Using where
2 SUBQUERY t3 ALL NULL NULL NULL NULL 3 100.00 Using where; Using filesort
Warnings:
-Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,'2' AS `a` from (select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`a` > 1)) `tt`
+Note 1003 select (select `test`.`t3`.`a` AS `a` from `test`.`t3` where (`test`.`t3`.`a` < 8) order by 1 desc limit 1) AS `(select t3.a from t3 where a<8 order by 1 desc limit 1)`,`test`.`t2`.`a` AS `a` from `test`.`t2` where (`test`.`t2`.`a` > 1)
select * from t1 where t1.a=(select t2.a from t2 where t2.b=(select max(a) from t3) order by 1 desc limit 1);
a
2
@@ -369,9 +368,9 @@ INSERT INTO t8 (pseudo,email) VALUES ('2
EXPLAIN EXTENDED SELECT pseudo,(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce')) FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
-4 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+4 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
2 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00
-3 SUBQUERY t8 const PRIMARY PRIMARY 37 1 100.00 Using index
+3 SUBQUERY t8 const PRIMARY PRIMARY 37 const 1 100.00 Using index
Warnings:
Note 1003 select 'joce' AS `pseudo`,(select 'test' AS `email` from `test`.`t8` where 1) AS `(SELECT email FROM t8 WHERE pseudo=(SELECT pseudo FROM t8 WHERE pseudo='joce'))` from `test`.`t8` where 1
SELECT pseudo FROM t8 WHERE pseudo=(SELECT pseudo,email FROM
=== modified file 'mysql-test/r/table_elim.result'
--- a/mysql-test/r/table_elim.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/table_elim.result 2010-05-26 20:18:18 +0000
@@ -117,58 +117,58 @@ t2 where id=f.id);
This should use one table:
explain select id from v1 where id=2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY f const PRIMARY PRIMARY 4 const 1 Using index
+1 SIMPLE f const PRIMARY PRIMARY 4 const 1 Using index
This should use one table:
explain extended select id from v1 where id in (1,2,3,4);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
+1 SIMPLE f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
Warnings:
-Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` where (`f`.`id` in (1,2,3,4))
This should use facts and a1 tables:
explain extended select id from v1 where attr1 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
+1 SIMPLE a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t1` `a1` where ((`f`.`id` = `a1`.`id`) and (`a1`.`attr1` between 12 and 14))
This should use facts, a2 and its subquery:
explain extended select id from v1 where attr2 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using where; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using index
+1 SIMPLE a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using where; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using index
3 DEPENDENT SUBQUERY t2 ref PRIMARY PRIMARY 4 test.a2.id 2 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t2` `a2` where ((`f`.`id` = `a2`.`id`) and (`a2`.`attr2` between 12 and 14) and (`a2`.`fromdate` = (select max(`test`.`t2`.`fromdate`) AS `MAX(fromdate)` from `test`.`t2` where (`test`.`t2`.`id` = `a2`.`id`))))
This should use one table:
explain select id from v2 where id=2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY f const PRIMARY PRIMARY 4 const 1 Using index
+1 SIMPLE f const PRIMARY PRIMARY 4 const 1 Using index
This should use one table:
explain extended select id from v2 where id in (1,2,3,4);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
+1 SIMPLE f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
Warnings:
-Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` where (`f`.`id` in (1,2,3,4))
This should use facts and a1 tables:
explain extended select id from v2 where attr1 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
+1 SIMPLE a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t1` `a1` where ((`f`.`id` = `a1`.`id`) and (`a1`.`attr1` between 12 and 14))
This should use facts, a2 and its subquery:
explain extended select id from v2 where attr2 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using where; Using index
+1 SIMPLE a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using where; Using index
3 DEPENDENT SUBQUERY t2 ref PRIMARY PRIMARY 4 test.f.id 2 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t2` `a2` where ((`f`.`id` = `a2`.`id`) and (`a2`.`attr2` between 12 and 14) and (`a2`.`fromdate` = (select max(`test`.`t2`.`fromdate`) AS `MAX(fromdate)` from `test`.`t2` where (`test`.`t2`.`id` = `f`.`id`))))
drop view v1, v2;
drop table t0, t1, t2;
=== modified file 'mysql-test/r/view.result'
--- a/mysql-test/r/view.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/view.result 2010-05-26 20:18:18 +0000
@@ -117,7 +117,7 @@ c
12
explain extended select c from v5;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <derived3> ALL NULL NULL NULL NULL 5 100.00
+1 SIMPLE <derived3> ALL NULL NULL NULL NULL 5 100.00
3 DERIVED t1 ALL NULL NULL NULL NULL 5 100.00
Warnings:
Note 1003 select (`v2`.`c` + 1) AS `c` from `test`.`v2`
@@ -237,7 +237,7 @@ a
3
explain select * from v1;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 3
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6
2 DERIVED t1 ALL NULL NULL NULL NULL 6 Using temporary
select * from t1;
a
@@ -302,7 +302,7 @@ a+1
4
explain select * from v1;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 4
2 DERIVED t1 ALL NULL NULL NULL NULL 4 Using filesort
drop view v1;
drop table t1;
=== modified file 'mysql-test/r/view_grant.result'
--- a/mysql-test/r/view_grant.result 2009-10-16 11:12:21 +0000
+++ b/mysql-test/r/view_grant.result 2010-05-26 20:18:18 +0000
@@ -110,7 +110,7 @@ show create view mysqltest.v1;
ERROR 42000: SHOW VIEW command denied to user 'mysqltest_1'@'localhost' for table 'v1'
explain select c from mysqltest.v2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 0 const row not found
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
2 DERIVED NULL NULL NULL NULL NULL NULL NULL no matching row in const table
show create view mysqltest.v2;
ERROR 42000: SHOW VIEW command denied to user 'mysqltest_1'@'localhost' for table 'v2'
@@ -131,7 +131,7 @@ View Create View character_set_client co
v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `mysqltest`.`v1` AS select (`mysqltest`.`t1`.`a` + 1) AS `c`,(`mysqltest`.`t1`.`b` + 1) AS `d` from `mysqltest`.`t1` latin1 latin1_swedish_ci
explain select c from mysqltest.v2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 0 const row not found
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
2 DERIVED NULL NULL NULL NULL NULL NULL NULL no matching row in const table
show create view mysqltest.v2;
View Create View character_set_client collation_connection
@@ -144,7 +144,7 @@ View Create View character_set_client co
v3 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `mysqltest`.`v3` AS select (`mysqltest`.`t2`.`a` + 1) AS `c`,(`mysqltest`.`t2`.`b` + 1) AS `d` from `mysqltest`.`t2` latin1 latin1_swedish_ci
explain select c from mysqltest.v4;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> system NULL NULL NULL NULL 0 const row not found
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2
2 DERIVED NULL NULL NULL NULL NULL NULL NULL no matching row in const table
show create view mysqltest.v4;
View Create View character_set_client collation_connection
=== added file 'mysql-test/t/derived_view.test'
--- a/mysql-test/t/derived_view.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/t/derived_view.test 2010-05-26 20:18:18 +0000
@@ -0,0 +1,217 @@
+--disable_warnings
+drop table if exists t1,t2;
+drop view if exists v1,v2,v3,v4;
+--enable_warnings
+create table t1(f1 int, f11 int);
+create table t2(f2 int, f22 int);
+insert into t1 values(1,1),(2,2),(3,3),(5,5),(9,9),(7,7);
+insert into t1 values(17,17),(13,13),(11,11),(15,15),(19,19);
+insert into t2 values(1,1),(3,3),(2,2),(4,4),(8,8),(6,6);
+insert into t2 values(12,12),(14,14),(10,10),(18,18),(16,16);
+
+--echo Tests:
+
+--echo for merged derived tables
+--echo explain for simple derived
+explain select * from (select * from t1) tt;
+select * from (select * from t1) tt;
+--echo explain for multitable derived
+explain extended select * from (select * from t1 join t2 on f1=f2) tt;
+select * from (select * from t1 join t2 on f1=f2) tt;
+--echo explain for derived with where
+explain extended
+ select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+--echo join of derived
+explain extended
+ select * from (select * from t1 where f1 in (2,3)) tt join
+ (select * from t1 where f1 in (1,2)) aa on tt.f1=aa.f1;
+select * from (select * from t1 where f1 in (2,3)) tt join
+ (select * from t1 where f1 in (1,2)) aa on tt.f1=aa.f1;
+
+flush status;
+explain extended
+ select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+show status like 'Handler_read%';
+flush status;
+select * from (select * from t1 where f1 in (2,3)) tt where f11=2;
+show status like 'Handler_read%';
+
+--echo for merged views
+create view v1 as select * from t1;
+create view v2 as select * from t1 join t2 on f1=f2;
+create view v3 as select * from t1 where f1 in (2,3);
+create view v4 as select * from t2 where f2 in (2,3);
+--echo explain for simple views
+explain extended select * from v1;
+select * from v1;
+--echo explain for multitable views
+explain extended select * from v2;
+select * from v2;
+--echo explain for views with where
+explain extended select * from v3 where f11 in (1,3);
+select * from v3 where f11 in (1,3);
+--echo explain for joined views
+explain extended
+ select * from v3 join v4 on f1=f2;
+select * from v3 join v4 on f1=f2;
+
+flush status;
+explain extended select * from v4 where f2 in (1,3);
+show status like 'Handler_read%';
+flush status;
+select * from v4 where f2 in (1,3);
+show status like 'Handler_read%';
+
+--echo for materialized derived tables
+--echo explain for simple derived
+explain extended select * from (select * from t1 group by f1) tt;
+select * from (select * from t1 having f1=f1) tt;
+--echo explain showing created indexes
+explain extended
+ select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+--echo explain showing late materialization
+flush status;
+explain select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+show status like 'Handler_read%';
+flush status;
+select * from t1 join (select * from t2 group by f2) tt on f1=f2;
+show status like 'Handler_read%';
+
+--echo for materialized views
+drop view v1,v2,v3;
+create view v1 as select * from t1 group by f1;
+create view v2 as select * from t2 group by f2;
+create view v3 as select t1.f1,t1.f11 from t1 join t1 as t11 where t1.f1=t11.f1
+ having t1.f1<100;
+--echo explain for simple derived
+explain extended select * from v1;
+select * from v1;
+--echo explain showing created indexes
+explain extended select * from t1 join v2 on f1=f2;
+select * from t1 join v2 on f1=f2;
+explain extended
+ select * from t1,v3 as v31,v3 where t1.f1=v31.f1 and t1.f1=v3.f1;
+flush status;
+select * from t1,v3 as v31,v3 where t1.f1=v31.f1 and t1.f1=v3.f1;
+show status like 'Handler_read%';
+--echo explain showing late materialization
+flush status;
+explain select * from t1 join v2 on f1=f2;
+show status like 'Handler_read%';
+flush status;
+select * from t1 join v2 on f1=f2;
+show status like 'Handler_read%';
+
+explain extended select * from v1 join v4 on f1=f2;
+select * from v1 join v4 on f1=f2;
+
+--echo merged derived in merged derived
+explain extended select * from (select * from
+ (select * from t1 where f1 < 7) tt where f1 > 2) zz;
+select * from (select * from
+ (select * from t1 where f1 < 7) tt where f1 > 2) zz;
+
+--echo materialized derived in merged derived
+explain extended select * from (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) zz;
+select * from (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) zz;
+
+--echo merged derived in materialized derived
+explain extended select * from (select * from
+ (select * from t1 where f1 < 7) tt where f1 > 2 group by f1) zz;
+select * from (select * from
+ (select * from t1 where f1 < 7) tt where f1 > 2 group by f1) zz;
+
+--echo materialized derived in materialized derived
+explain extended select * from (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) zz;
+select * from (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) zz;
+
+--echo mat in merged derived join mat in merged derived
+explain extended select * from
+ (select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) x
+join
+ (select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) z
+ on x.f1 = z.f1;
+
+flush status;
+select * from
+ (select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) x
+join
+ (select * from (select * from t1 where f1 < 7 group by f1) tt where f1 > 2) z
+ on x.f1 = z.f1;
+show status like 'Handler_read%';
+flush status;
+
+--echo merged in merged derived join merged in merged derived
+explain extended select * from
+ (select * from
+ (select * from t1 where f1 < 7 ) tt where f1 > 2 ) x
+join
+ (select * from
+ (select * from t1 where f1 < 7 ) tt where f1 > 2 ) z
+ on x.f1 = z.f1;
+
+select * from
+ (select * from
+ (select * from t1 where f1 < 7 ) tt where f1 > 2 ) x
+join
+ (select * from
+ (select * from t1 where f1 < 7 ) tt where f1 > 2 ) z
+ on x.f1 = z.f1;
+
+--echo materialized in materialized derived join
+--echo materialized in materialized derived
+explain extended select * from
+ (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) x
+join
+ (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) z
+ on x.f1 = z.f1;
+
+select * from
+ (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) x
+join
+ (select * from
+ (select * from t1 where f1 < 7 group by f1) tt where f1 > 2 group by f1) z
+ on x.f1 = z.f1;
+
+--echo merged view in materialized derived
+explain extended
+select * from (select * from v4 group by 1) tt;
+select * from (select * from v4 group by 1) tt;
+
+--echo materialized view in merged derived
+explain extended
+select * from ( select * from v1 where f1 < 7) tt;
+select * from ( select * from v1 where f1 < 7) tt;
+
+--echo merged view in a merged view in a merged derived
+create view v6 as select * from v4 where f2 < 7;
+explain extended select * from (select * from v6) tt;
+select * from (select * from v6) tt;
+
+--echo materialized view in a merged view in a materialized derived
+create view v7 as select * from v1;
+explain extended select * from (select * from v7 group by 1) tt;
+select * from (select * from v7 group by 1) tt;
+
+--echo join of above two
+explain extended select * from v6 join v7 on f2=f1;
+select * from v6 join v7 on f2=f1;
+
+--echo test two keys
+explain select * from t1 join (select * from t2 group by f2) tt on t1.f1=tt.f2 join t1 xx on tt.f22=xx.f1;
+select * from t1 join (select * from t2 group by f2) tt on t1.f1=tt.f2 join t1 xx on tt.f22=xx.f1;
+
+
+--echo TODO: Add test with 64 tables mergeable view to test fall back to
+--echo materialization on tables > MAX_TABLES merge
+drop table t1,t2;
+drop view v1,v2,v3,v4,v6,v7;
=== modified file 'mysql-test/t/lock_multi_bug38499.test'
--- a/mysql-test/t/lock_multi_bug38499.test 2009-08-28 21:49:16 +0000
+++ b/mysql-test/t/lock_multi_bug38499.test 2010-05-26 20:18:18 +0000
@@ -16,7 +16,9 @@ connect (writer,localhost,root,,);
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1( a INT, b INT );
+CREATE TABLE t2( a INT, b INT );
INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
--echo # 1. test regular tables
--echo # 1.1. test altering of columns that multiupdate doesn't use
@@ -28,7 +30,7 @@ while ($i) {
--dec $i
--connection writer
- send UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0;
+ send UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0;
--connection locker
ALTER TABLE t1 ADD COLUMN (c INT);
@@ -41,7 +43,7 @@ while ($i) {
--echo # 1.1.2. PS mode
--connection writer
-PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0';
+PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0';
let $i = 100;
while ($i) {
@@ -75,7 +77,7 @@ while ($i) {
UPDATE t1 SET a=b;
--connection writer
---send UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0;
+--send UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0;
--connection locker
--error 0,ER_CANT_DROP_FIELD_OR_KEY
@@ -100,7 +102,7 @@ while ($i) {
UPDATE t1 SET a=b;
--connection writer
- PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0';
+ PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0';
--send EXECUTE stmt
--connection locker
@@ -210,7 +212,7 @@ while ($i) {
}
--enable_query_log
--connection default
-DROP TABLE t1;
+DROP TABLE t1,t2;
# Close connections
=== modified file 'sql/field.cc'
--- a/sql/field.cc 2010-03-20 12:01:47 +0000
+++ b/sql/field.cc 2010-05-26 20:18:18 +0000
@@ -10458,3 +10458,27 @@ Field::set_datetime_warning(MYSQL_ERROR:
field_name);
}
}
+
+
+/*
+ @brief
+ Return possible keys for a field
+
+ @details
+ Return a bit map of the keys over this field that can be used by the range
+ optimizer. For a field of a generic table such keys are all keys that start
+ with this field. For a field of a materialized derived table/view such keys
+ are all keys in which this field takes part. This is less restrictive
+ because keys for a materialized derived table/view are generated on the fly
+ from the fields present, so the case where the field at the beginning of a
+ key is absent cannot occur.
+
+ @return map of possible keys
+*/
+
+key_map Field::get_possible_keys()
+{
+ DBUG_ASSERT(table->pos_in_table_list);
+ return (table->pos_in_table_list->is_materialized_derived() ?
+ part_of_key : key_start);
+}
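
The key_map choice above is easiest to see on a toy model. Below is a minimal standalone sketch, with plain integers standing in for the server's key_map and Field types (none of these names are the real API):

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t key_map;        /* bit i set => key i is usable */

    struct FieldModel {
      key_map key_start;             /* keys that start with this field */
      key_map part_of_key;           /* keys containing this field anywhere */
      bool in_materialized_derived;

      key_map get_possible_keys() const {
        /* Keys of a materialized derived table are generated on the fly
           from the fields in use, so any key containing the field works;
           a regular table can only use keys that start with it. */
        return in_materialized_derived ? part_of_key : key_start;
      }
    };

    int main() {
      FieldModel f = { 0x1, 0x5, false };  /* key 0 starts with f, key 2 contains it */
      printf("base table: 0x%llx\n", (unsigned long long) f.get_possible_keys()); /* 0x1 */
      f.in_materialized_derived = true;
      printf("derived:    0x%llx\n", (unsigned long long) f.get_possible_keys()); /* 0x5 */
      return 0;
    }
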
=== modified file 'sql/field.h'
--- a/sql/field.h 2010-03-15 11:51:23 +0000
+++ b/sql/field.h 2010-05-26 20:18:18 +0000
@@ -582,6 +582,9 @@ public:
DBUG_ASSERT(0);
return GEOM_GEOMETRY;
}
+
+ key_map get_possible_keys();
+
/* Hash value */
virtual void hash(ulong *nr, ulong *nr2);
friend bool reopen_table(THD *,struct st_table *,bool);
=== modified file 'sql/handler.cc'
--- a/sql/handler.cc 2010-03-20 12:01:47 +0000
+++ b/sql/handler.cc 2010-05-26 20:18:18 +0000
@@ -2480,8 +2480,9 @@ int handler::update_auto_increment()
void handler::column_bitmaps_signal()
{
DBUG_ENTER("column_bitmaps_signal");
- DBUG_PRINT("info", ("read_set: 0x%lx write_set: 0x%lx", (long) table->read_set,
- (long) table->write_set));
+ if (table)
+ DBUG_PRINT("info", ("read_set: 0x%lx write_set: 0x%lx",
+ (long) table->read_set, (long) table->write_set));
DBUG_VOID_RETURN;
}
=== modified file 'sql/item.cc'
--- a/sql/item.cc 2010-03-20 12:01:47 +0000
+++ b/sql/item.cc 2010-05-26 20:18:18 +0000
@@ -711,6 +711,23 @@ bool Item_field::register_field_in_bitma
return 0;
}
+
+/*
+ Mark field in write_map
+
+ NOTES
+ This is used by UPDATE to register the underlying fields of view fields used by the statement.
+*/
+
+bool Item_field::register_field_in_write_map(uchar *arg)
+{
+ TABLE *table= (TABLE *) arg;
+ if (field->table == table || !table)
+ bitmap_set_bit(field->table->write_set, field->field_index);
+ return 0;
+}
+
+
bool Item::check_cols(uint c)
{
if (c != 1)
@@ -2202,6 +2219,10 @@ table_map Item_field::used_tables() cons
return (depended_from ? OUTER_REF_TABLE_BIT : field->table->map);
}
+table_map Item_field::all_used_tables() const
+{
+ return (depended_from ? OUTER_REF_TABLE_BIT : field->table->map);
+}
void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
{
@@ -2454,7 +2475,7 @@ my_decimal *Item_float::val_decimal(my_d
void Item_string::print(String *str, enum_query_type query_type)
{
- if (query_type == QT_ORDINARY && is_cs_specified())
+ if (query_type != QT_IS && is_cs_specified())
{
str->append('_');
str->append(collation.collation->csname);
@@ -2462,7 +2483,7 @@ void Item_string::print(String *str, enu
str->append('\'');
- if (query_type == QT_ORDINARY ||
+ if (query_type != QT_IS ||
my_charset_same(str_value.charset(), system_charset_info))
{
str_value.print(str);
@@ -3945,6 +3966,34 @@ resolve_ref_in_select_and_group(THD *thd
}
+/*
+ @brief
+ Check whether a table belongs to an outer select.
+
+ @param table table to check
+ @param select current select
+
+ @details
+ Try to find the select the table belongs to by ascending the chain of derived tables.
+*/
+
+static
+bool is_outer_table(TABLE_LIST *table, SELECT_LEX *select)
+{
+ DBUG_ASSERT(table->select_lex != select);
+ TABLE_LIST *tl;
+
+ for (tl= select->master_unit()->derived;
+ tl && tl->is_merged_derived();
+ select= tl->select_lex, tl= select->master_unit()->derived)
+ {
+ if (tl->select_lex == table->select_lex)
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
/**
Resolve the name of an outer select column reference.
@@ -4382,7 +4431,8 @@ bool Item_field::fix_fields(THD *thd, It
if (!outer_fixed && cached_table && cached_table->select_lex &&
context->select_lex &&
- cached_table->select_lex != context->select_lex)
+ cached_table->select_lex != context->select_lex &&
+ is_outer_table(cached_table, context->select_lex))
{
int ret;
if ((ret= fix_outer_field(thd, &from_field, reference)) < 0)
@@ -5786,8 +5836,9 @@ public:
st_select_lex *sel;
for (sel= current_select; sel; sel= sel->outer_select())
{
+ List_iterator<TABLE_LIST> li(sel->leaf_tables);
TABLE_LIST *tbl;
- for (tbl= sel->leaf_tables; tbl; tbl= tbl->next_leaf)
+ while ((tbl= li++))
{
if (tbl->table == item->field->table)
{
@@ -7506,6 +7557,8 @@ Item_result Item_type_holder::result_typ
enum_field_types Item_type_holder::get_real_type(Item *item)
{
+ if (item->type() == REF_ITEM)
+ item= item->real_item();
switch(item->type())
{
case FIELD_ITEM:
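
register_field_in_write_map() above is one more instance of the Item::walk() processor idiom this patch leans on. Here is a standalone sketch of the idiom, with a toy Node in place of Item and a plain bitmask in place of the table's write_set (all names invented for illustration):

    #include <vector>
    #include <cstdio>

    struct Node;
    typedef bool (Node::*Processor)(void *arg);   /* like Item_processor */

    struct Node {
      std::vector<Node*> children;
      int field_index;

      /* Depth-first walk: children first, then this node, as Item::walk does. */
      bool walk(Processor p, void *arg) {
        for (Node *c : children)
          if (c->walk(p, arg))
            return true;
        return (this->*p)(arg);
      }

      /* Processor: mark this "field" in the write map; false = keep walking. */
      bool register_in_write_map(void *arg) {
        unsigned long long *write_set = (unsigned long long *) arg;
        *write_set |= 1ULL << field_index;
        return false;
      }
    };

    int main() {
      Node leaf1{{}, 1}, leaf2{{}, 3}, root{{&leaf1, &leaf2}, 0};
      unsigned long long write_set = 0;
      root.walk(&Node::register_in_write_map, &write_set);
      printf("write_set = 0x%llx\n", write_set);  /* 0xb: bits 0, 1 and 3 */
      return 0;
    }
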
=== modified file 'sql/item.h'
--- a/sql/item.h 2010-03-20 12:01:47 +0000
+++ b/sql/item.h 2010-05-26 20:18:18 +0000
@@ -778,6 +778,7 @@ public:
class Field_enumerator)
*/
virtual table_map used_tables() const { return (table_map) 0L; }
+ virtual table_map all_used_tables() const { return used_tables(); }
/*
Return table map of tables that can't be NULL tables (tables that are
used in a context where if they would contain a NULL row generated
@@ -934,8 +935,12 @@ public:
virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; }
virtual bool is_expensive_processor(uchar *arg) { return 0; }
virtual bool register_field_in_read_map(uchar *arg) { return 0; }
+ virtual bool register_field_in_write_map(uchar *arg) { return 0; }
virtual bool enumerate_field_refs_processor(uchar *arg) { return 0; }
virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; }
+ virtual bool view_used_tables_processor(uchar *arg) { return 0; }
+ virtual bool eval_not_null_tables(uchar *opt_arg) { return 0; }
+
/*
The next function differs from the previous one that a bitmap to be updated
is passed as uchar *arg.
@@ -1143,6 +1148,12 @@ public:
{ return Field::GEOM_GEOMETRY; };
String *check_well_formed_result(String *str, bool send_error= 0);
bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs);
+ table_map view_used_tables(TABLE_LIST *view)
+ {
+ view->view_used_tables= 0;
+ walk(&Item::view_used_tables_processor, 0, (uchar *) view);
+ return view->view_used_tables;
+ }
};
@@ -1616,6 +1627,7 @@ public:
int save_in_field(Field *field,bool no_conversions);
void save_org_in_field(Field *field);
table_map used_tables() const;
+ table_map all_used_tables() const;
enum Item_result result_type () const
{
return field->result_type();
@@ -1645,6 +1657,7 @@ public:
bool add_field_to_set_processor(uchar * arg);
bool find_item_in_field_list_processor(uchar *arg);
bool register_field_in_read_map(uchar *arg);
+ bool register_field_in_write_map(uchar *arg);
bool register_field_in_bitmap(uchar *arg);
bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
bool vcol_in_partition_func_processor(uchar *bool_arg);
@@ -2515,11 +2528,14 @@ public:
*/
class Item_direct_view_ref :public Item_direct_ref
{
+ TABLE_LIST *view;
public:
Item_direct_view_ref(Name_resolution_context *context_arg, Item **item,
- const char *table_name_arg,
- const char *field_name_arg)
- :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg) {}
+ const char *table_name_arg,
+ const char *field_name_arg,
+ TABLE_LIST *view_arg)
+ :Item_direct_ref(context_arg, item, table_name_arg, field_name_arg),
+ view(view_arg) {}
/* Constructor need to process subselect with temporary tables (see Item) */
Item_direct_view_ref(THD *thd, Item_direct_ref *item)
:Item_direct_ref(thd, item) {}
@@ -2533,6 +2549,24 @@ public:
return item;
}
virtual Ref_Type ref_type() { return VIEW_REF; }
+ table_map used_tables() const
+ {
+ return depended_from ?
+ OUTER_REF_TABLE_BIT :
+ (view->merged ? (*ref)->used_tables() : view->table->map);
+ }
+ bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
+ {
+ return (*ref)->walk(processor, walk_subquery, arg) ||
+ (this->*processor)(arg);
+ }
+ bool view_used_tables_processor(uchar *arg)
+ {
+ TABLE_LIST *view_arg= (TABLE_LIST *) arg;
+ if (view_arg == view)
+ view_arg->view_used_tables|= (*ref)->used_tables();
+ return 0;
+ }
};
@@ -2885,6 +2919,17 @@ public:
value.
*/
+/*
+ Cached_item_XXX objects are not exactly caches. They do the following:
+
+ Each Cached_item_XXX object has
+ - its source item
+ - saved value of the source item
+ - cmp() method that compares the saved value with the current value of the
+ source item and, if they differ, stores the item's current value as the
+ new saved value.
+*/
+
class Cached_item :public Sql_alloc
{
public:
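
A minimal standalone model of the cmp() contract the new comment describes, using a single int-valued source instead of the per-type Cached_item_XXX subclasses:

    #include <cstdio>

    struct CachedInt {
      const int *source;  /* the "source item" */
      int saved;
      bool first;

      explicit CachedInt(const int *src) : source(src), saved(0), first(true) {}

      /* Returns true when the source changed since the last call,
         re-saving the value as a side effect, like cmp(). */
      bool cmp() {
        if (first || *source != saved) {
          saved = *source;
          first = false;
          return true;
        }
        return false;
      }
    };

    int main() {
      int v = 7;
      CachedInt c(&v);
      printf("%d", c.cmp());   /* 1: first comparison always "changed" */
      printf("%d", c.cmp());   /* 0: unchanged */
      v = 8;
      printf("%d\n", c.cmp()); /* 1: changed and re-saved */
      return 0;
    }
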
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2010-03-20 12:01:47 +0000
+++ b/sql/item_cmpfunc.cc 2010-05-26 20:18:18 +0000
@@ -2204,6 +2204,16 @@ bool Item_func_between::fix_fields(THD *
thd->lex->current_select->between_count++;
+
+ return 0;
+}
+
+
+bool Item_func_between::eval_not_null_tables(uchar *opt_arg)
+{
+ if (Item_func_opt_neg::eval_not_null_tables(NULL))
+ return 1;
+
/* not_null_tables_cache == union(T1(e),T1(e1),T1(e2)) */
if (pred_level && !negated)
return 0;
@@ -2212,9 +2222,8 @@ bool Item_func_between::fix_fields(THD *
not_null_tables_cache= (args[0]->not_null_tables() |
(args[1]->not_null_tables() &
args[2]->not_null_tables()));
-
return 0;
-}
+}
void Item_func_between::fix_length_and_dec()
@@ -2575,13 +2584,22 @@ Item_func_if::fix_fields(THD *thd, Item
if (Item_func::fix_fields(thd, ref))
return 1;
+ return 0;
+}
+
+
+bool
+Item_func_if::eval_not_null_tables(uchar *opt_arg)
+{
+ if (Item_func::eval_not_null_tables(NULL))
+ return 1;
+
not_null_tables_cache= (args[1]->not_null_tables() &
args[2]->not_null_tables());
return 0;
}
-
void
Item_func_if::fix_length_and_dec()
{
@@ -3761,11 +3779,22 @@ bool Item_func_in::nulls_in_row()
bool
Item_func_in::fix_fields(THD *thd, Item **ref)
{
- Item **arg, **arg_end;
if (Item_func_opt_neg::fix_fields(thd, ref))
return 1;
+ return 0;
+}
+
+
+bool
+Item_func_in::eval_not_null_tables(uchar *opt_arg)
+{
+ Item **arg, **arg_end;
+
+ if (Item_func_opt_neg::eval_not_null_tables(NULL))
+ return 1;
+
/* not_null_tables_cache == union(T1(e),union(T1(ei))) */
if (pred_level && negated)
return 0;
@@ -4186,7 +4215,6 @@ Item_cond::fix_fields(THD *thd, Item **r
*/
while ((item=li++))
{
- table_map tmp_table_map;
while (item->type() == Item::COND_ITEM &&
((Item_cond*) item)->functype() == functype() &&
!((Item_cond*) item)->list.is_empty())
@@ -4208,11 +4236,12 @@ Item_cond::fix_fields(THD *thd, Item **r
and_tables_cache= (table_map) 0;
else
{
- tmp_table_map= item->not_null_tables();
+ table_map tmp_table_map= item->not_null_tables();
not_null_tables_cache|= tmp_table_map;
and_tables_cache&= tmp_table_map;
const_item_cache= FALSE;
- }
+ }
+
with_sum_func= with_sum_func || item->with_sum_func;
with_subselect|= item->with_subselect;
if (item->maybe_null)
@@ -4226,6 +4255,28 @@ Item_cond::fix_fields(THD *thd, Item **r
}
+bool
+Item_cond::eval_not_null_tables(uchar *opt_arg)
+{
+ Item *item;
+ List_iterator<Item> li(list);
+ and_tables_cache= ~(table_map) 0;
+ while ((item=li++))
+ {
+ table_map tmp_table_map;
+ if (item->const_item())
+ and_tables_cache= (table_map) 0;
+ else
+ {
+ tmp_table_map= item->not_null_tables();
+ not_null_tables_cache|= tmp_table_map;
+ and_tables_cache&= tmp_table_map;
+ }
+ }
+ return 0;
+}
+
+
void Item_cond::fix_after_pullout(st_select_lex *new_parent, Item **ref)
{
List_iterator<Item> li(list);
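
The formulas moved into these eval_not_null_tables() methods can be checked by hand. A worked sketch with table_map as a plain bitmask (the table numbering is invented for the example):

    #include <cstdio>

    typedef unsigned long long table_map;

    int main() {
      table_map t_e  = 1 << 0;   /* table of e  */
      table_map t_e1 = 1 << 1;   /* table of e1 */
      table_map t_e2 = 1 << 2;   /* table of e2 */

      /* e BETWEEN e1 AND e2:
         not_null_tables_cache = T(e) | (T(e1) & T(e2)) -- e must be
         non-NULL; e1,e2 only reject the NULL tables they share. */
      table_map between_nnt = t_e | (t_e1 & t_e2);

      /* IF(cond, e1, e2): NULL only when both branches may be NULL, so
         not_null_tables_cache = T(e1) & T(e2). */
      table_map if_nnt = t_e1 & t_e2;

      printf("between: 0x%llx  if: 0x%llx\n", between_nnt, if_nnt); /* 0x1, 0x0 */
      return 0;
    }
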
=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h 2010-03-20 12:01:47 +0000
+++ b/sql/item_cmpfunc.h 2010-05-26 20:18:18 +0000
@@ -631,6 +631,7 @@ public:
bool is_bool_func() { return 1; }
CHARSET_INFO *compare_collation() { return cmp_collation.collation; }
uint decimal_precision() const { return 1; }
+ bool eval_not_null_tables(uchar *opt_arg);
};
@@ -730,6 +731,7 @@ public:
void fix_length_and_dec();
uint decimal_precision() const;
const char *func_name() const { return "if"; }
+ bool eval_not_null_tables(uchar *opt_arg);
};
@@ -1256,6 +1258,7 @@ public:
bool nulls_in_row();
bool is_bool_func() { return 1; }
CHARSET_INFO *compare_collation() { return cmp_collation.collation; }
+ bool eval_not_null_tables(uchar *opt_arg);
};
class cmp_item_row :public cmp_item
@@ -1510,6 +1513,7 @@ public:
bool subst_argument_checker(uchar **arg) { return TRUE; }
Item *compile(Item_analyzer analyzer, uchar **arg_p,
Item_transformer transformer, uchar *arg_t);
+ bool eval_not_null_tables(uchar *opt_arg);
};
=== modified file 'sql/item_func.cc'
--- a/sql/item_func.cc 2010-03-20 12:01:47 +0000
+++ b/sql/item_func.cc 2010-05-26 20:18:18 +0000
@@ -192,7 +192,6 @@ Item_func::fix_fields(THD *thd, Item **r
with_sum_func= with_sum_func || item->with_sum_func;
used_tables_cache|= item->used_tables();
- not_null_tables_cache|= item->not_null_tables();
const_item_cache&= item->const_item();
with_subselect|= item->with_subselect;
}
@@ -206,6 +205,21 @@ Item_func::fix_fields(THD *thd, Item **r
}
+bool
+Item_func::eval_not_null_tables(uchar *opt_arg)
+{
+ Item **arg,**arg_end;
+ if (arg_count)
+ {
+ for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++)
+ {
+ not_null_tables_cache|= (*arg)->not_null_tables();
+ }
+ }
+ return FALSE;
+}
+
+
void Item_func::fix_after_pullout(st_select_lex *new_parent, Item **ref)
{
Item **arg,**arg_end;
@@ -3895,6 +3909,20 @@ bool Item_func_set_user_var::fix_fields(
entry->collation.set(args[0]->collation.collation, DERIVATION_IMPLICIT);
collation.set(entry->collation.collation, DERIVATION_IMPLICIT);
cached_result_type= args[0]->result_type();
+ {
+ /*
+ When this function is used in a derived table/view, force the derived
+ table to be materialized to preserve the possible side effect of setting
+ a user variable.
+ */
+ SELECT_LEX_UNIT *unit= thd->lex->current_select->master_unit();
+ TABLE_LIST *derived;
+ for (derived= unit->derived;
+ derived;
+ derived= derived->select_lex->master_unit()->derived)
+ derived->set_materialized_derived();
+ }
+
return FALSE;
}
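
The materialization forcing above walks every enclosing derived table. A standalone model of that ascent (DerivedRef and its field names are stand-ins for TABLE_LIST and the unit chain):

    #include <cstdio>

    struct DerivedRef {
      DerivedRef *enclosing;   /* derived table of the next outer unit, or null */
      bool materialized;
      const char *name;
    };

    /* Mark every enclosing derived table as materialized, so the
       side effect of SET @var runs exactly once per materialization. */
    static void force_materialization(DerivedRef *derived) {
      for (DerivedRef *d = derived; d; d = d->enclosing) {
        d->materialized = true;   /* set_materialized_derived() in the patch */
        printf("materialize %s\n", d->name);
      }
    }

    int main() {
      DerivedRef outer = { nullptr, false, "outer_dt" };
      DerivedRef inner = { &outer, false, "inner_dt" };
      force_materialization(&inner);   /* inner_dt, then outer_dt */
      return 0;
    }
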
=== modified file 'sql/item_func.h'
--- a/sql/item_func.h 2010-03-20 12:01:47 +0000
+++ b/sql/item_func.h 2010-05-26 20:18:18 +0000
@@ -181,6 +181,7 @@ public:
Item_transformer transformer, uchar *arg_t);
void traverse_cond(Cond_traverser traverser,
void * arg, traverse_order order);
+ bool eval_not_null_tables(uchar *opt_arg);
// bool is_expensive_processor(uchar *arg);
// virtual bool is_expensive() { return 0; }
inline double fix_result(double value)
@@ -1617,14 +1618,7 @@ public:
void fix_length_and_dec() { decimals=0; max_length=1; maybe_null=1;}
bool check_vcol_func_processor(uchar *int_arg)
{
-#if 0
- DBUG_ENTER("Item_func_is_free_lock::check_vcol_func_processor");
- DBUG_PRINT("info",
- ("check_vcol_func_processor returns TRUE: unsupported function"));
- DBUG_RETURN(TRUE);
-#else
return trace_unsupported_by_check_vcol_func_processor(func_name());
-#endif
}
};
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-03-29 14:04:35 +0000
+++ b/sql/item_subselect.cc 2010-05-26 20:18:18 +0000
@@ -2894,6 +2894,9 @@ int subselect_uniquesubquery_engine::exe
DBUG_RETURN(0);
}
+ if (!tab->preread_init_done && tab->preread_init())
+ DBUG_RETURN(1);
+
if (null_keypart)
DBUG_RETURN(scan_table());
@@ -3026,7 +3029,7 @@ subselect_uniquesubquery_engine::~subsel
int subselect_indexsubquery_engine::exec()
{
- DBUG_ENTER("subselect_indexsubquery_engine::exec");
+ DBUG_ENTER("subselect_indexsubquery_engine");
int error;
bool null_finding= 0;
TABLE *table= tab->table;
@@ -3057,6 +3060,9 @@ int subselect_indexsubquery_engine::exec
DBUG_RETURN(0);
}
+ if (!tab->preread_init_done && tab->preread_init())
+ DBUG_RETURN(1);
+
if (null_keypart)
DBUG_RETURN(scan_table());
@@ -3158,10 +3164,13 @@ void subselect_uniquesubquery_engine::ex
}
-table_map subselect_engine::calc_const_tables(TABLE_LIST *table)
+table_map subselect_engine::calc_const_tables(List<TABLE_LIST> &list)
{
table_map map= 0;
- for (; table; table= table->next_leaf)
+ List_iterator<TABLE_LIST> ti(list);
+ TABLE_LIST *table;
+ //for (; table; table= table->next_leaf)
+ while ((table= ti++))
{
TABLE *tbl= table->table;
if (tbl && tbl->const_table)
@@ -3173,14 +3182,13 @@ table_map subselect_engine::calc_const_t
table_map subselect_single_select_engine::upper_select_const_tables()
{
- return calc_const_tables((TABLE_LIST *) select_lex->outer_select()->
- leaf_tables);
+ return calc_const_tables(select_lex->outer_select()->leaf_tables);
}
table_map subselect_union_engine::upper_select_const_tables()
{
- return calc_const_tables((TABLE_LIST *) unit->outer_select()->leaf_tables);
+ return calc_const_tables(unit->outer_select()->leaf_tables);
}
@@ -3711,7 +3719,7 @@ bool subselect_hash_sj_engine::init_perm
if (((select_union*) result)->create_result_table(
thd, tmp_columns, TRUE, tmp_create_options,
- "materialized subselect", TRUE))
+ "materialized subselect", TRUE, TRUE))
DBUG_RETURN(TRUE);
tmp_table= ((select_union*) result)->table;
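
Both subselect engines now guard execution with the same lazy-init test. A sketch of the pattern, with TabModel standing in for JOIN_TAB:

    #include <cstdio>

    struct TabModel {
      bool preread_init_done;
      int preread_init() {        /* nonzero on error, like the server call */
        preread_init_done = true;
        printf("preread_init ran\n");
        return 0;
      }
    };

    static int exec_once(TabModel *tab) {
      /* Initialize read structures at most once, right before first use. */
      if (!tab->preread_init_done && tab->preread_init())
        return 1;                 /* propagate an init failure */
      /* ... key lookup would follow here ... */
      return 0;
    }

    int main() {
      TabModel tab = { false };
      exec_once(&tab);   /* runs preread_init */
      exec_once(&tab);   /* skips it on re-execution */
      return 0;
    }
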
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-03-29 14:04:35 +0000
+++ b/sql/item_subselect.h 2010-05-26 20:18:18 +0000
@@ -531,6 +531,7 @@ public:
virtual bool may_be_null() { return maybe_null; };
virtual table_map upper_select_const_tables()= 0;
static table_map calc_const_tables(TABLE_LIST *);
+ static table_map calc_const_tables(List<TABLE_LIST> &list);
virtual void print(String *str, enum_query_type query_type)= 0;
virtual bool change_result(Item_subselect *si,
select_result_interceptor *result)= 0;
=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h 2010-03-20 12:01:47 +0000
+++ b/sql/mysql_priv.h 2010-05-26 20:18:18 +0000
@@ -62,12 +62,15 @@ class Parser_state;
QT_ORDINARY -- ordinary SQL query.
QT_IS -- SQL query to be shown in INFORMATION_SCHEMA (in utf8 and without
- character set introducers).
+ character set introducers).
+ QT_VIEW_INTERNAL -- view internal representation (like QT_ORDINARY except
+ for the ORDER BY clause)
*/
enum enum_query_type
{
QT_ORDINARY,
- QT_IS
+ QT_IS,
+ QT_VIEW_INTERNAL
};
/* TODO convert all these three maps to Bitmap classes */
@@ -511,7 +514,6 @@ protected:
#define OPTION_PROFILING (ULL(1) << 33)
-
/**
Maximum length of time zone name that we support
(Time zone name is char(64) in db). mysqlbinlog needs it.
@@ -1276,11 +1278,9 @@ int mysql_explain_select(THD *thd, SELEC
select_result *result);
bool mysql_union(THD *thd, LEX *lex, select_result *result,
SELECT_LEX_UNIT *unit, ulong setup_tables_done_option);
-bool mysql_handle_derived(LEX *lex, bool (*processor)(THD *thd,
- LEX *lex,
- TABLE_LIST *table));
-bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *t);
-bool mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *t);
+bool mysql_handle_derived(LEX *lex, uint phases);
+bool mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases);
+bool mysql_handle_list_of_derived(LEX *lex, TABLE_LIST *dt_list, uint phases);
Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
Item ***copy_func, Field **from_field,
Field **def_field,
@@ -1288,6 +1288,17 @@ Field *create_tmp_field(THD *thd, TABLE
bool table_cant_handle_bit_fields,
bool make_copy_field,
uint convert_blob_length);
+bool open_tmp_table(TABLE *table);
+#if defined(WITH_MARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES)
+#define TMP_ENGINE_HTON maria_hton
+#else
+#define TMP_ENGINE_HTON myisam_hton
+#endif
+bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
+ ENGINE_COLUMNDEF *start_recinfo,
+ ENGINE_COLUMNDEF **recinfo,
+ ulonglong options);
+
void sp_prepare_create_field(THD *thd, Create_field *sql_field);
int prepare_create_field(Create_field *sql_field,
uint *blob_columns,
@@ -1600,17 +1611,21 @@ bool get_key_map_from_key_list(key_map *
bool insert_fields(THD *thd, Name_resolution_context *context,
const char *db_name, const char *table_name,
List_iterator<Item> *it, bool any_privileges);
+void make_leaves_list(List<TABLE_LIST> &list, TABLE_LIST *tables,
+ bool full_table_list, TABLE_LIST *boundary);
bool setup_tables(THD *thd, Name_resolution_context *context,
List<TABLE_LIST> *from_clause, TABLE_LIST *tables,
- TABLE_LIST **leaves, bool select_insert);
+ List<TABLE_LIST> &leaves, bool select_insert,
+ bool full_table_list);
bool setup_tables_and_check_access(THD *thd,
Name_resolution_context *context,
List<TABLE_LIST> *from_clause,
TABLE_LIST *tables,
- TABLE_LIST **leaves,
+ List<TABLE_LIST> &leaves,
bool select_insert,
ulong want_access_first,
- ulong want_access);
+ ulong want_access,
+ bool full_table_list);
int setup_wild(THD *thd, TABLE_LIST *tables, List<Item> &fields,
List<Item> *sum_func_list, uint wild_num);
bool setup_fields(THD *thd, Item** ref_pointer_array,
@@ -1629,7 +1644,7 @@ inline bool setup_fields_with_no_wrap(TH
thd->lex->select_lex.no_wrap_view_item= FALSE;
return res;
}
-int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
COND **conds);
int setup_ftfuncs(SELECT_LEX* select);
int init_ftfuncs(THD *thd, SELECT_LEX* select, bool no_order);
@@ -1651,7 +1666,8 @@ inline int open_and_lock_tables(THD *thd
/* simple open_and_lock_tables without derived handling for single table */
TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l,
thr_lock_type lock_type);
-bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags);
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+ uint dt_phases);
int lock_tables(THD *thd, TABLE_LIST *tables, uint counter, bool *need_reopen);
int decide_logging_format(THD *thd, TABLE_LIST *tables);
TABLE *open_temporary_table(THD *thd, const char *path, const char *db,
@@ -1680,6 +1696,7 @@ void remove_db_from_cache(const char *db
void flush_tables();
bool is_equal(const LEX_STRING *a, const LEX_STRING *b);
char *make_default_log_name(char *buff,const char* log_ext);
+void unfix_fields(List<Item> &items);
#ifdef WITH_PARTITION_STORAGE_ENGINE
uint fast_alter_partition_table(THD *thd, TABLE *table,
@@ -2528,7 +2545,7 @@ Item * all_any_subquery_creator(Item *le
inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
{
table->used_fields= 0;
- table->const_table= 0;
+ table_list->reset_const_table();
table->null_row= 0;
table->status= STATUS_NO_RECORD;
table->maybe_null= table_list->outer_join;
@@ -2544,6 +2561,14 @@ inline void setup_table_map(TABLE *table
table->force_index_order= table->force_index_group= 0;
table->covering_keys= table->s->keys_for_keyread;
table->merge_keys.clear_all();
+ TABLE_LIST *orig= table_list->select_lex ?
+ table_list->select_lex->master_unit()->derived : 0;
+ if (!orig || !orig->is_merged_derived())
+ {
+ /* Tables merged from derived tables were set up already. */
+ table->covering_keys= table->s->keys_for_keyread;
+ table->merge_keys.clear_all();
+ }
}
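
mysql_handle_derived() now takes a bitmask of phases instead of a callback. A toy dispatcher showing the idea; the flag values below are invented for the sketch, and only the names DT_INIT, DT_MERGE, DT_PREPARE and DT_OPTIMIZE come from the patch:

    #include <cstdio>

    enum { DT_INIT = 1, DT_MERGE = 2, DT_PREPARE = 4, DT_OPTIMIZE = 8 };

    /* Run the requested derived-table phases; false = success, matching
       the server's bool convention. */
    static bool handle_derived(unsigned phases) {
      if (phases & DT_INIT)     printf("init derived tables\n");
      if (phases & DT_MERGE)    printf("merge mergeable derived tables\n");
      if (phases & DT_PREPARE)  printf("prepare materialized ones\n");
      if (phases & DT_OPTIMIZE) printf("optimize their units\n");
      return false;
    }

    int main() {
      handle_derived(DT_INIT);      /* as in open_and_lock_tables_derived() */
      handle_derived(DT_PREPARE);   /* as in sp_instr::exec_open_and_lock_tables() */
      return 0;
    }
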
=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc 2010-03-20 12:01:47 +0000
+++ b/sql/opt_range.cc 2010-05-26 20:18:18 +0000
@@ -7450,7 +7450,7 @@ ha_rows check_quick_select(PARAM *param,
SEL_ARG_RANGE_SEQ seq;
RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next, 0, 0};
handler *file= param->table->file;
- ha_rows rows;
+ ha_rows rows= HA_POS_ERROR;
uint keynr= param->real_keynr[idx];
DBUG_ENTER("check_quick_select");
@@ -7490,8 +7490,13 @@ ha_rows check_quick_select(PARAM *param,
*mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
*bufsize= param->thd->variables.mrr_buff_size;
- rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
- bufsize, mrr_flags, cost);
+ /*
+ Skip the MRR check for the result table of a materialized derived
+ table/view as it does not contain any data yet.
+ */
+ if (param->table->pos_in_table_list->is_non_derived())
+ rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0,
+ bufsize, mrr_flags, cost);
if (rows != HA_POS_ERROR)
{
param->table->quick_rows[keynr]=rows;
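
The HA_POS_ERROR pre-initialization above turns the skipped MRR call into a clean "no estimate" result. A sketch of that sentinel pattern (estimate_rows() and the local constant are stand-ins):

    #include <cstdio>

    typedef unsigned long long ha_rows;
    static const ha_rows HA_POS_ERROR = ~(ha_rows) 0;   /* "no estimate" sentinel */

    static ha_rows estimate_rows(bool table_has_data) {
      ha_rows rows = HA_POS_ERROR;  /* stays "no estimate" if the check is skipped */
      if (table_has_data)           /* is_non_derived() in the patch */
        rows = 42;                  /* stand-in for multi_range_read_info_const() */
      return rows;
    }

    int main() {
      printf("%s\n", estimate_rows(false) == HA_POS_ERROR ? "skipped" : "estimated");
      printf("%s\n", estimate_rows(true)  == HA_POS_ERROR ? "skipped" : "estimated");
      return 0;
    }
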
=== modified file 'sql/opt_subselect.cc'
--- a/sql/opt_subselect.cc 2010-03-15 19:52:58 +0000
+++ b/sql/opt_subselect.cc 2010-05-26 20:18:18 +0000
@@ -154,9 +154,9 @@ int check_and_do_in_subquery_rewrites(JO
!join->having && !select_lex->with_sum_func && // 4
thd->thd_marker.emb_on_expr_nest && // 5
select_lex->outer_select()->join && // 6
- select_lex->master_unit()->first_select()->leaf_tables && // 7
+ select_lex->master_unit()->first_select()->leaf_tables.elements && // 7
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8
- select_lex->outer_select()->leaf_tables && // 9
+ select_lex->outer_select()->leaf_tables.elements && // 9
!((join->select_options | // 10
select_lex->outer_select()->join->select_options) // 10
& SELECT_STRAIGHT_JOIN)) // 10
@@ -212,9 +212,9 @@ int check_and_do_in_subquery_rewrites(JO
if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) &&
in_subs && // 1
!select_lex->is_part_of_union() && // 2
- select_lex->master_unit()->first_select()->leaf_tables && // 3
+ select_lex->master_unit()->first_select()->leaf_tables.elements && // 3
thd->lex->sql_command == SQLCOM_SELECT && // *
- select_lex->outer_select()->leaf_tables && // 3A
+ select_lex->outer_select()->leaf_tables.elements && // 3A
subquery_types_allow_materialization(in_subs) &&
// psergey-todo: duplicated_subselect_card_check: where it's done?
(in_subs->is_top_level_item() ||
@@ -391,11 +391,26 @@ bool convert_join_subqueries_to_semijoin
Item_in_subselect **in_subq;
Item_in_subselect **in_subq_end;
THD *thd= join->thd;
+ TABLE_LIST *tbl;
+ List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
DBUG_ENTER("convert_join_subqueries_to_semijoins");
if (join->sj_subselects.elements() == 0)
DBUG_RETURN(FALSE);
+ for (in_subq= join->sj_subselects.front(),
+ in_subq_end= join->sj_subselects.back();
+ in_subq != in_subq_end;
+ in_subq++)
+ {
+ SELECT_LEX *subq_sel= (*in_subq)->get_select_lex();
+ if (subq_sel->handle_derived(thd->lex, DT_OPTIMIZE))
+ DBUG_RETURN(1);
+ if (subq_sel->handle_derived(thd->lex, DT_MERGE))
+ DBUG_RETURN(TRUE);
+ subq_sel->update_used_tables();
+ }
+
/* First, convert child join's subqueries. We proceed bottom-up here */
for (in_subq= join->sj_subselects.front(),
in_subq_end= join->sj_subselects.back();
@@ -422,11 +437,12 @@ bool convert_join_subqueries_to_semijoin
// Temporary measure: disable semi-joins when they are together with outer
// joins.
- for (TABLE_LIST *tbl= join->select_lex->leaf_tables; tbl; tbl=tbl->next_leaf)
+ while ((tbl= ti++))
{
TABLE_LIST *embedding= tbl->embedding;
- if (tbl->on_expr || (tbl->embedding && !(embedding->sj_on_expr &&
- !embedding->embedding)))
+ if (tbl->on_expr ||
+ (embedding && embedding->outer_join &&
+ !(embedding->sj_on_expr && !embedding->embedding)))
{
in_subq= join->sj_subselects.front();
arena= thd->activate_stmt_arena_if_needed(&backup);
@@ -737,7 +753,7 @@ static bool convert_subq_to_sj(JOIN *par
st_select_lex *subq_lex= subq_pred->unit->first_select();
nested_join->join_list.empty();
List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
- TABLE_LIST *tl, *last_leaf;
+ TABLE_LIST *tl;
while ((tl= li++))
{
tl->embedding= sj_nest;
@@ -752,17 +768,15 @@ static bool convert_subq_to_sj(JOIN *par
NOTE: We actually insert them at the front! That's because the order is
reversed in this list.
*/
- for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ;
- tl->next_leaf= subq_lex->leaf_tables;
- last_leaf= tl;
+ parent_lex->leaf_tables.concat(&subq_lex->leaf_tables);
/*
Same as above for next_local chain
(a theory: a next_local chain always starts with ::leaf_tables
because view's tables are inserted after the view)
*/
- for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ;
- tl->next_local= subq_lex->leaf_tables;
+ for (tl= parent_lex->leaf_tables.head(); tl->next_local; tl= tl->next_local) ;
+ tl->next_local= subq_lex->leaf_tables.head();
/* A theory: no need to re-connect the next_global chain */
@@ -776,7 +790,8 @@ static bool convert_subq_to_sj(JOIN *par
/* n. Adjust the parent_join->tables counter */
uint table_no= parent_join->tables;
/* n. Walk through child's tables and adjust table->map */
- for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++)
+ List_iterator_fast<TABLE_LIST> si(subq_lex->leaf_tables);
+ while ((tl= si++))
{
tl->table->tablenr= table_no;
tl->table->map= ((table_map)1) << table_no;
@@ -786,6 +801,7 @@ static bool convert_subq_to_sj(JOIN *par
emb && emb->select_lex == old_sl;
emb= emb->embedding)
emb->select_lex= parent_join->select_lex;
+ table_no++;
}
parent_join->tables += subq_lex->join->tables;
@@ -872,7 +888,8 @@ static bool convert_subq_to_sj(JOIN *par
{
/* Inject into the WHERE */
parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr);
- parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
+ if (!parent_join->conds->fixed)
+ parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds);
parent_join->select_lex->where= parent_join->conds;
}
@@ -1424,6 +1441,7 @@ void advance_sj_state(JOIN *join, table_
TABLE_LIST *emb_sj_nest;
POSITION *pos= join->positions + idx;
remaining_tables &= ~new_join_tab->table->map;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
pos->prefix_cost.convert_from_cost(*current_read_time);
pos->prefix_record_count= *current_record_count;
@@ -1593,7 +1611,8 @@ void advance_sj_state(JOIN *join, table_
optimize_wo_join_buffering(join, pos->first_loosescan_table, idx,
remaining_tables,
TRUE, //first_alt
- pos->first_loosescan_table + n_tables,
+ disable_jbuf ? join->tables :
+ pos->first_loosescan_table + n_tables,
&reopt_rec_count,
&reopt_cost, &sj_inner_fanout);
/*
@@ -1734,8 +1753,8 @@ void advance_sj_state(JOIN *join, table_
/* Need to re-run best-access-path as we prefix_rec_count has changed */
for (i= first_tab + mat_info->tables; i <= idx; i++)
{
- best_access_path(join, join->positions[i].table, rem_tables, i, FALSE,
- prefix_rec_count, &curpos, &dummy);
+ best_access_path(join, join->positions[i].table, rem_tables, i,
+ disable_jbuf, prefix_rec_count, &curpos, &dummy);
prefix_rec_count *= curpos.records_read;
prefix_cost += curpos.read_time;
}
@@ -2031,6 +2050,7 @@ at_sjmat_pos(const JOIN *join, table_map
void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
{
uint table_count=join->tables;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
uint tablenr;
table_map remaining_tables= 0;
table_map handled_tabs= 0;
@@ -2092,8 +2112,9 @@ void fix_semijoin_strategies_for_picked_
join->cur_sj_inner_tables= 0;
for (i= first + sjm->tables; i <= tablenr; i++)
{
- best_access_path(join, join->best_positions[i].table, rem_tables, i, FALSE,
- prefix_rec_count, join->best_positions + i, &dummy);
+ best_access_path(join, join->best_positions[i].table, rem_tables, i,
+ disable_jbuf, prefix_rec_count,
+ join->best_positions + i, &dummy);
prefix_rec_count *= join->best_positions[i].records_read;
rem_tables &= ~join->best_positions[i].table->table->map;
}
=== modified file 'sql/opt_sum.cc'
--- a/sql/opt_sum.cc 2010-01-04 17:54:42 +0000
+++ b/sql/opt_sum.cc 2010-05-26 20:18:18 +0000
@@ -74,10 +74,12 @@ static int maxmin_in_range(bool max_fl,
# Multiplication of number of rows in all tables
*/
-static ulonglong get_exact_record_count(TABLE_LIST *tables)
+static ulonglong get_exact_record_count(List<TABLE_LIST> &tables)
{
ulonglong count= 1;
- for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
+ TABLE_LIST *tl;
+ List_iterator<TABLE_LIST> ti(tables);
+ while ((tl= ti++))
{
ha_rows tmp= tl->table->file->records();
if ((tmp == HA_POS_ERROR))
@@ -110,9 +112,11 @@ static ulonglong get_exact_record_count(
HA_ERR_... if a deadlock or a lock wait timeout happens, for example
*/
-int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds)
+int opt_sum_query(List<TABLE_LIST> &tables, List<Item> &all_fields,COND *conds)
{
List_iterator_fast<Item> it(all_fields);
+ List_iterator<TABLE_LIST> ti(tables);
+ TABLE_LIST *tl;
int const_result= 1;
bool recalc_const_item= 0;
ulonglong count= 1;
@@ -120,7 +124,7 @@ int opt_sum_query(TABLE_LIST *tables, Li
table_map removed_tables= 0, outer_tables= 0, used_tables= 0;
table_map where_tables= 0;
Item *item;
- int error;
+ int error= 0;
if (conds)
where_tables= conds->used_tables();
@@ -129,7 +133,7 @@ int opt_sum_query(TABLE_LIST *tables, Li
Analyze outer join dependencies, and, if possible, compute the number
of returned rows.
*/
- for (TABLE_LIST *tl= tables; tl; tl= tl->next_leaf)
+ while ((tl= ti++))
{
TABLE_LIST *embedded;
for (embedded= tl ; embedded; embedded= embedded->embedding)
@@ -170,6 +174,14 @@ int opt_sum_query(TABLE_LIST *tables, Li
is_exact_count= FALSE;
count= 1; // ensure count != 0
}
+ else if (tl->is_materialized_derived())
+ {
+ /*
+ Can't remove a derived table as its number of rows is just an
+ estimate.
+ */
+ return 0;
+ }
else
{
error= tl->table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
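
This hunk shows the conversion applied throughout the patch: next_leaf pointer chains become List<TABLE_LIST> collections walked with List_iterator. A standalone sketch with std::list standing in for the server's List class:

    #include <list>
    #include <cstdio>

    struct TableModel { const char *name; unsigned long long rows; };

    static unsigned long long exact_record_count(std::list<TableModel> &tables) {
      unsigned long long count = 1;
      for (TableModel &t : tables)  /* was: for (tl= tables; tl; tl= tl->next_leaf) */
        count *= t.rows;            /* cross-product row count, as in opt_sum.cc */
      return count;
    }

    int main() {
      std::list<TableModel> leaves = { {"t1", 3}, {"t2", 4} };
      printf("%llu\n", exact_record_count(leaves));  /* 12 */
      return 0;
    }
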
=== modified file 'sql/records.cc'
--- a/sql/records.cc 2010-02-01 06:14:12 +0000
+++ b/sql/records.cc 2010-05-26 20:18:18 +0000
@@ -286,7 +286,8 @@ void end_read_record(READ_RECORD *info)
if (info->table)
{
filesort_free_buffers(info->table,0);
- (void) info->file->extra(HA_EXTRA_NO_CACHE);
+ if (info->table->created)
+ (void) info->file->extra(HA_EXTRA_NO_CACHE);
if (info->read_record != rr_quick) // otherwise quick_range does it
(void) info->file->ha_index_or_rnd_end();
info->table=0;
=== modified file 'sql/sp_head.cc'
--- a/sql/sp_head.cc 2010-03-15 11:51:23 +0000
+++ b/sql/sp_head.cc 2010-05-26 20:18:18 +0000
@@ -2821,6 +2821,9 @@ int sp_instr::exec_open_and_lock_tables(
result= -1;
else
result= 0;
+ /* Prepare all derived tables/views to catch possible errors. */
+ if (!result)
+ result= mysql_handle_derived(thd->lex, DT_PREPARE) ? -1 : 0;
return result;
}
=== modified file 'sql/sql_acl.cc'
--- a/sql/sql_acl.cc 2010-03-15 11:51:23 +0000
+++ b/sql/sql_acl.cc 2010-05-26 20:18:18 +0000
@@ -3003,7 +3003,8 @@ int mysql_table_grant(THD *thd, TABLE_LI
class LEX_COLUMN *column;
List_iterator <LEX_COLUMN> column_iter(columns);
- if (open_and_lock_tables(thd, table_list))
+ if (open_and_lock_tables(thd, table_list) ||
+ mysql_handle_derived(thd->lex, DT_PREPARE))
DBUG_RETURN(TRUE);
while ((column = column_iter++))
=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_base.cc 2010-05-26 20:18:18 +0000
@@ -2998,6 +2998,7 @@ TABLE *open_table(THD *thd, TABLE_LIST *
table->fulltext_searched= 0;
table->file->ft_handler= 0;
table->reginfo.impossible_range= 0;
+ table->created= TRUE;
/* Catch wrong handling of the auto_increment_field_not_null. */
DBUG_ASSERT(!table->auto_increment_field_not_null);
table->auto_increment_field_not_null= FALSE;
@@ -5044,9 +5045,10 @@ int open_and_lock_tables_derived(THD *th
close_tables_for_reopen(thd, &tables);
}
if (derived &&
- (mysql_handle_derived(thd->lex, &mysql_derived_prepare) ||
- (thd->fill_derived_tables() &&
- mysql_handle_derived(thd->lex, &mysql_derived_filling))))
+ (mysql_handle_derived(thd->lex, DT_INIT)))
+ DBUG_RETURN(TRUE); /* purecov: inspected */
+ if (thd->prepare_derived_at_open && derived &&
+ (mysql_handle_derived(thd->lex, DT_PREPARE)))
DBUG_RETURN(TRUE); /* purecov: inspected */
DBUG_RETURN(0);
}
@@ -5062,6 +5064,7 @@ int open_and_lock_tables_derived(THD *th
flags - bitmap of flags to modify how the tables will be open:
MYSQL_LOCK_IGNORE_FLUSH - open table even if someone has
done a flush or namelock on it.
+ dt_phases - set of flags to pass to mysql_handle_derived()
RETURN
FALSE - ok
@@ -5072,13 +5075,14 @@ int open_and_lock_tables_derived(THD *th
data from the tables.
*/
-bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags)
+bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags,
+ uint dt_phases)
{
uint counter;
DBUG_ENTER("open_normal_and_derived_tables");
DBUG_ASSERT(!thd->fill_derived_tables());
if (open_tables(thd, &tables, &counter, flags) ||
- mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+ mysql_handle_derived(thd->lex, dt_phases))
DBUG_RETURN(TRUE); /* purecov: inspected */
DBUG_RETURN(0);
}
@@ -5714,9 +5718,7 @@ find_field_in_view(THD *thd, TABLE_LIST
Field_iterator_view field_it;
field_it.set(table_list);
Query_arena *arena= 0, backup;
-
- DBUG_ASSERT(table_list->schema_table_reformed ||
- (ref != 0 && table_list->view != 0));
+
for (; !field_it.end_of_fields(); field_it.next())
{
if (!my_strcasecmp(system_charset_info, field_it.name(), name))
@@ -5735,6 +5737,8 @@ find_field_in_view(THD *thd, TABLE_LIST
if (!item)
DBUG_RETURN(0);
+ if (!ref)
+ DBUG_RETURN((Field*) view_ref_found);
/*
*ref != NULL means that *ref contains the item that we need to
replace. If the item was aliased by the user, set the alias to
@@ -6134,6 +6138,8 @@ find_field_in_table_ref(THD *thd, TABLE_
Field *field_to_set= NULL;
if (fld == view_ref_found)
{
+ if (!ref)
+ DBUG_RETURN(fld);
Item *it= (*ref)->real_item();
if (it->type() == Item::FIELD_ITEM)
field_to_set= ((Item_field*)it)->field;
@@ -6141,6 +6147,8 @@ find_field_in_table_ref(THD *thd, TABLE_
{
if (thd->mark_used_columns == MARK_COLUMNS_READ)
it->walk(&Item::register_field_in_read_map, 1, (uchar *) 0);
+ else
+ it->walk(&Item::register_field_in_write_map, 1, (uchar *) 0);
}
}
else
@@ -6280,7 +6288,9 @@ find_field_in_tables(THD *thd, Item_iden
find_field_in_table even in the case of information schema tables
when table_ref->field_translation != NULL.
*/
- if (table_ref->table && !table_ref->view)
+ if (table_ref->table &&
+ (!table_ref->is_merged_derived() ||
+ (!table_ref->is_multitable() && table_ref->merged_for_insert)))
found= find_field_in_table(thd, table_ref->table, name, length,
TRUE, &(item->cached_field_index));
else
@@ -6298,7 +6308,8 @@ find_field_in_tables(THD *thd, Item_iden
Only views fields should be marked as dependent, not an underlying
fields.
*/
- if (!table_ref->belong_to_view)
+ if (!table_ref->belong_to_view &&
+ !table_ref->belong_to_derived)
{
SELECT_LEX *current_sel= thd->lex->current_select;
SELECT_LEX *last_select= table_ref->select_lex;
@@ -6884,6 +6895,10 @@ mark_common_columns(THD *thd, TABLE_LIST
*/
if (nj_col_2 && (!using_fields ||is_using_column_1))
{
+ /*
+ Create a non-fixed fully qualified field and let fix_fields
+ resolve it.
+ */
Item *item_1= nj_col_1->create_item(thd);
Item *item_2= nj_col_2->create_item(thd);
Field *field_1= nj_col_1->field();
@@ -7548,27 +7563,36 @@ bool setup_fields(THD *thd, Item **ref_p
make_leaves_list()
list pointer to pointer on list first element
tables table list
+ full_table_list whether to include tables from mergeable derived
+ tables/views; they are needed only for INSERT/UPDATE checks
RETURN pointer on pointer to next_leaf of last element
*/
-TABLE_LIST **make_leaves_list(TABLE_LIST **list, TABLE_LIST *tables)
+void make_leaves_list(List<TABLE_LIST> &list, TABLE_LIST *tables,
+ bool full_table_list, TABLE_LIST *boundary)
+
{
for (TABLE_LIST *table= tables; table; table= table->next_local)
{
- if (table->merge_underlying_list)
- {
- DBUG_ASSERT(table->view &&
- table->effective_algorithm == VIEW_ALGORITHM_MERGE);
- list= make_leaves_list(list, table->merge_underlying_list);
+ if (table == boundary)
+ full_table_list= !full_table_list;
+ if (full_table_list && table->is_merged_derived())
+ {
+ SELECT_LEX *select_lex= table->get_single_select();
+ /*
+ It's safe to use select_lex->leaf_tables because all derived
+ tables/views were already prepared and have their leaf_tables
+ set properly.
+ */
+ make_leaves_list(list, select_lex->get_table_list(),
+ full_table_list, boundary);
}
else
{
- *list= table;
- list= &table->next_leaf;
+ list.push_back(table);
}
}
- return list;
}
/*
@@ -7583,6 +7607,7 @@ TABLE_LIST **make_leaves_list(TABLE_LIST
leaves List of join table leaves list (select_lex->leaf_tables)
refresh It is only a refresh for a subquery
select_insert It is SELECT ... INSERT command
+ full_table_list a parameter to pass to the make_leaves_list function
NOTE
Check also that the 'used keys' and 'ignored keys' exists and set up the
@@ -7601,9 +7626,13 @@ TABLE_LIST **make_leaves_list(TABLE_LIST
bool setup_tables(THD *thd, Name_resolution_context *context,
List<TABLE_LIST> *from_clause, TABLE_LIST *tables,
- TABLE_LIST **leaves, bool select_insert)
+ List<TABLE_LIST> &leaves, bool select_insert,
+ bool full_table_list)
{
uint tablenr= 0;
+ List_iterator<TABLE_LIST> ti(leaves);
+ TABLE_LIST *table_list;
+
DBUG_ENTER("setup_tables");
DBUG_ASSERT ((select_insert && !tables->next_name_resolution_table) || !tables ||
@@ -7615,40 +7644,57 @@ bool setup_tables(THD *thd, Name_resolut
TABLE_LIST *first_select_table= (select_insert ?
tables->next_local:
0);
- if (!(*leaves))
- make_leaves_list(leaves, tables);
-
- TABLE_LIST *table_list;
- for (table_list= *leaves;
- table_list;
- table_list= table_list->next_leaf, tablenr++)
- {
- TABLE *table= table_list->table;
- table->pos_in_table_list= table_list;
- if (first_select_table &&
- table_list->top_table() == first_select_table)
- {
- /* new counting for SELECT of INSERT ... SELECT command */
- first_select_table= 0;
- tablenr= 0;
+ SELECT_LEX *select_lex= select_insert ? &thd->lex->select_lex :
+ thd->lex->current_select;
+ if (select_lex->first_cond_optimization)
+ {
+ leaves.empty();
+ select_lex->leaf_tables_exec.empty();
+ make_leaves_list(leaves, tables, full_table_list, first_select_table);
+
+ while ((table_list= ti++))
+ {
+ TABLE *table= table_list->table;
+ table->pos_in_table_list= table_list;
+ if (first_select_table &&
+ table_list->top_table() == first_select_table)
+ {
+ /* new counting for SELECT of INSERT ... SELECT command */
+ first_select_table= 0;
+ thd->lex->select_lex.insert_tables= tablenr;
+ tablenr= 0;
+ }
+ setup_table_map(table, table_list, tablenr);
+ if (table_list->process_index_hints(table))
+ DBUG_RETURN(1);
+ tablenr++;
}
- setup_table_map(table, table_list, tablenr);
- if (table_list->process_index_hints(table))
+ if (tablenr > MAX_TABLES)
+ {
+ my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES);
DBUG_RETURN(1);
+ }
}
- if (tablenr > MAX_TABLES)
- {
- my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES);
- DBUG_RETURN(1);
- }
+ else
+ {
+ List_iterator_fast <TABLE_LIST> ti(select_lex->leaf_tables_exec);
+ select_lex->leaf_tables.empty();
+ while ((table_list= ti++))
+ {
+ table_list->table->tablenr= table_list->tablenr_exec;
+ table_list->table->map= table_list->map_exec;
+ table_list->table->pos_in_table_list= table_list;
+ select_lex->leaf_tables.push_back(table_list);
+ }
+ }
+
for (table_list= tables;
table_list;
table_list= table_list->next_local)
{
if (table_list->merge_underlying_list)
{
- DBUG_ASSERT(table_list->view &&
- table_list->effective_algorithm == VIEW_ALGORITHM_MERGE);
+ DBUG_ASSERT(table_list->is_merged_derived());
Query_arena *arena= thd->stmt_arena, backup;
bool res;
if (arena->is_conventional())
@@ -7675,7 +7721,7 @@ bool setup_tables(THD *thd, Name_resolut
prepare tables and check access for the view tables
SYNOPSIS
- setup_tables_and_check_view_access()
+ setup_tables_and_check_access()
thd Thread handler
context name resolution contest to setup table list there
from_clause Top-level list of table references in the FROM clause
@@ -7685,6 +7731,7 @@ bool setup_tables(THD *thd, Name_resolut
refresh It is only a refresh for a subquery
select_insert It is SELECT ... INSERT command
want_access what access is needed
+ full_table_list a parameter to pass to the make_leaves_list function
NOTE
a wrapper for check_tables that will also check the resulting
@@ -7698,33 +7745,32 @@ bool setup_tables_and_check_access(THD *
Name_resolution_context *context,
List<TABLE_LIST> *from_clause,
TABLE_LIST *tables,
- TABLE_LIST **leaves,
+ List<TABLE_LIST> &leaves,
bool select_insert,
ulong want_access_first,
- ulong want_access)
+ ulong want_access,
+ bool full_table_list)
{
- TABLE_LIST *leaves_tmp= NULL;
bool first_table= true;
if (setup_tables(thd, context, from_clause, tables,
- &leaves_tmp, select_insert))
+ leaves, select_insert, full_table_list))
return TRUE;
- if (leaves)
- *leaves= leaves_tmp;
-
- for (; leaves_tmp; leaves_tmp= leaves_tmp->next_leaf)
+ List_iterator<TABLE_LIST> ti(leaves);
+ TABLE_LIST *table_list;
+ while((table_list= ti++))
{
- if (leaves_tmp->belong_to_view &&
+ if (table_list->belong_to_view &&
check_single_table_access(thd, first_table ? want_access_first :
- want_access, leaves_tmp, FALSE))
+ want_access, table_list, FALSE))
{
tables->hide_view_error(thd);
return TRUE;
}
first_table= 0;
}
- return FALSE;
+ return FALSE;
}
@@ -7860,8 +7906,8 @@ insert_fields(THD *thd, Name_resolution_
information_schema table, or a nested table reference. See the comment
for TABLE_LIST.
*/
- if (!((table && !tables->view && (table->grant.privilege & SELECT_ACL)) ||
- (tables->view && (tables->grant.privilege & SELECT_ACL))) &&
+ if (!(table && tables->is_non_derived() && (table->grant.privilege & SELECT_ACL) ||
+ (!tables->is_non_derived() && (tables->grant.privilege & SELECT_ACL))) &&
!any_privileges)
{
field_iterator.set(tables);
@@ -7891,7 +7937,7 @@ insert_fields(THD *thd, Name_resolution_
if (!(item= field_iterator.create_item(thd)))
DBUG_RETURN(TRUE);
- DBUG_ASSERT(item->fixed);
+// DBUG_ASSERT(item->fixed);
/* cache the table for the Item_fields inserted by expanding stars */
if (item->type() == Item::FIELD_ITEM && tables->cacheable_table)
((Item_field *)item)->cached_table= tables;
@@ -8021,13 +8067,14 @@ insert_fields(THD *thd, Name_resolution_
FALSE if all is OK
*/
-int setup_conds(THD *thd, TABLE_LIST *tables, TABLE_LIST *leaves,
+int setup_conds(THD *thd, TABLE_LIST *tables, List<TABLE_LIST> &leaves,
COND **conds)
{
SELECT_LEX *select_lex= thd->lex->current_select;
Query_arena *arena= thd->stmt_arena, backup;
TABLE_LIST *table= NULL; // For HP compilers
TABLE_LIST *save_emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
+ List_iterator<TABLE_LIST> ti(leaves);
/*
it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
which belongs to LEX, i.e. the topmost SELECT) will be updated by
@@ -8039,9 +8086,15 @@ int setup_conds(THD *thd, TABLE_LIST *ta
bool it_is_update= (select_lex == &thd->lex->select_lex) &&
thd->lex->which_check_option_applicable();
bool save_is_item_list_lookup= select_lex->is_item_list_lookup;
- select_lex->is_item_list_lookup= 0;
+ TABLE_LIST *derived= select_lex->master_unit()->derived;
DBUG_ENTER("setup_conds");
+ /* Do not fix conditions for the derived tables that have been merged */
+ if (derived && derived->merged)
+ DBUG_RETURN(0);
+
+ select_lex->is_item_list_lookup= 0;
+
if (select_lex->conds_processed_with_permanent_arena ||
arena->is_conventional())
arena= 0; // For easier test
@@ -8054,7 +8107,10 @@ int setup_conds(THD *thd, TABLE_LIST *ta
for (table= tables; table; table= table->next_local)
{
- if (table->prepare_where(thd, conds, FALSE))
+ if (select_lex == &thd->lex->select_lex &&
+ select_lex->first_cond_optimization &&
+ table->merged_for_insert &&
+ table->prepare_where(thd, conds, FALSE))
goto err_no_arena;
}
@@ -8072,7 +8128,7 @@ int setup_conds(THD *thd, TABLE_LIST *ta
Apply fix_fields() to all ON clauses at all levels of nesting,
including the ones inside view definitions.
*/
- for (table= leaves; table; table= table->next_leaf)
+ while ((table= ti++))
{
TABLE_LIST *embedded; /* The table at the current level of nesting. */
TABLE_LIST *embedding= table; /* The parent nested table reference. */
@@ -9283,6 +9339,27 @@ void close_performance_schema_table(THD
thd->restore_backup_open_tables_state(backup);
}
+
+/**
+ @brief
+ Remove 'fixed' flag from items in a list
+
+ @param fields list of items to un-fix
+
+ @details
+ This function sets to 0 the 'fixed' flag for items in the 'fields' list.
+ It's needed to force correct marking of views' fields for INSERT/UPDATE
+ statements.
+*/
+
+void unfix_fields(List<Item> &fields)
+{
+ List_iterator<Item> li(fields);
+ Item *item;
+ while ((item= li++))
+ item->fixed= 0;
+}
+
/**
@} (end of group Data_Dictionary)
*/
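
Two ideas from the sql_base.cc changes above are worth spelling out. First,
make_leaves_list() now recurses into merged derived tables so that their
underlying tables land directly in the parent's leaf list. Second,
setup_tables() builds that list only on the first optimization and replays a
saved copy (leaf_tables_exec) on later executions. A standalone sketch of
the flattening step (invented names, illustration only):

#include <list>
#include <vector>

struct TableRef
{
  bool merged_derived;                // a merged derived table/view
  std::vector<TableRef*> underlying;  // its FROM list, when merged
};

static void collect_leaves(std::list<TableRef*> &leaves,
                           const std::vector<TableRef*> &tables,
                           bool expand_merged)
{
  for (TableRef *t : tables)
  {
    if (expand_merged && t->merged_derived)
      collect_leaves(leaves, t->underlying, expand_merged);  // recurse
    else
      leaves.push_back(t);                                   // plain leaf
  }
}
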
=== modified file 'sql/sql_bitmap.h'
--- a/sql/sql_bitmap.h 2009-08-12 22:34:21 +0000
+++ b/sql/sql_bitmap.h 2010-05-26 20:18:18 +0000
@@ -91,6 +91,10 @@ public:
DBUG_ASSERT(sizeof(buffer) >= 4);
return (ulonglong) uint4korr(buffer);
}
+ uint bits_set()
+ {
+ return bitmap_bits_set(&map);
+ }
};
/* An iterator to quickly walk over bits in unlonglong bitmap. */
@@ -169,5 +173,16 @@ public:
public:
Iterator(Bitmap<64> &bmp) : Table_map_iterator(bmp.map) {}
};
+ uint bits_set()
+ {
+ //TODO: use my_count_bits()
+ uint res= 0, i= 0;
+ for (; i < 64 ; i++)
+ {
+ if (map & ((ulonglong)1<<i))
+ res++;
+ }
+ return res;
+ }
};
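
The bits_set() loop above tests all 64 bit positions; its TODO points at
my_count_bits(). For reference, a common alternative that loops only once
per set bit (a sketch, not the server's helper):

#include <cstdint>

static unsigned count_bits(uint64_t map)
{
  unsigned n= 0;
  while (map)
  {
    map&= map - 1;   // clears the lowest set bit
    n++;
  }
  return n;
}
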
=== modified file 'sql/sql_cache.cc'
--- a/sql/sql_cache.cc 2010-01-29 10:42:31 +0000
+++ b/sql/sql_cache.cc 2010-05-26 20:18:18 +0000
@@ -3477,16 +3477,17 @@ Query_cache::process_and_count_tables(TH
}
else
{
- DBUG_PRINT("qcache", ("table: %s db: %s type: %u",
- tables_used->table->s->table_name.str,
- tables_used->table->s->db.str,
- tables_used->table->s->db_type()->db_type));
if (tables_used->derived)
{
+ DBUG_PRINT("qcache", ("table: %s", tables_used->alias));
table_count--;
DBUG_PRINT("qcache", ("derived table skipped"));
continue;
}
+ DBUG_PRINT("qcache", ("table: %s db: %s type: %u",
+ tables_used->table->s->table_name.str,
+ tables_used->table->s->db.str,
+ tables_used->table->s->db_type()->db_type));
*tables_type|= tables_used->table->file->table_cache_type();
/*
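
The sql_cache.cc hunk moves the debug print below the 'derived' check,
presumably because a derived table entry has no underlying TABLE to
dereference at this point in the query cache walk. The shape of the fix
(invented names, illustration only):

#include <cstdio>

struct Share { const char *name; };
struct Entry { bool derived; const char *alias; Share *share; };

static void trace(const Entry &e)
{
  if (e.derived)
  {
    std::printf("derived table %s skipped\n", e.alias);  // no Share to read
    return;
  }
  std::printf("table %s\n", e.share->name);  // safe: a real table has a share
}
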
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_class.cc 2010-05-26 20:18:18 +0000
@@ -771,6 +771,7 @@ THD::THD()
thr_lock_owner_init(&main_lock_id, &lock_info);
m_internal_handler= NULL;
+ prepare_derived_at_open= FALSE;
}
@@ -2946,7 +2947,8 @@ bool
select_materialize_with_stats::
create_result_table(THD *thd_arg, List<Item> *column_types,
bool is_union_distinct, ulonglong options,
- const char *table_alias, bool bit_fields_as_long)
+ const char *table_alias, bool bit_fields_as_long,
+ bool create_table)
{
DBUG_ASSERT(table == 0);
tmp_table_param.field_count= column_types->elements;
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-03-20 12:01:47 +0000
+++ b/sql/sql_class.h 2010-05-26 20:18:18 +0000
@@ -1474,6 +1474,9 @@ public:
*/
TABLE_LIST *emb_on_expr_nest;
} thd_marker;
+
+ bool prepare_derived_at_open;
+
#ifndef MYSQL_CLIENT
int binlog_setup_trx_data();
@@ -2810,12 +2813,12 @@ public:
class select_union :public select_result_interceptor
{
-protected:
- TMP_TABLE_PARAM tmp_table_param;
public:
+ TMP_TABLE_PARAM tmp_table_param;
TABLE *table;
+ ha_rows records;
- select_union() :table(0) { tmp_table_param.init(); }
+ select_union() :table(0), records(0) { tmp_table_param.init(); }
int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
bool send_data(List<Item> &items);
bool send_eof();
@@ -2823,7 +2826,9 @@ public:
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
- const char *alias, bool bit_fields_as_long);
+ const char *alias,
+ bool bit_fields_as_long,
+ bool create_table);
};
/* Base subselect interface class */
@@ -2885,9 +2890,11 @@ protected:
public:
select_materialize_with_stats() {}
- virtual bool create_result_table(THD *thd, List<Item> *column_types,
- bool is_distinct, ulonglong options,
- const char *alias, bool bit_fields_as_long);
+ bool create_result_table(THD *thd, List<Item> *column_types,
+ bool is_distinct, ulonglong options,
+ const char *alias,
+ bool bit_fields_as_long,
+ bool create_table);
bool init_result_table(ulonglong select_options);
bool send_data(List<Item> &items);
void cleanup()
@@ -3175,7 +3182,7 @@ public:
class multi_update :public select_result_interceptor
{
TABLE_LIST *all_tables; /* query/update command tables */
- TABLE_LIST *leaves; /* list of leves of join table tree */
+ List<TABLE_LIST> *leaves; /* list of leaves of the join table tree */
TABLE_LIST *update_tables, *table_being_updated;
TABLE **tmp_tables, *main_table, *table_to_update;
TMP_TABLE_PARAM *tmp_table_param;
@@ -3201,7 +3208,7 @@ class multi_update :public select_result
bool error_handled;
public:
- multi_update(TABLE_LIST *ut, TABLE_LIST *leaves_list,
+ multi_update(TABLE_LIST *ut, List<TABLE_LIST> *leaves_list,
List<Item> *fields, List<Item> *values,
enum_duplicates handle_duplicates, bool ignore);
~multi_update();
=== modified file 'sql/sql_cursor.cc'
--- a/sql/sql_cursor.cc 2010-02-17 21:59:41 +0000
+++ b/sql/sql_cursor.cc 2010-05-26 20:18:18 +0000
@@ -715,8 +715,8 @@ bool Select_materialize::send_fields(Lis
DBUG_ASSERT(table == 0);
if (create_result_table(unit->thd, unit->get_unit_column_types(),
FALSE, thd->options | TMP_TABLE_ALL_COLUMNS, "",
- FALSE))
- return TRUE;
+ FALSE, TRUE))
+ return TRUE;
materialized_cursor= new (&table->mem_root)
Materialized_cursor(result, table);
=== modified file 'sql/sql_delete.cc'
--- a/sql/sql_delete.cc 2010-03-10 13:55:40 +0000
+++ b/sql/sql_delete.cc 2010-05-26 20:18:18 +0000
@@ -58,10 +58,18 @@ bool mysql_delete(THD *thd, TABLE_LIST *
if (open_and_lock_tables(thd, table_list))
DBUG_RETURN(TRUE);
- if (!(table= table_list->table))
+
+ if (mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
+ mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
+ DBUG_RETURN(TRUE);
+
+ if (!(table= table_list->table) || !table->created)
{
- my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
- table_list->view_db.str, table_list->view_name.str);
+ if (!table_list->updatable)
+ my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "DELETE");
+ else
+ my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
+ table_list->view_db.str, table_list->view_name.str);
DBUG_RETURN(TRUE);
}
thd_proc_info(thd, "init");
@@ -70,6 +78,11 @@ bool mysql_delete(THD *thd, TABLE_LIST *
if (mysql_prepare_delete(thd, table_list, &conds))
DBUG_RETURN(TRUE);
+ if (thd->lex->current_select->first_cond_optimization)
+ {
+ thd->lex->current_select->save_leaf_tables(thd);
+ thd->lex->current_select->first_cond_optimization= 0;
+ }
/* check ORDER BY even if it can be ignored */
if (order && order->elements)
{
@@ -384,6 +397,12 @@ cleanup:
query_cache_invalidate3(thd, table_list, 1);
}
+ if (thd->lex->current_select->first_cond_optimization)
+ {
+ thd->lex->current_select->save_leaf_tables(thd);
+ thd->lex->current_select->first_cond_optimization= 0;
+ }
+
delete select;
transactional_table= table->file->has_transactions();
@@ -481,8 +500,8 @@ int mysql_prepare_delete(THD *thd, TABLE
if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
&thd->lex->select_lex.top_join_list,
table_list,
- &select_lex->leaf_tables, FALSE,
- DELETE_ACL, SELECT_ACL) ||
+ select_lex->leaf_tables, FALSE,
+ DELETE_ACL, SELECT_ACL, TRUE) ||
setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
setup_ftfuncs(select_lex))
DBUG_RETURN(TRUE);
@@ -540,6 +559,11 @@ int mysql_multi_delete_prepare(THD *thd)
TABLE_LIST *target_tbl;
DBUG_ENTER("mysql_multi_delete_prepare");
+ TABLE_LIST *tables= lex->query_tables;
+ if (mysql_handle_derived(lex, DT_INIT) ||
+ mysql_handle_list_of_derived(lex, tables, DT_MERGE_FOR_INSERT) ||
+ mysql_handle_list_of_derived(lex, tables, DT_PREPARE))
+ DBUG_RETURN(TRUE);
/*
setup_tables() need for VIEWs. JOIN::prepare() will not do it second
time.
@@ -549,8 +573,8 @@ int mysql_multi_delete_prepare(THD *thd)
if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
&thd->lex->select_lex.top_join_list,
lex->query_tables,
- &lex->select_lex.leaf_tables, FALSE,
- DELETE_ACL, SELECT_ACL))
+ lex->select_lex.leaf_tables, FALSE,
+ DELETE_ACL, SELECT_ACL, TRUE))
DBUG_RETURN(TRUE);
@@ -564,16 +588,13 @@ int mysql_multi_delete_prepare(THD *thd)
target_tbl;
target_tbl= target_tbl->next_local)
{
+
if (!(target_tbl->table= target_tbl->correspondent_table->table))
{
- DBUG_ASSERT(target_tbl->correspondent_table->view &&
- target_tbl->correspondent_table->merge_underlying_list &&
- target_tbl->correspondent_table->merge_underlying_list->
- next_local);
- my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
- target_tbl->correspondent_table->view_db.str,
- target_tbl->correspondent_table->view_name.str);
- DBUG_RETURN(TRUE);
+ my_error(ER_VIEW_DELETE_MERGE_VIEW, MYF(0),
+ target_tbl->correspondent_table->view_db.str,
+ target_tbl->correspondent_table->view_name.str);
+ DBUG_RETURN(TRUE);
}
if (!target_tbl->correspondent_table->updatable ||
@@ -623,6 +644,12 @@ multi_delete::prepare(List<Item> &values
unit= u;
do_delete= 1;
thd_proc_info(thd, "deleting from main table");
+ SELECT_LEX *select_lex= u->first_select();
+ if (select_lex->first_cond_optimization)
+ {
+ if (select_lex->handle_derived(thd->lex, DT_MERGE))
+ DBUG_RETURN(TRUE);
+ }
DBUG_RETURN(0);
}
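
The first_cond_optimization blocks added above implement the save/replay
scheme used for prepared statements: the first execution records each leaf's
table number and map (save_leaf_tables), and later executions restore them
from leaf_tables_exec instead of repeating the destructive merge/setup work.
A condensed standalone model (invented names, not the server's classes):

#include <cstdint>
#include <list>

struct Leaf
{
  unsigned tablenr;
  uint64_t map;
  unsigned tablenr_exec;  // saved at the end of the first optimization
  uint64_t map_exec;
};

struct Select
{
  std::list<Leaf*> leaves;       // rebuilt from scratch on first execution
  std::list<Leaf*> leaves_exec;  // permanent copy for re-executions

  void save_leaf_tables()
  {
    for (Leaf *l : leaves)
    {
      l->tablenr_exec= l->tablenr;
      l->map_exec= l->map;
      leaves_exec.push_back(l);
    }
  }

  void restore_leaf_tables()
  {
    leaves.clear();
    for (Leaf *l : leaves_exec)
    {
      l->tablenr= l->tablenr_exec;
      l->map= l->map_exec;
      leaves.push_back(l);
    }
  }
};
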
=== modified file 'sql/sql_derived.cc'
--- a/sql/sql_derived.cc 2010-02-17 21:59:41 +0000
+++ b/sql/sql_derived.cc 2010-05-26 20:18:18 +0000
@@ -23,38 +23,79 @@
#include "mysql_priv.h"
#include "sql_select.h"
+typedef bool (*dt_processor)(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived);
+bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived);
+
+
+dt_processor processors[]=
+{
+ &mysql_derived_init,
+ &mysql_derived_prepare,
+ &mysql_derived_optimize,
+ &mysql_derived_merge,
+ &mysql_derived_merge_for_insert,
+ &mysql_derived_create,
+ &mysql_derived_fill,
+ &mysql_derived_reinit,
+};
/*
- Call given derived table processor (preparing or filling tables)
+ @brief
+ Run specified phases on all derived tables/views in given LEX.
- SYNOPSIS
- mysql_handle_derived()
- lex LEX for this thread
- processor procedure of derived table processing
-
- RETURN
- FALSE OK
- TRUE Error
-*/
+ @param lex LEX for this thread
+ @param phases phases to run derived tables/views through
+ @return FALSE OK
+ @return TRUE Error
+*/
bool
-mysql_handle_derived(LEX *lex, bool (*processor)(THD*, LEX*, TABLE_LIST*))
+mysql_handle_derived(LEX *lex, uint phases)
{
bool res= FALSE;
- if (lex->derived_tables)
+ THD *thd= lex->thd;
+ if (!lex->derived_tables)
+ return FALSE;
+
+ lex->thd->derived_tables_processing= TRUE;
+
+ for (uint phase= 0; phase < DT_PHASES && !res; phase++)
{
- lex->thd->derived_tables_processing= TRUE;
+ uint phase_flag= DT_INIT << phase;
+ if (phase_flag > phases)
+ break;
+ if (!(phases & phase_flag))
+ continue;
+ if (phase_flag >= DT_CREATE && !thd->fill_derived_tables())
+ break;
+
for (SELECT_LEX *sl= lex->all_selects_list;
- sl;
+ sl && !res;
sl= sl->next_select_in_list())
{
for (TABLE_LIST *cursor= sl->get_table_list();
- cursor;
+ cursor && !res;
cursor= cursor->next_local)
{
- if ((res= (*processor)(lex->thd, lex, cursor)))
- goto out;
+ uint8 allowed_phases= (cursor->is_merged_derived() ? DT_PHASES_MERGE :
+ DT_PHASES_MATERIALIZE);
+ /*
+ Skip derived tables to which the phase isn't applicable.
+ TODO: mark derived at parse time, later set its type
+ (merged or materialized)
+ */
+ if ((phase_flag != DT_PREPARE && !(allowed_phases & phase_flag)) ||
+ (cursor->merged_for_insert && phase_flag != DT_REINIT))
+ continue;
+ res= (*processors[phase])(lex->thd, lex, cursor);
}
if (lex->describe)
{
@@ -67,30 +108,454 @@ mysql_handle_derived(LEX *lex, bool (*pr
}
}
}
-out:
+ lex->thd->derived_tables_processing= FALSE;
+ return res;
+}
+
+/*
+ @brief
+ Run through phases for the given derived table/view.
+
+ @param lex LEX for this thread
+ @param derived the derived table to handle
+ @param phases phases to process tables/views through
+
+ @details
+
+ This function processes the derived table (view) 'derived', performing all
+ actions that are to be done on the table at the phases specified by
+ 'phases'. The processing is carried out starting from the actions
+ performed at the earlier phases (those having smaller ordinal numbers).
+
+ @note
+ This function runs specified phases of the derived tables handling on the
+ given derived table/view. This function is used in the chain of calls:
+ SELECT_LEX::handle_derived ->
+ TABLE_LIST::handle_derived ->
+ mysql_handle_single_derived
+ This chain of calls implements the bottom-up handling of the derived tables:
+ i.e. most inner derived tables/views are handled first. This order is
+ required for all phases except the merge and the create steps.
+ For the sake of code simplicity this order is kept for all phases.
+
+ @return FALSE ok
+ @return TRUE error
+*/
+
+bool
+mysql_handle_single_derived(LEX *lex, TABLE_LIST *derived, uint phases)
+{
+ bool res= FALSE;
+ THD *thd= lex->thd;
+ uint8 allowed_phases= (derived->is_merged_derived() ? DT_PHASES_MERGE :
+ DT_PHASES_MATERIALIZE);
+ if (!lex->derived_tables)
+ return FALSE;
+
+ lex->thd->derived_tables_processing= TRUE;
+
+ for (uint phase= 0; phase < DT_PHASES; phase++)
+ {
+ uint phase_flag= DT_INIT << phase;
+ if (phase_flag > phases)
+ break;
+ if (!(phases & phase_flag))
+ continue;
+ /* Skip derived tables to which the phase isn't applicable. */
+ if (phase_flag != DT_PREPARE &&
+ !(allowed_phases & phase_flag))
+ continue;
+ if (phase_flag >= DT_CREATE && !thd->fill_derived_tables())
+ break;
+
+ if ((res= (*processors[phase])(lex->thd, lex, derived)))
+ break;
+ }
lex->thd->derived_tables_processing= FALSE;
return res;
}
/**
- @brief Create temporary table structure (but do not fill it).
+ @brief
+ Run specified phases for derived tables/views in the given list
+
+ @param lex LEX for this thread
+ @param table_list list of derived tables/views to handle
+ @param phases phases to process tables/views through
+
+ @details
+ This function runs the phases specified by 'phases' on derived
+ tables/views found in 'table_list' with the help of the
+ TABLE_LIST::handle_derived function.
+ 'lex' is passed as an argument to the TABLE_LIST::handle_derived.
+
+ @return FALSE ok
+ @return TRUE error
+*/
+
+bool
+mysql_handle_list_of_derived(LEX *lex, TABLE_LIST *table_list, uint phases)
+{
+ for (TABLE_LIST *tl= table_list; tl; tl= tl->next_local)
+ {
+ if (tl->is_view_or_derived() &&
+ tl->handle_derived(lex, phases))
+ return TRUE;
+ }
+ return FALSE;
+}
- @param thd Thread handle
- @param lex LEX for this thread
- @param orig_table_list TABLE_LIST for the upper SELECT
- @details
+/**
+ @brief
+ Merge a derived table/view into the embedding select
- This function is called before any command containing derived tables is
- executed. Currently the function is used for derived tables, i.e.
+ @param thd thread handle
+ @param lex LEX of the embedding query.
+ @param derived reference to the derived table.
+
+ @details
+ This function merges the given derived table / view into the parent select
+ construction. Any derived table/reference to a view occurring in the FROM
+ clause of the embedding select is represented by a TABLE_LIST structure, a
+ pointer to which is passed to the function in the parameter 'derived'.
+ This structure contains the number/map, alias, a link to SELECT_LEX of the
+ derived table and other info. If the 'derived' table is used in a nested join
+ then additionally the structure contains a reference to the ON expression
+ for this join.
+
+ The merge process results in elimination of the derived table (or the
+ reference to a view) such that:
+ - the FROM list of the derived table/view is wrapped into a nested join
+ after which the nest is added to the FROM list of the embedding select
+ - the WHERE condition of the derived table (view) is ANDed with the ON
+ condition attached to the table.
+
+ @note
+ Tables are merged into the leaf_tables list; the original derived table is
+ also removed from this list. The SELECT_LEX::table_list list is left
+ untouched. The WHERE expression is merged with the derived table's on_expr
+ and can be found after the merge through the SELECT_LEX::table_list.
+
+ Examples of the derived table/view merge:
+
+ Schema:
+ Tables: t1(f1), t2(f2), t3(f3)
+ View v1: SELECT f1 FROM t1 WHERE f1 < 1
+
+ Example with a view:
+ Before merge:
+
+ The query (Q1): SELECT f1,f2 FROM t2 LEFT JOIN v1 ON f1 = f2
+
+ (LEX of the main query)
+ |
+ (select_lex)
+ |
+ (FROM table list)
+ |
+ (join list)= t2, v1
+ / \
+ / (on_expr)= (f1 = f2)
+ |
+ (LEX of the v1 view)
+ |
+ (select_lex)= SELECT f1 FROM t1 WHERE f1 < 1
+
+
+ After merge:
+
+ The rewritten query Q1 (Q1'):
+ SELECT f1,f2 FROM t2 LEFT JOIN (t1) ON ((f1 = f2) and (f1 < 1))
+
+ (LEX of the main query)
+ |
+ (select_lex)
+ |
+ (FROM table list)
+ |
+ (join list)= t2, (t1)
+ \
+ (on_expr)= (f1 = f2) and (f1 < 1)
+
+ In this example table numbers are assigned as follows:
+ (outer select): t2 - 1, v1 - 2
+ (inner select): t1 - 1
+ After the merge table numbers will be:
+ (outer select): t2 - 1, t1 - 2
+
+ Example with a derived table:
+ The query Q2:
+ SELECT f1,f2
+ FROM (SELECT f1 FROM t1, t3 WHERE f1=f3 and f1 < 1) tt, t2
+ WHERE f1 = f2
+
+ Before merge:
+ (LEX of the main query)
+ |
+ (select_lex)
+ / \
+ (FROM table list) (WHERE clause)= (f1 = f2)
+ |
+ (join list)= tt, t2
+ / \
+ / (on_expr)= (empty)
+ /
+ (select_lex)= SELECT f1 FROM t1, t3 WHERE f1 = f3 and f1 < 1
+
+ After merge:
+
+ The rewritten query Q2 (Q2'):
+ SELECT f1,f2
+ FROM (t1, t3) JOIN t2 ON (f1 = f3 and f1 < 1)
+ WHERE f1 = f2
+
+ (LEX of the main query)
+ |
+ (select_lex)
+ / \
+ (FROM table list) (WHERE clause)= (f1 = f2)
+ |
+ (join list)= t2, (t1, t3)
+ \
+ (on_expr)= (f1 = f3 and f1 < 1)
+
+ In this example table numbers are assigned as follows:
+ (outer select): tt - 1, t2 - 2
+ (inner select): t1 - 1, t3 - 2
+ After the merge table numbers will be:
+ (outer select): t1 - 1, t2 - 2, t3 - 3
- - Anonymous derived tables, or
- - Named derived tables (aka views) with the @c TEMPTABLE algorithm.
-
- The table reference, contained in @c orig_table_list, is updated with the
- fields of a new temporary table.
+ @return FALSE if the derived table/view was successfully merged.
+ @return TRUE if an error occurs.
+*/
+
+bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+ bool res= FALSE;
+ SELECT_LEX *dt_select= derived->get_single_select();
+ table_map map;
+ uint tablenr;
+ SELECT_LEX *parent_lex= derived->select_lex;
+ Query_arena *arena, backup;
+
+ if (derived->merged)
+ return FALSE;
+
+ arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test
+ derived->merged= TRUE;
+ /*
+ Check whether there are enough free bits in the table map to merge the
+ subquery. If not, materialize it. This check isn't cached, so when there
+ are a big and a small subquery and the bigger one can't be merged, it
+ won't block the smaller one.
+ */
+ if (parent_lex->get_free_table_map(&map, &tablenr))
+ {
+ /* There are not enough table bits, fall back to materialization. */
+ derived->change_refs_to_fields();
+ derived->set_materialized_derived();
+ goto exit_merge;
+ }
+
+ if (dt_select->leaf_tables.elements + tablenr > MAX_TABLES)
+ {
+ /* There are not enough table bits, fall back to materialization. */
+ derived->change_refs_to_fields();
+ derived->set_materialized_derived();
+ goto exit_merge;
+ }
+
+ if (dt_select->options & OPTION_SCHEMA_TABLE)
+ parent_lex->options |= OPTION_SCHEMA_TABLE;
+
+ parent_lex->cond_count+= dt_select->cond_count;
+ if (!derived->get_unit()->prepared)
+ {
+ dt_select->leaf_tables.empty();
+ make_leaves_list(dt_select->leaf_tables, derived, TRUE, 0);
+ }
+
+ if (!derived->merged_for_insert)
+ {
+ derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN));
+ if (!derived->nested_join)
+ {
+ res= TRUE;
+ goto exit_merge;
+ }
+
+ /* Merge derived table's subquery in the parent select. */
+ if (parent_lex->merge_subquery(derived, dt_select, tablenr, map))
+ {
+ res= TRUE;
+ goto exit_merge;
+ }
+
+ /*
+ Exclude the select_lex so it doesn't show up in EXPLAIN.
+ Do this only for derived tables, as for views this is already done.
+
+ From sql_view.cc
+ Add subquery units to the SELECT into which we are merging the
+ current view.
+ unit(->next)* chain starts with subqueries that are used by this
+ view and continues with subqueries that are used by other views.
+ We must not add any subquery twice (otherwise we'll form a loop),
+ to do this we remember in end_unit the first subquery that has
+ been already added.
+ */
+ derived->get_unit()->exclude_level();
+ if (parent_lex->join)
+ parent_lex->join->tables+= dt_select->join->tables - 1;
+ }
+ if (derived->get_unit()->prepared)
+ {
+ Item *expr= derived->on_expr;
+ expr= and_conds(expr, dt_select->join ? dt_select->join->conds : 0);
+ if (expr && (derived->prep_on_expr || expr != derived->on_expr))
+ {
+ derived->on_expr= expr;
+ derived->prep_on_expr= expr->copy_andor_structure(thd);
+ }
+ if (derived->on_expr &&
+ ((!derived->on_expr->fixed &&
+ derived->on_expr->fix_fields(thd, &derived->on_expr)) ||
+ derived->on_expr->check_cols(1)))
+ {
+ res= TRUE; /* purecov: inspected */
+ goto exit_merge;
+ }
+ // Update used tables cache according to new table map
+ if (derived->on_expr)
+ derived->on_expr->update_used_tables();
+ }
+
+exit_merge:
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+ return res;
+}
+
+
+/**
+ @brief
+ Merge a view for the embedding INSERT/UPDATE/DELETE
+
+ @param thd thread handle
+ @param lex LEX of the embedding query.
+ @param derived reference to the derived table.
+
+ @details
+ This function substitutes the derived table for the first table from
+ the query of the derived table thus making it a correct target table for the
+ INSERT/UPDATE/DELETE statements. As this operation is correct for
+ single-table views only, for multi-table views this function does nothing.
+ The derived parameter isn't checked to be a view as derived tables aren't
+ allowed for INSERT/UPDATE/DELETE statements.
+
+ @return FALSE if the derived table/view was successfully merged.
+ @return TRUE if an error occurs.
+*/
+
+bool mysql_derived_merge_for_insert(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+ SELECT_LEX *dt_select= derived->get_single_select();
+
+ if (derived->merged_for_insert)
+ return FALSE;
+ if (!derived->is_multitable())
+ {
+ TABLE_LIST *tl=((TABLE_LIST*)dt_select->table_list.first);
+ TABLE *table= tl->table;
+ /* preserve old map & tablenr. */
+ if (!derived->merged_for_insert && derived->table)
+ table->set_table_map(derived->table->map, derived->table->tablenr);
+
+ derived->table= table;
+ derived->schema_table=
+ ((TABLE_LIST*)dt_select->table_list.first)->schema_table;
+ if (!derived->merged)
+ {
+ Query_arena *arena, backup;
+ arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test
+ derived->select_lex->leaf_tables.push_back(tl);
+ derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN));
+ if (derived->nested_join)
+ {
+ derived->wrap_into_nested_join(tl->select_lex->top_join_list);
+ derived->get_unit()->exclude_level();
+ }
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+ derived->merged= TRUE;
+ if (!derived->nested_join)
+ return TRUE;
+ }
+ }
+ else
+ {
+ if (!derived->merged_for_insert && mysql_derived_merge(thd, lex, derived))
+ return TRUE;
+ }
+ derived->merged_for_insert= TRUE;
+
+ return FALSE;
+}
+
+
+/*
+ @brief
+ Initialize a derived table/view
+
+ @param thd Thread handle
+ @param lex LEX of the embedding query.
+ @param derived reference to the derived table.
+
+ @detail
+ Fill in info about the derived table/view without preparing the
+ underlying select: create a field translation for views, mark the table
+ as a multitable if it is one, and so on.
+
+ @return
+ false OK
+ true Error
+*/
+
+
+bool mysql_derived_init(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+ SELECT_LEX_UNIT *unit= derived->get_unit();
+ DBUG_ENTER("mysql_derived_init");
+
+ // Skip already prepared views/DT
+ if (!unit || unit->prepared)
+ DBUG_RETURN(FALSE);
+
+ DBUG_RETURN(derived->init_derived(thd, TRUE));
+}
+
+
+/*
+ @brief
+ Create temporary table structure (but do not fill it)
+
+ @param thd Thread handle
+ @param lex LEX of the embedding query.
+ @param derived reference to the derived table.
+
+ @detail
+ Prepare the underlying select for a derived table/view. To properly resolve
+ names in the embedding query the TABLE structure is created here. The
+ actual table is created later by the mysql_derived_create function.
+
+ This function is called before any command containing a derived table
+ is executed. All types of derived tables are handled by this function:
+ - Anonymous derived tables, or
+ - Named derived tables (aka views).
+
+ The table reference, contained in @c derived, is updated with the
+ fields of a new temporary table.
Derived tables are stored in @c thd->derived_tables and closed by
close_thread_tables().
@@ -114,202 +579,359 @@ out:
the state of privilege checking (GRANT_INFO struct) is copied as-is to the
temporary table.
- This function implements a signature called "derived table processor", and
- is passed as a function pointer to mysql_handle_derived().
+ Only the TABLE structure is created here, actual table is created by the
+ mysql_derived_create function.
@note This function sets @c SELECT_ACL for @c TEMPTABLE views as well as
anonymous derived tables, but this is ok since later access checking will
distinguish between them.
- @see mysql_handle_derived(), mysql_derived_filling(), GRANT_INFO
+ @see mysql_handle_derived(), mysql_derived_fill(), GRANT_INFO
@return
false OK
true Error
*/
-bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
+bool mysql_derived_prepare(THD *thd, LEX *lex, TABLE_LIST *derived)
{
- SELECT_LEX_UNIT *unit= orig_table_list->derived;
- ulonglong create_options;
+ SELECT_LEX_UNIT *unit= derived->get_unit();
DBUG_ENTER("mysql_derived_prepare");
bool res= FALSE;
- if (unit)
- {
- SELECT_LEX *first_select= unit->first_select();
- TABLE *table= 0;
- select_union *derived_result;
- /* prevent name resolving out of derived table */
- for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
- sl->context.outer_context= 0;
+ // Skip already prepared views/DT
+ if (!unit || unit->prepared)
+ DBUG_RETURN(FALSE);
- if (!(derived_result= new select_union))
- DBUG_RETURN(TRUE); // out of memory
+ /* It's a target view for an INSERT, create field translation only. */
+ if (derived->merged_for_insert)
+ {
+ res= derived->create_field_translation(thd);
+ DBUG_RETURN(res);
+ }
- // st_select_lex_unit::prepare correctly work for single select
- if ((res= unit->prepare(thd, derived_result, 0)))
- goto exit;
+ Query_arena *arena= thd->stmt_arena, backup;
+ if (arena->is_conventional())
+ arena= 0; // For easier test
+ else
+ thd->set_n_backup_active_arena(arena, &backup);
- if ((res= check_duplicate_names(unit->types, 0)))
- goto exit;
+ SELECT_LEX *first_select= unit->first_select();
- create_options= (first_select->options | thd->options |
- TMP_TABLE_ALL_COLUMNS);
- /*
- Temp table is created so that it hounours if UNION without ALL is to be
- processed
+ /* prevent name resolving out of derived table */
+ for (SELECT_LEX *sl= first_select; sl; sl= sl->next_select())
+ {
+ sl->context.outer_context= 0;
+ // Prepare underlying views/DT first.
+ sl->handle_derived(lex, DT_PREPARE);
+ }
- As 'distinct' parameter we always pass FALSE (0), because underlying
- query will control distinct condition by itself. Correct test of
- distinct underlying query will be is_union &&
- !unit->union_distinct->next_select() (i.e. it is union and last distinct
- SELECT is last SELECT of UNION).
- */
- if ((res= derived_result->create_result_table(thd, &unit->types, FALSE,
- create_options,
- orig_table_list->alias,
- FALSE)))
- goto exit;
+ unit->derived= derived;
+
+ if (!(derived->derived_result= new select_union))
+ DBUG_RETURN(TRUE); // out of memory
- table= derived_result->table;
+ // st_select_lex_unit::prepare works correctly for a single select
+ if ((res= unit->prepare(thd, derived->derived_result, 0)))
+ goto exit;
+
+ if ((res= check_duplicate_names(unit->types, 0)))
+ goto exit;
+
+ /*
+ Check whether we can merge this derived table into main select.
+ Depending on the result field translation will or will not
+ be created.
+ */
+ if (derived->init_derived(thd, FALSE))
+ goto exit;
+
+ /*
+ The temp table is created so that it honours whether UNION without ALL
+ is to be processed.
+
+ As the 'distinct' parameter we always pass FALSE (0), because the
+ underlying query will control the distinct condition by itself. The
+ correct test for a distinct underlying query would be is_union &&
+ !unit->union_distinct->next_select() (i.e. it is a union and the last
+ distinct SELECT is the last SELECT of the UNION).
+ */
+ if (derived->derived_result->create_result_table(thd, &unit->types, FALSE,
+ (first_select->options |
+ thd->options |
+ TMP_TABLE_ALL_COLUMNS),
+ derived->alias,
+ FALSE, FALSE))
+ goto exit;
+
+ derived->table= derived->derived_result->table;
+ if (derived->is_derived() && derived->is_merged_derived())
+ first_select->mark_as_belong_to_derived(derived);
exit:
- /* Hide "Unknown column" or "Unknown function" error */
- if (orig_table_list->view)
- {
- if (thd->is_error() &&
+ /* Hide "Unknown column" or "Unknown function" error */
+ if (derived->view)
+ {
+ if (thd->is_error() &&
(thd->main_da.sql_errno() == ER_BAD_FIELD_ERROR ||
- thd->main_da.sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
- thd->main_da.sql_errno() == ER_SP_DOES_NOT_EXIST))
- {
- thd->clear_error();
- my_error(ER_VIEW_INVALID, MYF(0), orig_table_list->db,
- orig_table_list->table_name);
- }
- }
-
- /*
- if it is preparation PS only or commands that need only VIEW structure
- then we do not need real data and we can skip execution (and parameters
- is not defined, too)
- */
- if (res)
+ thd->main_da.sql_errno() == ER_FUNC_INEXISTENT_NAME_COLLISION ||
+ thd->main_da.sql_errno() == ER_SP_DOES_NOT_EXIST))
{
- if (table)
- free_tmp_table(thd, table);
- delete derived_result;
+ thd->clear_error();
+ my_error(ER_VIEW_INVALID, MYF(0), derived->db,
+ derived->table_name);
}
+ }
+
+ /*
+ If it is only PS preparation or a command that needs only the VIEW
+ structure, then we do not need real data and can skip execution (and
+ parameters are not defined, too).
+ */
+ if (res)
+ {
+ if (derived->table)
+ free_tmp_table(thd, derived->table);
+ delete derived->derived_result;
+ }
+ else
+ {
+ TABLE *table= derived->table;
+ table->derived_select_number= first_select->select_number;
+ table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE;
+#ifndef NO_EMBEDDED_ACCESS_CHECKS
+ if (derived->referencing_view)
+ table->grant= derived->grant;
else
{
- if (!thd->fill_derived_tables())
- {
- delete derived_result;
- derived_result= NULL;
- }
- orig_table_list->derived_result= derived_result;
- orig_table_list->table= table;
- orig_table_list->table_name= table->s->table_name.str;
- orig_table_list->table_name_length= table->s->table_name.length;
- table->derived_select_number= first_select->select_number;
- table->s->tmp_table= NON_TRANSACTIONAL_TMP_TABLE;
-#ifndef NO_EMBEDDED_ACCESS_CHECKS
- if (orig_table_list->referencing_view)
- table->grant= orig_table_list->grant;
- else
- table->grant.privilege= SELECT_ACL;
-#endif
- orig_table_list->db= (char *)"";
- orig_table_list->db_length= 0;
- // Force read of table stats in the optimizer
- table->file->info(HA_STATUS_VARIABLE);
- /* Add new temporary table to list of open derived tables */
- table->next= thd->derived_tables;
- thd->derived_tables= table;
+ table->grant.privilege= SELECT_ACL;
+ if (derived->is_derived())
+ derived->grant.privilege= SELECT_ACL;
}
+#endif
+ /* Add new temporary table to list of open derived tables */
+ table->next= thd->derived_tables;
+ thd->derived_tables= table;
}
- else if (orig_table_list->merge_underlying_list)
- orig_table_list->set_underlying_merge();
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
DBUG_RETURN(res);
}
-/*
- fill derived table
+/**
+ @brief
+ Runs optimize phase for a derived table/view.
+
+ @param thd thread handle
+ @param lex LEX of the embedding query.
+ @param derived reference to the derived table.
+
+ @details
+ Runs optimize phase for given 'derived' derived table/view.
+ If the optimizer finds out that it's of the type "SELECT a_constant" then
+ this function also materializes it.
- SYNOPSIS
- mysql_derived_filling()
- thd Thread handle
- lex LEX for this thread
- unit node that contains all SELECT's for derived tables
- orig_table_list TABLE_LIST for the upper SELECT
-
- IMPLEMENTATION
- Derived table is resolved with temporary table. It is created based on the
- queries defined. After temporary table is filled, if this is not EXPLAIN,
- then the entire unit / node is deleted. unit is deleted if UNION is used
- for derived table and node is deleted is it is a simple SELECT.
- If you use this function, make sure it's not called at prepare.
- Due to evaluation of LIMIT clause it can not be used at prepared stage.
-
- RETURN
- FALSE OK
- TRUE Error
+ @return FALSE ok.
+ @return TRUE if an error occurs.
*/
-bool mysql_derived_filling(THD *thd, LEX *lex, TABLE_LIST *orig_table_list)
+bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived)
{
- TABLE *table= orig_table_list->table;
- SELECT_LEX_UNIT *unit= orig_table_list->derived;
+ SELECT_LEX_UNIT *unit= derived->get_unit();
+ SELECT_LEX *first_select= unit->first_select();
+ SELECT_LEX *save_current_select= lex->current_select;
+
bool res= FALSE;
- /*check that table creation pass without problem and it is derived table */
- if (table && unit)
+ if (unit->optimized && !unit->uncacheable && !unit->describe)
+ return FALSE;
+ lex->current_select= first_select;
+
+ if (unit->is_union())
+ {
+ // optimize union without execution
+ res= unit->optimize();
+ }
+ else if (unit->derived)
{
- SELECT_LEX *first_select= unit->first_select();
- select_union *derived_result= orig_table_list->derived_result;
- SELECT_LEX *save_current_select= lex->current_select;
- if (unit->is_union())
+ if (!derived->is_merged_derived())
{
- // execute union without clean up
- res= unit->exec();
+ unit->optimized= TRUE;
+ if ((res= first_select->join->optimize()))
+ goto err;
}
- else
+ }
+ /*
+ Materialize derived tables/views of the "SELECT a_constant" type.
+ Such tables should be materialized at the optimization phase for
+ correct constant evaluation.
+ */
+ if (!res && derived->fill_me && !derived->merged_for_insert)
+ {
+ if (derived->is_merged_derived())
{
- unit->set_limit(first_select);
- if (unit->select_limit_cnt == HA_POS_ERROR)
- first_select->options&= ~OPTION_FOUND_ROWS;
-
- lex->current_select= first_select;
- res= mysql_select(thd, &first_select->ref_pointer_array,
- (TABLE_LIST*) first_select->table_list.first,
- first_select->with_wild,
- first_select->item_list, first_select->where,
- (first_select->order_list.elements+
- first_select->group_list.elements),
- (ORDER *) first_select->order_list.first,
- (ORDER *) first_select->group_list.first,
- first_select->having, (ORDER*) NULL,
- (first_select->options | thd->options |
- SELECT_NO_UNLOCK),
- derived_result, unit, first_select);
+ derived->change_refs_to_fields();
+ derived->set_materialized_derived();
}
+ if ((res= mysql_derived_create(thd, lex, derived)))
+ goto err;
+ if ((res= mysql_derived_fill(thd, lex, derived)))
+ goto err;
+ }
+err:
+ lex->current_select= save_current_select;
+ return res;
+}
- if (!res)
- {
- /*
- Here we entirely fix both TABLE_LIST and list of SELECT's as
- there were no derived tables
- */
- if (derived_result->flush())
- res= TRUE;
- if (!lex->describe)
- unit->cleanup();
- }
- else
- unit->cleanup();
- lex->current_select= save_current_select;
+/**
+ @brief
+ Actually create result table for a materialized derived table/view.
+
+ @param thd thread handle
+ @param lex LEX of the embedding query.
+ @param derived reference to the derived table.
+
+ @details
+ This function actually creates the result table for the given 'derived'
+ table/view, but it doesn't fill it.
+ The 'lex' parameter is not used by this function.
+
+ @return FALSE ok.
+ @return TRUE if an error occurs.
+*/
+
+bool mysql_derived_create(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+ TABLE *table= derived->table;
+ SELECT_LEX_UNIT *unit= derived->get_unit();
+
+ if (table->created)
+ return FALSE;
+ select_union *result= (select_union*)unit->result;
+ if (table->s->db_type() == TMP_ENGINE_HTON)
+ {
+ if (create_internal_tmp_table(table, result->tmp_table_param.keyinfo,
+ result->tmp_table_param.start_recinfo,
+ &result->tmp_table_param.recinfo,
+ (unit->first_select()->options |
+ thd->options | TMP_TABLE_ALL_COLUMNS)))
+ return(TRUE);
+ }
+ if (open_tmp_table(table))
+ return TRUE;
+ table->file->extra(HA_EXTRA_WRITE_CACHE);
+ table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+ return FALSE;
+}
+
+
+/*
+ @brief
+ Execute subquery of a materialized derived table/view and fill the result
+ table.
+
+ @param thd Thread handle
+ @param lex LEX for this thread
+ @param derived reference to the derived table.
+
+ @details
+ Execute the subquery of the given 'derived' table/view and fill the result
+ table. After the result table is filled, if this is not an EXPLAIN
+ statement, the entire unit / node is deleted. The unit is deleted if UNION
+ is used for the derived table and the node is deleted if it is a simple
+ SELECT. 'thd' is passed as an argument to the underlying mysql_select call.
+
+ @note
+ If you use this function, make sure it's not called at prepare time.
+ Due to evaluation of the LIMIT clause it cannot be used at the prepared
+ stage.
+
+ @return FALSE OK
+ @return TRUE Error
+*/
+
+bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+ TABLE *table= derived->table;
+ SELECT_LEX_UNIT *unit= derived->get_unit();
+ bool res= FALSE;
+
+ if (unit->executed && !unit->uncacheable && !unit->describe)
+ return FALSE;
+ /* Check that table creation passed without problems. */
+ DBUG_ASSERT(table && table->created);
+ SELECT_LEX *first_select= unit->first_select();
+ select_union *derived_result= derived->derived_result;
+ SELECT_LEX *save_current_select= lex->current_select;
+ if (unit->is_union())
+ {
+ // execute union without clean up
+ res= unit->exec();
+ }
+ else
+ {
+ unit->set_limit(first_select);
+ if (unit->select_limit_cnt == HA_POS_ERROR)
+ first_select->options&= ~OPTION_FOUND_ROWS;
+
+ lex->current_select= first_select;
+ res= mysql_select(thd, &first_select->ref_pointer_array,
+ (TABLE_LIST*) first_select->table_list.first,
+ first_select->with_wild,
+ first_select->item_list, first_select->where,
+ (first_select->order_list.elements+
+ first_select->group_list.elements),
+ (ORDER *) first_select->order_list.first,
+ (ORDER *) first_select->group_list.first,
+ first_select->having, (ORDER*) NULL,
+ (first_select->options | thd->options |
+ SELECT_NO_UNLOCK),
+ derived_result, unit, first_select);
+ }
+
+ if (!res)
+ {
+ if (derived_result->flush())
+ res= TRUE;
+ unit->executed= TRUE;
}
+ if (res || !lex->describe)
+ unit->cleanup();
+ lex->current_select= save_current_select;
+
return res;
}
+
+
+/**
+ @brief
+ Re-initialize given derived table/view for the next execution.
+
+ @param thd thread handle
+ @param lex LEX for this thread
+ @param derived reference to the derived table.
+
+ @details
+ Re-initialize the given 'derived' table/view for the next execution.
+ All underlying views/derived tables are recursively reinitialized prior
+ to the re-initialization of the given derived table.
+ 'thd' and 'lex' are passed as arguments to called functions.
+
+ @return FALSE OK
+ @return TRUE Error
+*/
+
+bool mysql_derived_reinit(THD *thd, LEX *lex, TABLE_LIST *derived)
+{
+ st_select_lex_unit *unit= derived->get_unit();
+
+ if (derived->table)
+ derived->merged_for_insert= FALSE;
+ unit->unclean();
+ unit->types.empty();
+ /* for derived tables & PS (which can't be reset by Item_subquery) */
+ unit->reinit_exec_mechanism();
+ unit->set_thd(thd);
+ return FALSE;
+}
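
The core of the new sql_derived.cc is the phase machinery: each phase is one
bit (DT_INIT, DT_PREPARE, ... DT_REINIT), the processors array is indexed by
the phase's ordinal, and a run visits the requested phases in order, stopping
once it passes the highest requested bit. A compact standalone model of the
dispatch loop, reduced to three phases (invented names, not the server's
flags):

#include <cstdio>

typedef bool (*dt_proc)(const char *table);

static bool ph_init(const char *t)    { std::printf("init %s\n", t);    return false; }
static bool ph_prepare(const char *t) { std::printf("prepare %s\n", t); return false; }
static bool ph_fill(const char *t)    { std::printf("fill %s\n", t);    return false; }

enum { PH_INIT= 1u << 0, PH_PREPARE= 1u << 1, PH_FILL= 1u << 2, PH_COUNT= 3 };

static dt_proc processors[PH_COUNT]= { ph_init, ph_prepare, ph_fill };

static bool run_phases(const char *table, unsigned phases)
{
  bool err= false;
  for (unsigned p= 0; p < PH_COUNT && !err; p++)
  {
    unsigned flag= 1u << p;
    if (flag > phases)      // nothing higher was requested: stop early
      break;
    if (!(phases & flag))   // this phase was not requested: skip it
      continue;
    err= processors[p](table);
  }
  return err;
}

/* run_phases("dt", PH_INIT | PH_PREPARE) runs init, then prepare, in order. */
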
=== modified file 'sql/sql_help.cc'
--- a/sql/sql_help.cc 2009-10-19 17:14:48 +0000
+++ b/sql/sql_help.cc 2010-05-26 20:18:18 +0000
@@ -628,7 +628,7 @@ bool mysqld_help(THD *thd, const char *m
Protocol *protocol= thd->protocol;
SQL_SELECT *select;
st_find_field used_fields[array_elements(init_used_fields)];
- TABLE_LIST *leaves= 0;
+ List<TABLE_LIST> leaves;
TABLE_LIST tables[4];
List<String> topics_list, categories_list, subcategories_list;
String name, description, example;
@@ -667,7 +667,7 @@ bool mysqld_help(THD *thd, const char *m
thd->lex->select_lex.context.first_name_resolution_table= &tables[0];
if (setup_tables(thd, &thd->lex->select_lex.context,
&thd->lex->select_lex.top_join_list,
- tables, &leaves, FALSE))
+ tables, leaves, FALSE, FALSE))
goto error;
memcpy((char*) used_fields, (char*) init_used_fields, sizeof(used_fields));
if (init_fields(thd, tables, used_fields, array_elements(used_fields)))
=== modified file 'sql/sql_insert.cc'
--- a/sql/sql_insert.cc 2010-03-15 11:51:23 +0000
+++ b/sql/sql_insert.cc 2010-05-26 20:18:18 +0000
@@ -124,7 +124,7 @@ bool check_view_single_update(List<Item>
{
it.init(*values);
while ((item= it++))
- tables|= item->used_tables();
+ tables|= item->view_used_tables(view);
}
/* Convert to real table bits */
@@ -140,6 +140,11 @@ bool check_view_single_update(List<Item>
if (view->check_single_table(&tbl, tables, view) || tbl == 0)
goto error;
+ /*
+ A buffer for the insert values was allocated for the merged view.
+ Use it.
+ */
+ //tbl->table->insert_values= view->table->insert_values;
view->table= tbl->table;
*map= tables;
@@ -243,6 +248,10 @@ static int check_insert_fields(THD *thd,
*/
table_list->next_local= 0;
context->resolve_in_table_list_only(table_list);
+ /* 'Unfix' fields to allow correct marking by the setup_fields function. */
+ if (table_list->is_view())
+ unfix_fields(fields);
+
res= setup_fields(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0);
/* Restore the current context. */
@@ -252,7 +261,7 @@ static int check_insert_fields(THD *thd,
if (res)
return -1;
- if (table_list->effective_algorithm == VIEW_ALGORITHM_MERGE)
+ if (table_list->is_view() && table_list->is_merged_derived())
{
if (check_view_single_update(fields,
fields_and_values_from_different_maps ?
@@ -341,7 +350,8 @@ static int check_update_fields(THD *thd,
if (setup_fields(thd, 0, update_fields, MARK_COLUMNS_WRITE, 0, 0))
return -1;
- if (insert_table_list->effective_algorithm == VIEW_ALGORITHM_MERGE &&
+ if (insert_table_list->is_view() &&
+ insert_table_list->is_merged_derived() &&
check_view_single_update(update_fields, &update_values,
insert_table_list, map))
return -1;
@@ -641,6 +651,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
table_list->table_name);
DBUG_RETURN(TRUE);
}
+ /*
+ Mark the table_list as a target for INSERT and skip the DT/view prepare
+ phase so that access rights are checked correctly.
+ TODO: remove this hack
+ */
+ table_list->skip_prepare_derived= TRUE;
if (table_list->lock_type == TL_WRITE_DELAYED)
{
@@ -652,6 +668,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
if (open_and_lock_tables(thd, table_list))
DBUG_RETURN(TRUE);
}
+
lock_type= table_list->lock_type;
thd_proc_info(thd, "init");
@@ -1010,6 +1027,12 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
::my_ok(thd, (ulong) thd->row_count_func, id, buff);
}
thd->abort_on_warning= 0;
+ if (thd->lex->current_select->first_cond_optimization)
+ {
+ thd->lex->current_select->save_leaf_tables(thd);
+ thd->lex->current_select->first_cond_optimization= 0;
+ }
+
DBUG_RETURN(FALSE);
abort:
@@ -1138,6 +1161,11 @@ static bool mysql_prepare_insert_check_t
bool insert_into_view= (table_list->view != 0);
DBUG_ENTER("mysql_prepare_insert_check_table");
+ if (!table_list->updatable)
+ {
+ my_error(ER_NON_INSERTABLE_TABLE, MYF(0), table_list->alias, "INSERT");
+ DBUG_RETURN(TRUE);
+ }
/*
first table in list is the one we'll INSERT into, requires INSERT_ACL.
all others require SELECT_ACL only. the ACL requirement below is for
@@ -1148,14 +1176,16 @@ static bool mysql_prepare_insert_check_t
if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
&thd->lex->select_lex.top_join_list,
table_list,
- &thd->lex->select_lex.leaf_tables,
- select_insert, INSERT_ACL, SELECT_ACL))
+ thd->lex->select_lex.leaf_tables,
+ select_insert, INSERT_ACL, SELECT_ACL,
+ TRUE))
DBUG_RETURN(TRUE);
if (insert_into_view && !fields.elements)
{
thd->lex->empty_field_list_on_rset= 1;
- if (!table_list->table)
+ if (!thd->lex->select_lex.leaf_tables.head()->table ||
+ table_list->is_multitable())
{
my_error(ER_VIEW_NO_INSERT_FIELD_LIST, MYF(0),
table_list->view_db.str, table_list->view_name.str);
@@ -1246,6 +1276,12 @@ bool mysql_prepare_insert(THD *thd, TABL
/* INSERT should have a SELECT or VALUES clause */
DBUG_ASSERT (!select_insert || !values);
+ if (mysql_handle_derived(thd->lex, DT_INIT))
+ DBUG_RETURN(TRUE);
+ if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+ DBUG_RETURN(TRUE);
+ if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
+ DBUG_RETURN(TRUE);
/*
For subqueries in VALUES() we should not see the table in which we are
inserting (for INSERT ... SELECT this is done by changing table_list,
@@ -2913,9 +2949,9 @@ bool mysql_insert_select_prepare(THD *th
{
LEX *lex= thd->lex;
SELECT_LEX *select_lex= &lex->select_lex;
- TABLE_LIST *first_select_leaf_table;
DBUG_ENTER("mysql_insert_select_prepare");
+
/*
Statement-based replication of INSERT ... SELECT ... LIMIT is not safe
as order of rows is not defined, so in mixed mode we go to row-based.
@@ -2941,21 +2977,37 @@ bool mysql_insert_select_prepare(THD *th
&select_lex->where, TRUE, FALSE, FALSE))
DBUG_RETURN(TRUE);
+ DBUG_ASSERT(select_lex->leaf_tables.elements != 0);
+ List_iterator<TABLE_LIST> ti(select_lex->leaf_tables);
+ TABLE_LIST *table;
+ uint insert_tables;
+
+ if (select_lex->first_cond_optimization)
+ {
+ /* Back up leaf_tables list. */
+ Query_arena *arena= thd->stmt_arena, backup;
+ arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test
+
+ insert_tables= select_lex->insert_tables;
+ while ((table= ti++) && insert_tables--)
+ {
+ select_lex->leaf_tables_exec.push_back(table);
+ table->tablenr_exec= table->table->tablenr;
+ table->map_exec= table->table->map;
+ }
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+ }
+ ti.rewind();
/*
exclude the first table from the leaf tables list, because it belongs
to the INSERT
*/
- DBUG_ASSERT(select_lex->leaf_tables != 0);
- lex->leaf_tables_insert= select_lex->leaf_tables;
/* Skip all leaf tables belonging to the view we insert into. */
- for (first_select_leaf_table= select_lex->leaf_tables->next_leaf;
- first_select_leaf_table &&
- first_select_leaf_table->belong_to_view &&
- first_select_leaf_table->belong_to_view ==
- lex->leaf_tables_insert->belong_to_view;
- first_select_leaf_table= first_select_leaf_table->next_leaf)
- {}
- select_lex->leaf_tables= first_select_leaf_table;
+ insert_tables= select_lex->insert_tables;
+ while ((table= ti++) && insert_tables--)
+ ti.remove();
+
DBUG_RETURN(FALSE);
}
@@ -3169,7 +3221,7 @@ void select_insert::cleanup()
select_insert::~select_insert()
{
DBUG_ENTER("~select_insert");
- if (table)
+ if (table && table->created)
{
table->next_number_field=0;
table->auto_increment_field_not_null= FALSE;
=== modified file 'sql/sql_join_cache.cc'
--- a/sql/sql_join_cache.cc 2010-03-07 15:41:45 +0000
+++ b/sql/sql_join_cache.cc 2010-05-26 20:18:18 +0000
@@ -2370,6 +2370,8 @@ JOIN_CACHE_BKA::init_join_matching_recor
init_mrr_buff();
+ if (!join_tab->preread_init_done && join_tab->preread_init())
+ return NESTED_LOOP_ERROR;
/*
Prepare to iterate over keys from the join buffer and to get
matching candidates obtained with MRR handler functions.
=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_lex.cc 2010-05-26 20:18:18 +0000
@@ -23,6 +23,7 @@
#include <hash.h>
#include "sp.h"
#include "sp_head.h"
+#include "sql_select.h"
/*
We are using pointer to this variable for distinguishing between assignment
@@ -317,7 +318,6 @@ void lex_start(THD *thd)
lex->derived_tables= 0;
lex->lock_option= TL_READ;
lex->safe_to_cache_query= 1;
- lex->leaf_tables_insert= 0;
lex->parsing_options.reset();
lex->empty_field_list_on_rset= 0;
lex->select_lex.select_number= 1;
@@ -1590,6 +1590,7 @@ void st_select_lex_unit::init_query()
item_list.empty();
describe= 0;
found_rows_for_union= 0;
+ derived= 0;
}
void st_select_lex::init_query()
@@ -1598,7 +1599,8 @@ void st_select_lex::init_query()
table_list.empty();
top_join_list.empty();
join_list= &top_join_list;
- embedding= leaf_tables= 0;
+ embedding= 0;
+ leaf_tables.empty();
item_list.empty();
join= 0;
having= prep_having= where= prep_where= 0;
@@ -2060,9 +2062,27 @@ void st_select_lex::print_order(String *
{
if (order->counter_used)
{
- char buffer[20];
- size_t length= my_snprintf(buffer, 20, "%d", order->counter);
- str->append(buffer, (uint) length);
+ if (query_type != QT_VIEW_INTERNAL)
+ {
+ char buffer[20];
+ size_t length= my_snprintf(buffer, 20, "%d", order->counter);
+ str->append(buffer, (uint) length);
+ }
+ else
+ {
+ /* replace numeric reference with expression */
+ if (order->item[0]->type() == Item::INT_ITEM &&
+ order->item[0]->basic_const_item())
+ {
+ char buffer[20];
+ size_t length= my_snprintf(buffer, 20, "%d", order->counter);
+ str->append(buffer, (uint) length);
+ /* make it expression instead of integer constant */
+ str->append(STRING_WITH_LEN("+0"));
+ }
+ else
+ (*order->item)->print(str, query_type);
+ }
}
else
(*order->item)->print(str, query_type);
@@ -2264,22 +2284,6 @@ bool st_lex::can_be_merged()
/* find non VIEW subqueries/unions */
bool selects_allow_merge= select_lex.next_select() == 0;
- if (selects_allow_merge)
- {
- for (SELECT_LEX_UNIT *tmp_unit= select_lex.first_inner_unit();
- tmp_unit;
- tmp_unit= tmp_unit->next_unit())
- {
- if (tmp_unit->first_select()->parent_lex == this &&
- (tmp_unit->item == 0 ||
- (tmp_unit->item->place() != IN_WHERE &&
- tmp_unit->item->place() != IN_ON)))
- {
- selects_allow_merge= 0;
- break;
- }
- }
- }
return (selects_allow_merge &&
select_lex.group_list.elements == 0 &&
@@ -2909,7 +2913,11 @@ static void fix_prepare_info_in_table_li
tbl->prep_on_expr= tbl->on_expr;
tbl->on_expr= tbl->on_expr->copy_andor_structure(thd);
}
- fix_prepare_info_in_table_list(thd, tbl->merge_underlying_list);
+ if (tbl->is_view_or_derived() && tbl->is_merged_derived())
+ {
+ SELECT_LEX *sel= tbl->get_single_select();
+ fix_prepare_info_in_table_list(thd, sel->get_table_list());
+ }
}
}
@@ -3024,6 +3032,384 @@ bool st_select_lex::add_index_hint (THD
str, length));
}
+
+/**
+ @brief Process all derived tables/views of the SELECT.
+
+ @param lex LEX of this thread
+ @param phase phases to run derived tables/views through
+
+ @details
+ This function runs specified 'phases' on all tables from the
+ table_list of this select.
+
+ @return FALSE ok.
+ @return TRUE an error occurred.
+*/
+
+bool st_select_lex::handle_derived(struct st_lex *lex, uint phases)
+{
+ for (TABLE_LIST *cursor= (TABLE_LIST*) table_list.first;
+ cursor;
+ cursor= cursor->next_local)
+ {
+ if (cursor->is_view_or_derived() && cursor->handle_derived(lex, phases))
+ return TRUE;
+ }
+ return FALSE;
+}
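
Side note for reviewers: the per-phase dispatch itself lives in
TABLE_LIST::handle_derived, outside this hunk. A minimal sketch of the
pattern, with stand-in types and phase values (assuming 'phases' is a
bitmask, as the DT_* names elsewhere in this patch suggest):

    /* Sketch only: simplified stand-ins, not the server's types. */
    enum { DT_INIT= 1, DT_PREPARE= 2, DT_MERGE= 4, DT_OPTIMIZE= 8 };

    struct tab
    {
      unsigned phases_done;                 /* phases already run */
      tab *next_local;
    };

    static bool run_phase(tab *t, unsigned phase)
    {
      t->phases_done|= phase;               /* pretend the phase succeeded */
      return false;
    }

    bool handle_derived_sketch(tab *first, unsigned phases)
    {
      for (unsigned phase= DT_INIT; phase <= DT_OPTIMIZE; phase<<= 1)
      {
        if (!(phases & phase))
          continue;
        for (tab *t= first; t; t= t->next_local)
          if (!(t->phases_done & phase) && run_phase(t, phase))
            return true;                    /* error stops the walk */
      }
      return false;
    }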
+
+
+/**
+ @brief
+ Returns first unoccupied table map and table number
+
+ @param map [out] return found map
+ @param tablenr [out] return found tablenr
+
+ @details
+ Returns first unoccupied table map and table number in this select.
+ The map and the table number are returned in *'map' and *'tablenr'
+ respectively.
+
+ @return TRUE no free table map/table number
+ @return FALSE found free table map/table number
+*/
+
+bool st_select_lex::get_free_table_map(table_map *map, uint *tablenr)
+{
+ *map= 0;
+ *tablenr= 0;
+ TABLE_LIST *tl;
+ if (!join)
+ {
+ (*map)= 1<<1;
+ (*tablenr)++;
+ return FALSE;
+ }
+ List_iterator<TABLE_LIST> ti(leaf_tables);
+ while ((tl= ti++))
+ {
+ if (tl->table->map > *map)
+ *map= tl->table->map;
+ if (tl->table->tablenr > *tablenr)
+ *tablenr= tl->table->tablenr;
+ }
+ (*map)<<= 1;
+ (*tablenr)++;
+ if (*tablenr >= MAX_TABLES)
+ return TRUE;
+ return FALSE;
+}
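
Side note: table maps are one bit per table, so allocation reduces to bit
arithmetic. A toy version of the same rule (the MAX_TABLES stand-in below
is assumed, not the server's value):

    typedef unsigned long long table_map;

    /*
      The first free map is one shift left of the highest map in use;
      the first free number is one past the highest number in use.
    */
    bool next_free_slot(table_map highest_map, unsigned highest_nr,
                        table_map *map, unsigned *tablenr)
    {
      *map= highest_map ? highest_map << 1 : 1;
      *tablenr= highest_nr + 1;
      return *tablenr >= 61;                /* stand-in for MAX_TABLES */
    }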
+
+
+/**
+ @brief
+ Append given table to the leaf_tables list.
+
+ @param link Offset to which list in table structure to use
+ @param table Table to append
+
+ @details
+ Append given 'table' to the leaf_tables list using the 'link' offset.
+ If the 'table' is linked with other tables through next_leaf/next_local
+ chains then the whole list is appended.
+*/
+
+void st_select_lex::append_table_to_list(TABLE_LIST *TABLE_LIST::*link,
+ TABLE_LIST *table)
+{
+ TABLE_LIST *tl;
+ for (tl= leaf_tables.head(); tl->*link; tl= tl->*link);
+ tl->*link= table;
+}
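
Side note: the TABLE_LIST *TABLE_LIST::*link parameter is a pointer to a
data member, so one traversal routine can walk either the next_leaf or the
next_local chain. A self-contained illustration with toy types:

    struct node { node *next_leaf; node *next_local; };

    /* Append 'tail' to whichever chain 'link' selects. */
    void append(node *head, node *node::*link, node *tail)
    {
      node *n;
      for (n= head; n->*link; n= n->*link) ;
      n->*link= tail;
    }

    /* Usage: append(first, &node::next_leaf, extra);
              append(first, &node::next_local, extra); */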
+
+/**
+ @brief
+ Remove given table from the leaf_tables list.
+
+ @param table Table to remove
+
+ @details
+ Remove 'table' from the leaf_tables list using the 'link' offset.
+*/
+
+void st_select_lex::remove_table_from_list(TABLE_LIST *table)
+{
+ TABLE_LIST *tl;
+ List_iterator<TABLE_LIST> ti(leaf_tables);
+ while ((tl= ti++))
+ {
+ if (tl == table)
+ {
+ ti.remove();
+ break;
+ }
+ }
+}
+
+
+/**
+ @brief
+ Assigns new table maps to tables in the leaf_tables list
+
+ @param derived Derived table to take initial table map from
+ @param map table map to begin with
+ @param tablenr table number to begin with
+ @param parent_lex new parent select_lex
+
+ @details
+ Assign new table maps/table numbers to all tables in the leaf_tables list.
+ 'map'/'tablenr' are used for the first table and shifted left/
+ incremented for each subsequent table in the leaf_tables list.
+ If the 'derived' table is given then its table map/number is used for the
+ first table in the list and 'map'/'tablenr' are used for the second and
+ all subsequent tables.
+ The 'parent_lex' is set as the new parent select_lex for all tables in the
+ list.
+*/
+
+void st_select_lex::remap_tables(TABLE_LIST *derived, table_map map,
+ uint tablenr, SELECT_LEX *parent_lex)
+{
+ bool first_table= TRUE;
+ TABLE_LIST *tl;
+ table_map first_map;
+ uint first_tablenr;
+
+ if (derived && derived->table)
+ {
+ first_map= derived->table->map;
+ first_tablenr= derived->table->tablenr;
+ }
+ else
+ {
+ first_map= map;
+ map<<= 1;
+ first_tablenr= tablenr++;
+ }
+ /*
+ Assign table bit/table number.
+ The first table of the subselect gets the table bit/tablenr of the
+ derived table. The remaining tables get bits sequentially, starting
+ from the provided table map/tablenr.
+ */
+ List_iterator<TABLE_LIST> ti(leaf_tables);
+ while ((tl= ti++))
+ {
+ if (first_table)
+ {
+ first_table= FALSE;
+ tl->table->set_table_map(first_map, first_tablenr);
+ }
+ else
+ {
+ tl->table->set_table_map(map, tablenr);
+ tablenr++;
+ map<<= 1;
+ }
+ SELECT_LEX *old_sl= tl->select_lex;
+ tl->select_lex= parent_lex;
+ for(TABLE_LIST *emb= tl->embedding;
+ emb && emb->select_lex == old_sl;
+ emb= emb->embedding)
+ emb->select_lex= parent_lex;
+ }
+}
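
Side note: the remapping rule is easiest to see on plain data. A toy
sketch (simplified types, illustration only): the first merged table
inherits the derived table's bit and number, the rest take consecutive
bits starting at 'map'.

    #include <vector>
    typedef unsigned long long table_map;

    struct slot { table_map map; unsigned nr; };

    void remap_sketch(std::vector<slot> &tabs, table_map derived_map,
                      unsigned derived_nr, table_map map, unsigned tablenr)
    {
      for (size_t i= 0; i < tabs.size(); i++)
      {
        if (i == 0)
        {
          tabs[i].map= derived_map;         /* reuse the derived table's slot */
          tabs[i].nr= derived_nr;
        }
        else
        {
          tabs[i].map= map;                 /* consecutive bits for the rest */
          tabs[i].nr= tablenr++;
          map<<= 1;
        }
      }
    }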
+
+/**
+ @brief
+ Merge a subquery into this select.
+
+ @param derived derived table of the subquery to be merged
+ @param subq_select select_lex of the subquery
+ @param map table map for assigning to merged tables from subquery
+ @param table_no table number for assigning to merged tables from subquery
+
+ @details
+ This function merges a subquery into its parent select. In short, the
+ merge operation appends the subquery FROM table list to the parent's
+ FROM table list. In more details:
+ .) the top_join_list of the subquery is wrapped into a join_nest
+ and attached to 'derived'
+ .) subquery's leaf_tables list is merged with the leaf_tables
+ list of this select_lex
+ .) the table maps and table numbers of the tables merged from
+ the subquery are adjusted to reflect their new binding to
+ this select
+
+ @return TRUE an error occurred
+ @return FALSE ok
+*/
+
+bool SELECT_LEX::merge_subquery(TABLE_LIST *derived, SELECT_LEX *subq_select,
+ uint table_no, table_map map)
+{
+ derived->wrap_into_nested_join(subq_select->top_join_list);
+ /* Reconnect the next_leaf chain. */
+ leaf_tables.concat(&subq_select->leaf_tables);
+
+ ftfunc_list->concat(subq_select->ftfunc_list);
+ if (join)
+ {
+ Item_in_subselect **in_subq;
+ Item_in_subselect **in_subq_end;
+ for (in_subq= subq_select->join->sj_subselects.front(),
+ in_subq_end= subq_select->join->sj_subselects.back();
+ in_subq != in_subq_end;
+ in_subq++)
+ {
+ join->sj_subselects.append(join->thd->mem_root, *in_subq);
+ (*in_subq)->emb_on_expr_nest= derived;
+ }
+ }
+ /*
+ Remove merged table from chain.
+ When merge_subquery is called during a subquery-to-semijoin transformation
+ the derived table isn't in the leaf_tables list, so in this case the call
+ to remove_table_from_list is a no-op.
+ */
+ remove_table_from_list(derived);
+
+ /*
+ Walk through the child's tables and adjust table map, tablenr and
+ parent_lex.
+ */
+ subq_select->remap_tables(derived, map, table_no, this);
+ return FALSE;
+}
+
+
+/**
+ @brief
+ Mark tables from the leaf_tables list as belonging to a derived table.
+
+ @param derived tables will be marked as belonging to this derived
+
+ @details
+ Run through the leaf_tables list and mark all tables as belonging to the
+ 'derived'.
+*/
+
+void SELECT_LEX::mark_as_belong_to_derived(TABLE_LIST *derived)
+{
+ /* Mark tables as belonging to this DT */
+ TABLE_LIST *tl;
+ List_iterator<TABLE_LIST> ti(leaf_tables);
+ while ((tl= ti++))
+ {
+ tl->skip_temporary= 1;
+ tl->belong_to_derived= derived;
+ }
+}
+
+
+/**
+ @brief
+ Update used_tables cache for this select
+
+ @details
+ This function updates used_tables cache of ON expressions of all tables
+ in the leaf_tables list and of the conds expression (if any).
+*/
+
+void SELECT_LEX::update_used_tables()
+{
+ TABLE_LIST *tl;
+ List_iterator<TABLE_LIST> ti(leaf_tables);
+ while ((tl= ti++))
+ {
+ if (tl->on_expr)
+ {
+ tl->on_expr->update_used_tables();
+ tl->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
+ }
+ TABLE_LIST *embedding= tl->embedding;
+ while (embedding)
+ {
+ if (embedding->on_expr &&
+ embedding->nested_join->join_list.head() == tl)
+ {
+ embedding->on_expr->update_used_tables();
+ embedding->on_expr->walk(&Item::eval_not_null_tables, 0, NULL);
+ }
+ tl= embedding;
+ embedding= tl->embedding;
+ }
+ }
+ if (join->conds)
+ {
+ join->conds->update_used_tables();
+ join->conds->walk(&Item::eval_not_null_tables, 0, NULL);
+ }
+}
+
+/**
+ @brief
+ Increase estimated number of records for a derived table/view
+
+ @param records number of records to increase estimate by
+
+ @details
+ This function increases the estimated number of records by 'records'
+ for the derived table to which this select belongs.
+*/
+
+void SELECT_LEX::increase_derived_records(uint records)
+{
+ SELECT_LEX_UNIT *unit= master_unit();
+ DBUG_ASSERT(unit->derived);
+
+ select_union *result= (select_union*)unit->result;
+ result->records+= records;
+}
+
+
+/**
+ @brief
+ Mark select's derived table as a const one.
+
+ @param empty Whether select has an empty result set
+
+ @details
+ Mark derived table/view of this select as a constant one (to
+ materialize it at the optimization phase) unless this select belongs to a
+ union. The estimated number of rows is incremented if this select has a
+ non-empty result set.
+*/
+
+void SELECT_LEX::mark_const_derived(bool empty)
+{
+ TABLE_LIST *derived= master_unit()->derived;
+ if (!join->thd->lex->describe && derived)
+ {
+ if (!empty)
+ increase_derived_records(1);
+ if (!master_unit()->is_union() && !derived->is_merged_derived())
+ derived->fill_me= TRUE;
+ }
+}
+
+bool st_select_lex::save_leaf_tables(THD *thd)
+{
+ Query_arena *arena= thd->stmt_arena, backup;
+ if (arena->is_conventional())
+ arena= 0;
+ else
+ thd->set_n_backup_active_arena(arena, &backup);
+
+ List_iterator_fast<TABLE_LIST> li(leaf_tables);
+ TABLE_LIST *table;
+ while ((table= li++))
+ {
+ if (leaf_tables_exec.push_back(table))
+ return 1;
+ table->tablenr_exec= table->table->tablenr;
+ table->map_exec= table->table->map;
+ }
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+
+ return 0;
+}
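
Side note: the arena switch above follows the usual prepared-statement
pattern: nodes pushed between backup and restore are allocated on the
statement arena, so leaf_tables_exec survives into later executions.
Rough shape of the idiom, with simplified stand-ins rather than the real
THD interface:

    struct arena { /* owns a MEM_ROOT in the real server */ };

    struct session
    {
      arena *stmt_arena;                    /* lives as long as the PS */
      arena *active_arena;                  /* current allocation target */

      arena *activate_stmt_arena_if_needed(arena **backup)
      {
        if (stmt_arena == active_arena)     /* conventional execution */
          return 0;
        *backup= active_arena;
        active_arena= stmt_arena;           /* allocations outlive this run */
        return stmt_arena;
      }
      void restore_active_arena(arena **backup) { active_arena= *backup; }
    };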
+
/**
A routine used by the parser to decide whether we are specifying a full
partitioning or if only partitions to add or to split.
=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h 2010-03-20 12:01:47 +0000
+++ b/sql/sql_lex.h 2010-05-26 20:18:18 +0000
@@ -469,6 +469,11 @@ public:
friend bool mysql_new_select(struct st_lex *lex, bool move_down);
friend bool mysql_make_view(THD *thd, File_parser *parser,
TABLE_LIST *table, uint flags);
+ friend bool mysql_derived_prepare(THD *thd, st_lex *lex,
+ TABLE_LIST *orig_table_list);
+ friend bool mysql_derived_merge(THD *thd, st_lex *lex,
+ TABLE_LIST *orig_table_list);
+ friend bool TABLE_LIST::init_derived(THD *thd, bool init_view);
private:
void fast_exclude();
};
@@ -487,13 +492,12 @@ class st_select_lex_unit: public st_sele
protected:
TABLE_LIST result_table_list;
select_union *union_result;
- TABLE *table; /* temporary table using for appending UNION results */
-
- select_result *result;
ulonglong found_rows_for_union;
bool saved_error;
public:
+ TABLE *table; /* temporary table used for appending UNION results */
+ select_result *result;
bool prepared, // prepare phase already performed for UNION (unit)
optimized, // optimize phase already performed for UNION (unit)
executed, // already executed
@@ -520,6 +524,11 @@ public:
ha_rows select_limit_cnt, offset_limit_cnt;
/* not NULL if unit used in subselect, point to subselect item */
Item_subselect *item;
+ /*
+ TABLE_LIST representing this union in the embedding select. Used for
+ derived tables/views handling.
+ */
+ TABLE_LIST *derived;
/* thread handler */
THD *thd;
/*
@@ -549,6 +558,7 @@ public:
/* UNION methods */
bool prepare(THD *thd, select_result *result, ulong additional_options);
+ bool optimize();
bool exec();
bool cleanup();
inline void unclean() { cleaned= 0; }
@@ -610,8 +620,15 @@ public:
Beginning of the list of leaves in a FROM clause, where the leaves
include all base tables including view tables. The tables are connected
by TABLE_LIST::next_leaf, so leaf_tables points to the left-most leaf.
- */
- TABLE_LIST *leaf_tables;
+
+ List of all base tables local to a subquery including all view
+ tables. Unlike 'next_local', in this list views are *not*
+ leaves. Created in setup_tables() -> make_leaves_list().
+ */
+ List<TABLE_LIST> leaf_tables;
+ List<TABLE_LIST> leaf_tables_exec;
+ uint insert_tables;
+
const char *type; /* type of select for EXPLAIN */
SQL_LIST order_list; /* ORDER clause */
@@ -832,6 +849,28 @@ public:
void clear_index_hints(void) { index_hints= NULL; }
bool is_part_of_union() { return master_unit()->is_union(); }
+ bool handle_derived(struct st_lex *lex, uint phases);
+ void append_table_to_list(TABLE_LIST *TABLE_LIST::*link, TABLE_LIST *table);
+ bool get_free_table_map(table_map *map, uint *tablenr);
+ void remove_table_from_list(TABLE_LIST *table);
+ void remap_tables(TABLE_LIST *derived, table_map map,
+ uint tablenr, st_select_lex *parent_lex);
+ bool merge_subquery(TABLE_LIST *derived, st_select_lex *subq_lex,
+ uint tablenr, table_map map);
+ inline bool is_mergeable()
+ {
+ return (next_select() == 0 && group_list.elements == 0 &&
+ having == 0 && with_sum_func == 0 &&
+ table_list.elements >= 1 && !(options & SELECT_DISTINCT) &&
+ select_limit == 0);
+ }
+ void mark_as_belong_to_derived(TABLE_LIST *derived);
+ void increase_derived_records(uint records);
+ void update_used_tables();
+ void mark_const_derived(bool empty);
+
+ bool save_leaf_tables(THD *thd);
+
private:
/* current index hint kind. used in filling up index_hints */
enum index_hint_type current_index_hint_type;
@@ -1556,8 +1595,6 @@ typedef struct st_lex : public Query_tab
CHARSET_INFO *charset;
bool text_string_is_7bit;
- /* store original leaf_tables for INSERT SELECT and PS/SP */
- TABLE_LIST *leaf_tables_insert;
/** SELECT of CREATE VIEW statement */
LEX_STRING create_view_select;
@@ -1673,7 +1710,7 @@ typedef struct st_lex : public Query_tab
DERIVED_SUBQUERY and DERIVED_VIEW).
*/
uint8 derived_tables;
- uint8 create_view_algorithm;
+ uint16 create_view_algorithm;
uint8 create_view_check;
bool drop_if_exists, drop_temporary, local_file, one_shot_set;
bool autocommit;
@@ -1836,6 +1873,8 @@ typedef struct st_lex : public Query_tab
switch (sql_command) {
case SQLCOM_UPDATE:
case SQLCOM_UPDATE_MULTI:
+ case SQLCOM_DELETE:
+ case SQLCOM_DELETE_MULTI:
case SQLCOM_INSERT:
case SQLCOM_INSERT_SELECT:
case SQLCOM_REPLACE:
=== modified file 'sql/sql_list.h'
--- a/sql/sql_list.h 2009-09-15 10:46:35 +0000
+++ b/sql/sql_list.h 2010-05-26 20:18:18 +0000
@@ -168,6 +168,11 @@ public:
{
if (!list->is_empty())
{
+ if (is_empty())
+ {
+ *this= *list;
+ return;
+ }
*last= list->first;
last= list->last;
elements+= list->elements;
@@ -188,11 +193,13 @@ public:
list_node *node= first;
list_node *list_first= list->first;
elements=0;
- while (node && node != list_first)
+ while (node != list_first)
{
prev= &node->next;
node= node->next;
elements++;
+ if (node == &end_of_list)
+ return;
}
*prev= *last;
last= prev;
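
Side note on the two sql_list.h hunks: both guard sentinel edge cases of
the intrusive list, where an empty list has 'first' pointing at the shared
end_of_list sentinel and 'last' pointing at &first. A toy model of the
concat case (assumed layout, illustration only):

    struct list_node { list_node *next; };
    extern list_node end_of_list;           /* shared sentinel */

    struct toy_list
    {
      list_node *first;
      list_node **last;
      unsigned elements;

      bool is_empty() const { return first == &end_of_list; }

      void concat(toy_list *other)
      {
        if (other->is_empty())
          return;
        if (is_empty())                     /* the new guard: header copy */
        {
          *this= *other;
          return;
        }
        *last= other->first;
        last= other->last;
        elements+= other->elements;
      }
    };
    list_node end_of_list;

The disjoin change is the mirror image: the scan now stops when it reaches
the sentinel instead of relying on a NULL next pointer, which never occurs
in this representation.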
=== modified file 'sql/sql_load.cc'
--- a/sql/sql_load.cc 2010-03-04 08:03:07 +0000
+++ b/sql/sql_load.cc 2010-05-26 20:18:18 +0000
@@ -164,12 +164,15 @@ int mysql_load(THD *thd,sql_exchange *ex
if (open_and_lock_tables(thd, table_list))
DBUG_RETURN(TRUE);
+ if (mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
+ mysql_handle_single_derived(thd->lex, table_list, DT_PREPARE))
+ DBUG_RETURN(TRUE);
if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context,
&thd->lex->select_lex.top_join_list,
table_list,
- &thd->lex->select_lex.leaf_tables, FALSE,
+ thd->lex->select_lex.leaf_tables, FALSE,
INSERT_ACL | UPDATE_ACL,
- INSERT_ACL | UPDATE_ACL))
+ INSERT_ACL | UPDATE_ACL, FALSE))
DBUG_RETURN(-1);
if (!table_list->table || // do not support join views
!table_list->updatable || // and derived tables
=== modified file 'sql/sql_olap.cc'
--- a/sql/sql_olap.cc 2007-05-10 09:59:39 +0000
+++ b/sql/sql_olap.cc 2010-05-26 20:18:18 +0000
@@ -154,7 +154,7 @@ int handle_olaps(LEX *lex, SELECT_LEX *s
if (setup_tables(lex->thd, &select_lex->context, &select_lex->top_join_list,
(TABLE_LIST *)select_lex->table_list.first
- &select_lex->leaf_tables, FALSE) ||
+ FALSE, FALSE) ||
setup_fields(lex->thd, 0, select_lex->item_list, MARK_COLUMNS_READ,
&all_fields,1) ||
setup_fields(lex->thd, 0, item_list_copy, MARK_COLUMNS_READ,
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_parse.cc 2010-05-26 20:18:18 +0000
@@ -458,7 +458,7 @@ static void handle_bootstrap_impl(THD *t
thd->init_for_queries();
while (fgets(buff, thd->net.max_packet, file))
{
- char *query;
+ char *query, *res;
/* strlen() can't be deleted because fgets() doesn't return length */
ulong length= (ulong) strlen(buff);
while (buff[length-1] != '\n' && !feof(file))
@@ -2769,6 +2769,9 @@ mysql_execute_command(THD *thd)
}
}
}
+ if (mysql_handle_single_derived(thd->lex, create_table,
+ DT_MERGE_FOR_INSERT))
+ DBUG_RETURN(1);
/*
select_create is currently not re-execution friendly and
@@ -3300,6 +3303,10 @@ end_with_restore_list:
if (!(res= open_and_lock_tables(thd, all_tables)))
{
+ /*
+ Only the INSERT table should be merged. The others will be handled
+ by the select.
+ */
/* Skip first table, which is the table we are inserting in */
TABLE_LIST *second_table= first_table->next_local;
select_lex->table_list.first= (uchar*) second_table;
@@ -5183,6 +5190,8 @@ bool check_single_table_access(THD *thd,
/* Show only 1 table for check_grant */
if (!(all_tables->belong_to_view &&
(thd->lex->sql_command == SQLCOM_SHOW_FIELDS)) &&
+ !(all_tables->is_view() &&
+ all_tables->is_merged_derived()) &&
check_grant(thd, privilege, all_tables, 0, 1, no_errors))
goto deny;
=== modified file 'sql/sql_prepare.cc'
--- a/sql/sql_prepare.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_prepare.cc 2010-05-26 20:18:18 +0000
@@ -1133,7 +1133,7 @@ static bool mysql_test_insert(Prepared_s
If we would use locks, then we have to ensure we are not using
TL_WRITE_DELAYED as having two such locks can cause table corruption.
*/
- if (open_normal_and_derived_tables(thd, table_list, 0))
+ if (open_normal_and_derived_tables(thd, table_list, 0, DT_INIT))
goto error;
if ((values= its++))
@@ -1217,7 +1217,10 @@ static int mysql_test_update(Prepared_st
open_tables(thd, &table_list, &table_count, 0))
goto error;
- if (table_list->multitable_view)
+ if (mysql_handle_derived(thd->lex, DT_INIT))
+ goto error;
+
+ if (table_list->is_multitable())
{
DBUG_ASSERT(table_list->view != 0);
DBUG_PRINT("info", ("Switch to multi-update"));
@@ -1231,8 +1234,15 @@ static int mysql_test_update(Prepared_st
thd->fill_derived_tables() is false here for sure (because it is
preparation of PS, so we even do not check it).
*/
- if (mysql_handle_derived(thd->lex, &mysql_derived_prepare))
+ if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT) ||
+ table_list->handle_derived(thd->lex, DT_PREPARE))
+ goto error;
+
+ if (!table_list->updatable)
+ {
+ my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
goto error;
+ }
#ifndef NO_EMBEDDED_ACCESS_CHECKS
/* Force privilege re-checking for views after they have been opened. */
@@ -1286,12 +1296,18 @@ error:
static bool mysql_test_delete(Prepared_statement *stmt,
TABLE_LIST *table_list)
{
+ uint table_count= 0;
THD *thd= stmt->thd;
LEX *lex= stmt->lex;
DBUG_ENTER("mysql_test_delete");
if (delete_precheck(thd, table_list) ||
- open_normal_and_derived_tables(thd, table_list, 0))
+ open_tables(thd, &table_list, &table_count, 0))
+ goto error;
+
+ if (mysql_handle_derived(thd->lex, DT_INIT) ||
+ mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
+ mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
goto error;
if (!table_list->table)
@@ -1349,7 +1365,8 @@ static int mysql_test_select(Prepared_st
goto error;
}
- if (open_normal_and_derived_tables(thd, tables, 0))
+ if (open_normal_and_derived_tables(thd, tables, 0,
+ DT_PREPARE | DT_CREATE))
goto error;
thd->used_tables= 0; // Updated by setup_fields
@@ -1410,7 +1427,8 @@ static bool mysql_test_do_fields(Prepare
if (tables && check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE))
DBUG_RETURN(TRUE);
- if (open_normal_and_derived_tables(thd, tables, 0))
+ if (open_normal_and_derived_tables(thd, tables, 0,
+ DT_PREPARE | DT_CREATE))
DBUG_RETURN(TRUE);
DBUG_RETURN(setup_fields(thd, 0, *values, MARK_COLUMNS_NONE, 0, 0));
}
@@ -1440,7 +1458,8 @@ static bool mysql_test_set_fields(Prepar
if ((tables &&
check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE)) ||
- open_normal_and_derived_tables(thd, tables, 0))
+ open_normal_and_derived_tables(thd, tables, 0,
+ DT_PREPARE | DT_CREATE))
goto error;
while ((var= it++))
@@ -1477,7 +1496,7 @@ static bool mysql_test_call_fields(Prepa
if ((tables &&
check_table_access(thd, SELECT_ACL, tables, UINT_MAX, FALSE)) ||
- open_normal_and_derived_tables(thd, tables, 0))
+ open_normal_and_derived_tables(thd, tables, 0, DT_PREPARE))
goto err;
while ((item= it++))
@@ -1552,6 +1571,7 @@ select_like_stmt_test_with_open(Prepared
int (*specific_prepare)(THD *thd),
ulong setup_tables_done_option)
{
+ uint table_count= 0;
DBUG_ENTER("select_like_stmt_test_with_open");
/*
@@ -1560,7 +1580,8 @@ select_like_stmt_test_with_open(Prepared
prepared EXPLAIN yet so derived tables will clean up after
themselves.
*/
- if (open_normal_and_derived_tables(stmt->thd, tables, 0))
+ THD *thd= stmt->thd;
+ if (open_tables(thd, &tables, &table_count, 0))
DBUG_RETURN(TRUE);
DBUG_RETURN(select_like_stmt_test(stmt, specific_prepare,
@@ -1605,7 +1626,8 @@ static bool mysql_test_create_table(Prep
create_table->skip_temporary= true;
}
- if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0))
+ if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0,
+ DT_PREPARE | DT_CREATE))
DBUG_RETURN(TRUE);
if (!(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE))
@@ -1623,7 +1645,8 @@ static bool mysql_test_create_table(Prep
we validate metadata of all CREATE TABLE statements,
which keeps metadata validation code simple.
*/
- if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0))
+ if (open_normal_and_derived_tables(stmt->thd, lex->query_tables, 0,
+ DT_PREPARE))
DBUG_RETURN(TRUE);
}
@@ -1658,7 +1681,7 @@ static bool mysql_test_create_view(Prepa
if (create_view_precheck(thd, tables, view, lex->create_view_mode))
goto err;
- if (open_normal_and_derived_tables(thd, tables, 0))
+ if (open_normal_and_derived_tables(thd, tables, 0, DT_PREPARE))
goto err;
lex->view_prepare_mode= 1;
@@ -2349,6 +2372,7 @@ void reinit_stmt_before_use(THD *thd, LE
/* Fix ORDER list */
for (order= (ORDER *)sl->order_list.first; order; order= order->next)
order->item= &order->item_ptr;
+ sl->handle_derived(lex, DT_REINIT);
/* clear the no_error flag for INSERT/UPDATE IGNORE */
sl->no_error= FALSE;
@@ -2392,9 +2416,6 @@ void reinit_stmt_before_use(THD *thd, LE
}
lex->current_select= &lex->select_lex;
- /* restore original list used in INSERT ... SELECT */
- if (lex->leaf_tables_insert)
- lex->select_lex.leaf_tables= lex->leaf_tables_insert;
if (lex->result)
{
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-03-29 20:09:40 +0000
+++ b/sql/sql_select.cc 2010-05-26 20:18:18 +0000
@@ -47,8 +47,8 @@ const char *join_type_str[]={ "UNKNOWN",
struct st_sargable_param;
static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
-static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
- DYNAMIC_ARRAY *keyuse);
+static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves,
+ COND *conds, DYNAMIC_ARRAY *keyuse);
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab,
uint tables, COND *conds,
@@ -99,7 +99,8 @@ static void update_depend_map(JOIN *join
static void update_depend_map(JOIN *join, ORDER *order);
static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
bool change_list, bool *simple_order);
-static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables,
+static int return_zero_rows(JOIN *join, select_result *res,
+ List<TABLE_LIST> &tables,
List<Item> &fields, bool send_row,
ulonglong select_options, const char *info,
Item *having);
@@ -210,7 +211,7 @@ static ORDER *create_distinct_group(THD
List<Item> &all_fields,
bool *all_order_by_fields_used);
static bool test_if_subpart(ORDER *a,ORDER *b);
-static TABLE *get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables);
+static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables);
static void calc_group_buffer(JOIN *join,ORDER *group);
static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
static bool alloc_group_fields(JOIN *join,ORDER *group);
@@ -237,6 +238,7 @@ static void add_group_and_distinct_keys(
void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg,
double *record_count_arg);
static uint make_join_orderinfo(JOIN *join);
+static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
static int
join_read_record_no_init(JOIN_TAB *tab);
@@ -405,7 +407,7 @@ fix_inner_refs(THD *thd, List<Item> &all
*/
inline int setup_without_group(THD *thd, Item **ref_pointer_array,
TABLE_LIST *tables,
- TABLE_LIST *leaves,
+ List<TABLE_LIST> &leaves,
List<Item> &fields,
List<Item> &all_fields,
COND **conds,
@@ -483,28 +485,26 @@ JOIN::prepare(Item ***rref_pointer_array
join_list= &select_lex->top_join_list;
union_part= unit_arg->is_union();
+ if (select_lex->handle_derived(thd->lex, DT_PREPARE))
+ DBUG_RETURN(1);
+
thd->lex->current_select->is_item_list_lookup= 1;
/*
If we have already executed the SELECT, then it makes no sense to prevent
its table from being updated (see unique_table())
+ Affects only materialized derived tables.
*/
- if (thd->derived_tables_processing)
- select_lex->exclude_from_table_unique_test= TRUE;
-
/* Check that all tables, fields, conds and order are ok */
-
- if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
- setup_tables_and_check_access(thd, &select_lex->context, join_list,
- tables_list, &select_lex->leaf_tables,
- FALSE, SELECT_ACL, SELECT_ACL))
+ if (!(select_options & OPTION_SETUP_TABLES_DONE))
+ {
+ if (setup_tables_and_check_access(thd, &select_lex->context, join_list,
+ tables_list, select_lex->leaf_tables,
+ FALSE, SELECT_ACL, SELECT_ACL, FALSE))
DBUG_RETURN(-1);
-
- TABLE_LIST *table_ptr;
- for (table_ptr= select_lex->leaf_tables;
- table_ptr;
- table_ptr= table_ptr->next_leaf)
- tables++;
+ }
+ tables= select_lex->leaf_tables.elements;
+
if (setup_wild(thd, tables_list, fields_list, &all_fields, wild_num) ||
select_lex->setup_ref_array(thd, og_num) ||
setup_fields(thd, (*rref_pointer_array), fields_list, MARK_COLUMNS_READ,
@@ -605,10 +605,6 @@ JOIN::prepare(Item ***rref_pointer_array
}
}
- if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
- DBUG_RETURN(-1);
-
-
/*
Check if there are references to un-aggregated columns when computing
aggregate functions with implicit grouping (there is no GROUP BY).
@@ -720,13 +716,37 @@ JOIN::optimize()
if (optimized)
DBUG_RETURN(0);
optimized= 1;
-
thd_proc_info(thd, "optimizing");
+
+ /* Run optimize phase for all derived tables/views used in this SELECT. */
+ if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
+ DBUG_RETURN(1);
+
+ if (select_lex->first_cond_optimization)
+ {
+ // Do it only for the first execution.
+ /* Merge all mergeable derived tables/views in this SELECT. */
+ if (select_lex->handle_derived(thd->lex, DT_MERGE))
+ DBUG_RETURN(TRUE);
+ tables= select_lex->leaf_tables.elements;
+ select_lex->update_used_tables();
+
+ /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+ if (convert_join_subqueries_to_semijoins(this))
+ DBUG_RETURN(1); /* purecov: inspected */
+ /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+ select_lex->update_used_tables();
+
+ /* Save this info for the next executions */
+ if (select_lex->save_leaf_tables(thd))
+ DBUG_RETURN(1);
+ }
+
+ tables= select_lex->leaf_tables.elements;
+
- /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
- if (convert_join_subqueries_to_semijoins(this))
- DBUG_RETURN(1); /* purecov: inspected */
- /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
+ if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
+ DBUG_RETURN(-1);
row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
unit->select_limit_cnt);
@@ -760,7 +780,8 @@ JOIN::optimize()
}
}
#endif
- SELECT_LEX *sel= thd->lex->current_select;
+
+ SELECT_LEX *sel= select_lex;
if (sel->first_cond_optimization)
{
/*
@@ -785,7 +806,7 @@ JOIN::optimize()
if (arena)
thd->restore_active_arena(arena, &backup);
}
-
+
conds= optimize_cond(this, conds, join_list, &cond_value);
if (thd->is_error())
{
@@ -823,7 +844,8 @@ JOIN::optimize()
#ifdef WITH_PARTITION_STORAGE_ENGINE
{
TABLE_LIST *tbl;
- for (tbl= select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
+ List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
+ while ((tbl= li++))
{
/*
If tbl->embedding!=NULL that means that this table is in the inner
@@ -930,6 +952,8 @@ JOIN::optimize()
DBUG_RETURN(1);
}
+ drop_unused_derived_keys();
+
if (rollup.state != ROLLUP::STATE_NONE)
{
if (rollup_process_const_fields())
@@ -1030,6 +1054,7 @@ JOIN::optimize()
{
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
+ select_lex->mark_const_derived(zero_result_cause);
goto setup_subq_exit;
}
@@ -1348,7 +1373,7 @@ JOIN::optimize()
if (select_options & SELECT_DESCRIBE)
{
error= 0;
- DBUG_RETURN(0);
+ goto derived_exit;
}
having= 0;
@@ -1497,6 +1522,9 @@ setup_subq_exit:
if (setup_subquery_materialization())
DBUG_RETURN(1);
error= 0;
+
+derived_exit:
+ select_lex->mark_const_derived(zero_result_cause);
DBUG_RETURN(0);
}
@@ -1733,6 +1761,11 @@ JOIN::exec()
!tables ? "No tables used" : NullS);
DBUG_VOID_RETURN;
}
+ else
+ {
+ /* it's a const select, materialize it. */
+ select_lex->mark_const_derived(zero_result_cause);
+ }
JOIN *curr_join= this;
List<Item> *curr_all_fields= &all_fields;
@@ -2232,6 +2265,7 @@ JOIN::destroy()
}
tmp_join->tmp_join= 0;
tmp_table_param.cleanup();
+ tmp_join->tmp_table_param.copy_field= 0;
DBUG_RETURN(tmp_join->destroy());
}
cond_equal= 0;
@@ -2512,12 +2546,11 @@ typedef struct st_sargable_param
*/
static bool
-make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
- DYNAMIC_ARRAY *keyuse_array)
+make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
+ COND *conds, DYNAMIC_ARRAY *keyuse_array)
{
- int error;
+ int error= 0;
TABLE *table;
- TABLE_LIST *tables= tables_arg;
uint i,table_count,const_count,key;
table_map found_const_table_map, all_table_map, found_ref, refs;
key_map const_ref, eq_part;
@@ -2528,6 +2561,8 @@ make_join_statistics(JOIN *join, TABLE_L
table_map no_rows_const_tables= 0;
SARGABLE_PARAM *sargables= 0;
JOIN_TAB *stat_vector[MAX_TABLES+1];
+ List_iterator<TABLE_LIST> ti(tables_list);
+ TABLE_LIST *tables;
DBUG_ENTER("make_join_statistics");
table_count=join->tables;
@@ -2543,9 +2578,7 @@ make_join_statistics(JOIN *join, TABLE_L
found_const_table_map= all_table_map=0;
const_count=0;
- for (s= stat, i= 0;
- tables;
- s++, tables= tables->next_leaf, i++)
+ for (s= stat, i= 0; (tables= ti++); s++, i++)
{
TABLE_LIST *embedding= tables->embedding;
stat_vector[i]=s;
@@ -2555,7 +2588,7 @@ make_join_statistics(JOIN *join, TABLE_L
s->needed_reg.init();
table_vector[i]=s->table=table=tables->table;
table->pos_in_table_list= tables;
- error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ error= tables->fetch_number_of_rows();
if (error)
{
table->file->print_error(error, MYF(0));
@@ -2626,6 +2659,7 @@ make_join_statistics(JOIN *join, TABLE_L
no_rows_const_tables |= table->map;
}
}
+
stat_vector[i]=0;
join->outer_join=outer_join;
@@ -2641,6 +2675,8 @@ make_join_statistics(JOIN *join, TABLE_L
*/
for (i= 0, s= stat ; i < table_count ; i++, s++)
{
+ if (!s->dependent)
+ continue;
for (uint j= 0 ; j < table_count ; j++)
{
table= stat[j].table;
@@ -2874,7 +2910,7 @@ make_join_statistics(JOIN *join, TABLE_L
}
/* Approximate found rows and time to read them */
s->found_records=s->records=s->table->file->stats.records;
- s->read_time=(ha_rows) s->table->file->scan_time();
+ s->scan_time();
/*
Set a max range of how many seeks we can expect when using keys
@@ -2959,17 +2995,31 @@ make_join_statistics(JOIN *join, TABLE_L
if (optimize_semijoin_nests(join, all_table_map))
DBUG_RETURN(TRUE); /* purecov: inspected */
- /* Find an optimal join order of the non-constant tables. */
- if (join->const_tables != join->tables)
- {
- if (choose_plan(join, all_table_map & ~join->const_table_map))
- goto error;
- }
- else
- {
- memcpy((uchar*) join->best_positions,(uchar*) join->positions,
- sizeof(POSITION)*join->const_tables);
- join->best_read=1.0;
+ {
+ ha_rows records= 1;
+ SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
+
+ /* Find an optimal join order of the non-constant tables. */
+ if (join->const_tables != join->tables)
+ {
+ if (choose_plan(join, all_table_map & ~join->const_table_map))
+ goto error;
+ /*
+ Calculate estimated number of rows for materialized derived
+ table/view.
+ */
+ for (i= 0; i < join->tables ; i++)
+ records*= join->best_positions[i].records_read ?
+ (ha_rows)join->best_positions[i].records_read : 1;
+ }
+ else
+ {
+ memcpy((uchar*) join->best_positions,(uchar*) join->positions,
+ sizeof(POSITION)*join->const_tables);
+ join->best_read=1.0;
+ }
+ if (unit->derived && unit->derived->is_materialized_derived())
+ join->select_lex->increase_derived_records(records);
}
/* Generate an execution plan from the found optimal join order. */
DBUG_RETURN(join->thd->killed || get_best_combination(join));
@@ -2981,8 +3031,12 @@ error:
may not be assigned yet by this function (which is building join_tab).
Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
*/
- for (tables= tables_arg; tables; tables= tables->next_leaf)
- tables->table->reginfo.join_tab= NULL;
+ {
+ TABLE_LIST *table;
+ List_iterator<TABLE_LIST> ti(tables_list);
+ while ((table= ti++))
+ table->table->reginfo.join_tab= NULL;
+ }
DBUG_RETURN (1);
}
@@ -3245,14 +3299,20 @@ add_key_field(KEY_FIELD **key_fields,uin
Field *field, bool eq_func, Item **value, uint num_values,
table_map usable_tables, SARGABLE_PARAM **sargables)
{
- uint exists_optimize= 0;
- if (!(field->flags & PART_KEY_FLAG))
+ uint optimize= 0;
+ if (eq_func &&
+ field->table->pos_in_table_list->is_materialized_derived() &&
+ !field->table->created)
+ {
+ optimize= KEY_OPTIMIZE_EQ;
+ }
+ else if (!(field->flags & PART_KEY_FLAG))
{
// Don't remove column IS NULL on a LEFT JOIN table
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
!field->table->maybe_null || field->null_ptr)
return; // Not a key. Skip it
- exists_optimize= KEY_OPTIMIZE_EXISTS;
+ optimize= KEY_OPTIMIZE_EXISTS;
DBUG_ASSERT(num_values == 1);
}
else
@@ -3272,12 +3332,12 @@ add_key_field(KEY_FIELD **key_fields,uin
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
!field->table->maybe_null || field->null_ptr)
return; // Can't use left join optimize
- exists_optimize= KEY_OPTIMIZE_EXISTS;
+ optimize= KEY_OPTIMIZE_EXISTS;
}
else
{
JOIN_TAB *stat=field->table->reginfo.join_tab;
- key_map possible_keys=field->key_start;
+ key_map possible_keys=field->get_possible_keys();
possible_keys.intersect(field->table->keys_in_use_for_query);
stat[0].keys.merge(possible_keys); // Add possible keys
@@ -3371,7 +3431,7 @@ add_key_field(KEY_FIELD **key_fields,uin
(*key_fields)->eq_func= eq_func;
(*key_fields)->val= *value;
(*key_fields)->level= and_level;
- (*key_fields)->optimize= exists_optimize;
+ (*key_fields)->optimize= optimize;
/*
If the condition has form "tbl.keypart = othertbl.field" and
othertbl.field can be NULL, there will be no matches if othertbl.field
@@ -3690,6 +3750,34 @@ max_part_bit(key_part_map bits)
return found;
}
+static bool
+add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
+ uint key, uint part)
+{
+ KEYUSE keyuse;
+ Field *field= key_field->field;
+
+ keyuse.table= field->table;
+ keyuse.val= key_field->val;
+ keyuse.key= key;
+ if (key != MAX_KEY)
+ {
+ keyuse.keypart=part;
+ keyuse.keypart_map= (key_part_map) 1 << part;
+ }
+ else
+ {
+ keyuse.keypart= field->field_index;
+ keyuse.keypart_map= (key_part_map) 0;
+ }
+ keyuse.used_tables= key_field->val->used_tables();
+ keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
+ keyuse.null_rejecting= key_field->null_rejecting;
+ keyuse.cond_guard= key_field->cond_guard;
+ keyuse.sj_pred_no= key_field->sj_pred_no;
+ return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
+}
+
/*
Add all keys with uses 'field' for some keypart
If field->and_level != and_level then only mark key_part as const_part
@@ -3704,10 +3792,13 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array
{
Field *field=key_field->field;
TABLE *form= field->table;
- KEYUSE keyuse;
if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
{
+ if (key_field->eq_func && (key_field->optimize & KEY_OPTIMIZE_EQ))
+ {
+ return add_keyuse(keyuse_array, key_field, MAX_KEY, 0);
+ }
for (uint key=0 ; key < form->s->keys ; key++)
{
if (!(form->keys_in_use_for_query.is_set(key)))
@@ -3720,17 +3811,7 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array
{
if (field->eq(form->key_info[key].key_part[part].field))
{
- keyuse.table= field->table;
- keyuse.val = key_field->val;
- keyuse.key = key;
- keyuse.keypart=part;
- keyuse.keypart_map= (key_part_map) 1 << part;
- keyuse.used_tables=key_field->val->used_tables();
- keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
- keyuse.null_rejecting= key_field->null_rejecting;
- keyuse.cond_guard= key_field->cond_guard;
- keyuse.sj_pred_no= key_field->sj_pred_no;
- if (insert_dynamic(keyuse_array,(uchar*) &keyuse))
+ if (add_keyuse(keyuse_array, key_field, key, part))
return TRUE;
}
}
@@ -3815,6 +3896,9 @@ sort_keyuse(KEYUSE *a,KEYUSE *b)
return (int) (a->table->tablenr - b->table->tablenr);
if (a->key != b->key)
return (int) (a->key - b->key);
+ if (a->key == MAX_KEY && b->key == MAX_KEY &&
+ a->used_tables != b->used_tables)
+ return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
if (a->keypart != b->keypart)
return (int) (a->keypart - b->keypart);
// Place const values before other ones
@@ -3965,19 +4049,21 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
if (my_init_dynamic_array(keyuse,sizeof(KEYUSE),20,64))
return TRUE;
+
if (cond)
{
+ KEY_FIELD *saved_field= field;
add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
sargables);
for (; field != end ; field++)
{
- if (add_key_part(keyuse,field))
- return TRUE;
+
/* Mark that we can optimize LEFT JOIN */
if (field->val->type() == Item::NULL_ITEM &&
!field->field->real_maybe_null())
field->field->table->reginfo.not_exists_optimize=1;
}
+ field= saved_field;
}
for (i=0 ; i < tables ; i++)
{
@@ -4042,6 +4128,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
if (insert_dynamic(keyuse,(uchar*) &key_end))
return TRUE;
+ generate_derived_keys(keyuse);
+
use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant=0;
@@ -4107,7 +4195,7 @@ static void optimize_keyuse(JOIN *join,
~OUTER_REF_TABLE_BIT)))
{
uint tablenr;
- for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ;
+ tablenr= my_count_bits(map);
if (map == 1) // Only one table
{
TABLE *tmp_table=join->all_tables[tablenr];
@@ -4714,7 +4802,7 @@ best_access_path(JOIN *join,
else
{
/* Estimate cost of reading table. */
- tmp= s->table->file->scan_time();
+ tmp= s->scan_time();
if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache
{
/*
@@ -5065,6 +5153,7 @@ optimize_straight_join(JOIN *join, table
{
JOIN_TAB *s;
uint idx= join->const_tables;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
double record_count= 1.0;
double read_time= 0.0;
POSITION loose_scan_pos;
@@ -5072,7 +5161,7 @@ optimize_straight_join(JOIN *join, table
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
{
/* Find the best access method from 's' to the current partial plan */
- best_access_path(join, s, join_tables, idx, FALSE, record_count,
+ best_access_path(join, s, join_tables, idx, disable_jbuf, record_count,
join->positions + idx, &loose_scan_pos);
/* compute the cost of the new plan extended with 's' */
@@ -5452,6 +5541,7 @@ best_extension_by_limited_search(JOIN
JOIN_TAB *s;
double best_record_count= DBL_MAX;
double best_read_time= DBL_MAX;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
"part_plan"););
@@ -5473,8 +5563,8 @@ best_extension_by_limited_search(JOIN
/* Find the best access method from 's' to the current partial plan */
POSITION loose_scan_pos;
- best_access_path(join, s, remaining_tables, idx, FALSE, record_count,
- join->positions + idx, &loose_scan_pos);
+ best_access_path(join, s, remaining_tables, idx, disable_jbuf,
+ record_count, join->positions + idx, &loose_scan_pos);
/* Compute the cost of extending the plan with 's' */
@@ -5618,6 +5708,7 @@ find_best(JOIN *join,table_map rest_tabl
JOIN_TAB *s;
double best_record_count=DBL_MAX,best_read_time=DBL_MAX;
+ bool disable_jbuf= join->thd->variables.join_cache_level == 0;
for (JOIN_TAB **pos=join->best_ref+idx ; (s=*pos) ; pos++)
{
table_map real_table_bit=s->table->map;
@@ -5626,7 +5717,7 @@ find_best(JOIN *join,table_map rest_tabl
{
double records, best;
POSITION loose_scan_pos;
- best_access_path(join, s, rest_tables, idx, FALSE, record_count,
+ best_access_path(join, s, rest_tables, idx, disable_jbuf, record_count,
join->positions + idx, &loose_scan_pos);
records= join->positions[idx].records_read;
best= join->positions[idx].read_time;
@@ -5999,8 +6090,7 @@ static bool create_ref_for_key(JOIN *joi
if (keyuse->null_rejecting)
j->ref.null_rejecting |= 1 << i;
keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
- if (!keyuse->used_tables &&
- !(join->select_options & SELECT_DESCRIBE))
+ if (!keyuse->used_tables && !thd->lex->describe)
{ // Compare against constant
store_key_item tmp(thd, keyinfo->key_part[i].field,
key_buff + maybe_null,
@@ -6411,7 +6501,7 @@ make_outerjoin_info(JOIN *join)
for ( ; embedding ; embedding= embedding->embedding)
{
/* Ignore sj-nests: */
- if (!embedding->on_expr)
+ if (!(embedding->on_expr && embedding->outer_join))
continue;
NESTED_JOIN *nested_join= embedding->nested_join;
if (!nested_join->counter)
@@ -6902,6 +6992,123 @@ make_join_select(JOIN *join,SQL_SELECT *
}
+static
+uint get_next_field_for_derived_key(uchar *arg)
+{
+ KEYUSE *keyuse= *(KEYUSE **) arg;
+ if (!keyuse)
+ return (uint) (-1);
+ uint key= keyuse->key;
+ uint fldno= keyuse->keypart;
+ uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
+ 0 : (keyuse-1)->keypart+1;
+ for ( ; keyuse->key == key && keyuse->keypart == fldno; keyuse++)
+ keyuse->keypart= keypart;
+ if (keyuse->key != key)
+ keyuse= 0;
+ return fldno;
+}
+
+
+static
+bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
+{
+ TABLE *table= keyuse->table;
+ if (table->alloc_keys(keys))
+ return TRUE;
+ uint keyno= 0;
+ KEYUSE *first_keyuse= keyuse;
+ uint prev_part= (uint) (-1);
+ uint parts= 0;
+ uint i= 0;
+ do
+ {
+ keyuse->key= keyno;
+ keyuse->keypart_map= (key_part_map) (1 << parts);
+ keyuse++;
+ if (++i == count || keyuse->used_tables != first_keyuse->used_tables)
+ {
+ if (table->add_tmp_key(keyno, ++parts,
+ get_next_field_for_derived_key,
+ (uchar *) &first_keyuse))
+ return TRUE;
+ first_keyuse= keyuse;
+ keyno++;
+ parts= 0;
+ }
+ else if (keyuse->keypart != prev_part)
+ {
+ parts++;
+ prev_part= keyuse->keypart;
+ }
+ } while (keyno < keys);
+ return FALSE;
+}
+
+
+static
+bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
+{
+ KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
+ uint elements= keyuse_array->elements;
+ TABLE *prev_table= 0;
+ for (uint i= 0; i < elements; i++, keyuse++)
+ {
+ KEYUSE *first_table_keyuse;
+ table_map last_used_tables;
+ uint count;
+ uint keys;
+ while (keyuse->key == MAX_KEY)
+ {
+ if (keyuse->table != prev_table)
+ {
+ prev_table= keyuse->table;
+ first_table_keyuse= keyuse;
+ last_used_tables= keyuse->used_tables;
+ count= 0;
+ keys= 0;
+ }
+ else if (keyuse->used_tables != last_used_tables)
+ {
+ keys++;
+ last_used_tables= keyuse->used_tables;
+ }
+ count++;
+ keyuse++;
+ if (keyuse->table != prev_table &&
+ generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
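
Side note: both functions rely on sort_keyuse() having ordered the MAX_KEY
entries by (table, used_tables, keypart), so each run with the same
used_tables becomes one candidate key and each distinct field within a run
one keypart. The counting logic in isolation, on plain data (illustration
only, not the server's structures):

    #include <stdio.h>
    typedef unsigned long long table_map;

    struct toy_keyuse { int table; table_map used_tables; int field; };

    /* 'arr' must be sorted by (table, used_tables, field). */
    void count_derived_keys(const toy_keyuse *arr, int n)
    {
      int i= 0;
      while (i < n)
      {
        int table= arr[i].table, keyno= 0;
        while (i < n && arr[i].table == table)
        {
          table_map ut= arr[i].used_tables;
          int parts= 0, prev_field= -1;
          while (i < n && arr[i].table == table && arr[i].used_tables == ut)
          {
            if (arr[i].field != prev_field)
            {
              parts++;                      /* new keypart for a new field */
              prev_field= arr[i].field;
            }
            i++;
          }
          printf("table %d: key %d with %d part(s)\n", table, keyno++, parts);
        }
      }
    }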
+
+
+/*
+ @brief
+ Drops unused keys for each materialized derived table/view
+
+ @details
+ For materialized derived tables only ref access can be used, and it
+ employs just one index, so we don't need the rest. For each materialized
+ derived table/view call TABLE::use_index to keep the one index chosen by
+ the optimizer and free the others. If no key was chosen, all keys are
+ dropped.
+*/
+
+void JOIN::drop_unused_derived_keys()
+{
+ for (uint i= const_tables ; i < tables ; i++)
+ {
+ JOIN_TAB *tab=join_tab+i;
+ TABLE *table=tab->table;
+ if (!table->pos_in_table_list->is_materialized_derived() ||
+ table->max_keys <= 1)
+ continue;
+ table->use_index(tab->ref.key);
+ tab->ref.key= 0;
+ }
+}
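
Side note: what TABLE::use_index amounts to, in miniature (toy container,
not the real KEY bookkeeping): keep only the optimizer's chosen key and
renumber it to 0, which is why tab->ref.key is reset above.

    #include <vector>

    struct key_def { int id; };

    int use_index_sketch(std::vector<key_def> &keys, int chosen)
    {
      if (chosen >= 0 && chosen < (int) keys.size())
      {
        key_def kept= keys[chosen];
        keys.clear();
        keys.push_back(kept);               /* survivor becomes key 0 */
        return 0;
      }
      keys.clear();                         /* no key chosen: drop them all */
      return -1;
    }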
+
/*
Determine {after which table we'll produce ordered set}
@@ -7695,6 +7902,30 @@ void JOIN_TAB::cleanup()
/**
+ Initialize the join_tab before reading.
+ Currently only derived table/view materialization is done here.
+*/
+
+bool JOIN_TAB::preread_init()
+{
+ TABLE_LIST *derived= table->pos_in_table_list;
+ if (!derived || !derived->is_materialized_derived())
+ {
+ preread_init_done= TRUE;
+ return FALSE;
+ }
+
+ /* Materialize derived table/view. */
+ if (!derived->get_unit()->executed &&
+ mysql_handle_single_derived(join->thd->lex,
+ derived, DT_CREATE | DT_FILL))
+ return TRUE;
+ preread_init_done= TRUE;
+ return FALSE;
+}
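
Side note: preread_init/preread_init_done form a lazy-initialization pair:
whichever reader touches the join_tab first pays for the materialization,
later readers see the flag and skip it. Shape of the idiom with stand-in
types:

    struct derived_tab
    {
      bool preread_init_done;
      bool materialized;

      bool fill()                           /* stand-in for DT_CREATE|DT_FILL */
      {
        materialized= true;
        return false;                       /* false means success, as above */
      }

      bool preread_init()
      {
        if (!materialized && fill())
          return true;                      /* propagate the error */
        preread_init_done= true;
        return false;
      }
    };

    /* Call sites: if (!tab.preread_init_done && tab.preread_init()) abort */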
+
+
+/**
Partially cleanup JOIN after it has executed: close index or rnd read
(table cursors), free quick selects.
@@ -8118,7 +8349,7 @@ remove_const(JOIN *join,ORDER *first_ord
static int
-return_zero_rows(JOIN *join, select_result *result,TABLE_LIST *tables,
+return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> &tables,
List<Item> &fields, bool send_row, ulonglong select_options,
const char *info, Item *having)
{
@@ -8134,7 +8365,9 @@ return_zero_rows(JOIN *join, select_resu
if (send_row)
{
- for (TABLE_LIST *table= tables; table; table= table->next_leaf)
+ List_iterator<TABLE_LIST> ti(tables);
+ TABLE_LIST *table;
+ while ((table= ti++))
mark_as_null_row(table->table); // All fields are NULL
if (having && having->val_int() == 0)
send_row=0;
@@ -9764,12 +9997,14 @@ simplify_joins(JOIN *join, List<TABLE_LI
{
TABLE_LIST *tbl;
List_iterator<TABLE_LIST> it(nested_join->join_list);
+ List<TABLE_LIST> repl_list;
while ((tbl= it++))
{
tbl->embedding= table->embedding;
tbl->join_list= table->join_list;
+ repl_list.push_back(tbl);
}
- li.replace(nested_join->join_list);
+ li.replace(repl_list);
/* Need to update the name resolution table chain when flattening joins */
fix_name_res= TRUE;
table= *li.ref();
@@ -10707,13 +10942,29 @@ Field *create_tmp_field(THD *thd, TABLE
If item have to be able to store NULLs but underlaid field can't do it,
create_tmp_field_from_field() can't be used for tmp field creation.
*/
- if (field->maybe_null && !field->field->maybe_null())
+ if ((field->maybe_null ||
+ (orig_item && orig_item->maybe_null)) && /* for outer joined views/dt*/
+ !field->field->maybe_null())
{
+ bool save_maybe_null;
+ /*
+ The item the ref points to may have maybe_null flag set while
+ the ref doesn't have it. This may happen for outer fields
+ when the outer query decided at some point after name resolution phase
+ that this field might be null. Take this into account here.
+ */
+ if (orig_item)
+ {
+ save_maybe_null= item->maybe_null;
+ item->maybe_null= orig_item->maybe_null;
+ }
result= create_tmp_field_from_item(thd, item, table, NULL,
modify_item, convert_blob_length);
*from_field= field->field;
if (result && modify_item)
field->result_field= result;
+ if (orig_item)
+ item->maybe_null= save_maybe_null;
}
else if (table_cant_handle_bit_fields && field->field->type() ==
MYSQL_TYPE_BIT)
@@ -10858,7 +11109,7 @@ TABLE *
create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
- char *table_alias)
+ char *table_alias, bool do_not_open)
{
MEM_ROOT *mem_root_save, own_root;
TABLE *table;
@@ -11397,7 +11648,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
share->uniques= test(using_unique_constraint);
table->key_info= table->s->key_info= keyinfo;
keyinfo->key_part=key_part_info;
- keyinfo->flags=HA_NOSAME;
+ keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
keyinfo->key_length=0;
keyinfo->rec_per_key=0;
@@ -11483,7 +11734,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO));
table->key_info= table->s->key_info= keyinfo;
keyinfo->key_part=key_part_info;
- keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
+ keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY | HA_PACK_KEY;
keyinfo->key_length= 0; // Will compute the sum of the parts below.
keyinfo->name= (char*) "distinct_key";
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
@@ -11551,15 +11802,17 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
if (thd->is_fatal_error) // If end of memory
goto err; /* purecov: inspected */
share->db_record_offset= 1;
- if (share->db_type() == TMP_ENGINE_HTON)
+ if (!do_not_open)
{
- if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
- &param->recinfo, select_options))
+ if (share->db_type() == TMP_ENGINE_HTON)
+ {
+ if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo,
+ &param->recinfo, select_options))
+ goto err;
+ }
+ if (open_tmp_table(table))
goto err;
}
- if (open_tmp_table(table))
- goto err;
-
thd->mem_root= mem_root_save;
DBUG_RETURN(table);
@@ -11714,6 +11967,7 @@ bool open_tmp_table(TABLE *table)
return(1);
}
(void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
+ table->created= TRUE;
return(0);
}
@@ -12022,6 +12276,7 @@ static bool create_internal_tmp_table(TA
}
status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
share->db_record_offset= 1;
+ table->created= TRUE;
DBUG_RETURN(0);
err:
DBUG_RETURN(1);
@@ -12177,7 +12432,7 @@ free_tmp_table(THD *thd, TABLE *entry)
save_proc_info=thd->proc_info;
thd_proc_info(thd, "removing tmp table");
- if (entry->file)
+ if (entry->file && entry->created)
{
if (entry->db_stat)
entry->file->ha_drop_table(entry->s->table_name.str);
@@ -12777,6 +13032,9 @@ sub_select(JOIN *join,JOIN_TAB *join_tab
do_sj_reset(join_tab->flush_weedout_table);
}
+ if (!join_tab->preread_init_done && join_tab->preread_init())
+ DBUG_RETURN(NESTED_LOOP_ERROR);
+
if (join->resume_nested_loop)
{
/* If not the last table, plunge down the nested loop */
@@ -13135,13 +13393,21 @@ static int
join_read_const_table(JOIN_TAB *tab, POSITION *pos)
{
int error;
+ TABLE_LIST *tbl;
DBUG_ENTER("join_read_const_table");
TABLE *table=tab->table;
table->const_table=1;
table->null_row=0;
table->status=STATUS_NO_RECORD;
- if (tab->type == JT_SYSTEM)
+ if (tab->table->pos_in_table_list->is_materialized_derived() &&
+ !tab->table->pos_in_table_list->fill_me)
+ {
+ //TODO: don't get here at all
+ /* Skip materialized derived tables/views. */
+ DBUG_RETURN(0);
+ }
+ else if (tab->type == JT_SYSTEM)
{
if ((error=join_read_system(tab)))
{ // Info for DESCRIBE
@@ -13203,26 +13469,27 @@ join_read_const_table(JOIN_TAB *tab, POS
if (!table->null_row)
table->maybe_null=0;
- /* Check appearance of new constant items in Item_equal objects */
- JOIN *join= tab->join;
- if (join->conds)
- update_const_equal_items(join->conds, tab);
- TABLE_LIST *tbl;
- for (tbl= join->select_lex->leaf_tables; tbl; tbl= tbl->next_leaf)
{
- TABLE_LIST *embedded;
- TABLE_LIST *embedding= tbl;
- do
+ JOIN *join= tab->join;
+ List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
+ /* Check appearance of new constant items in Item_equal objects */
+ if (join->conds)
+ update_const_equal_items(join->conds, tab);
+ while ((tbl= ti++))
{
- embedded= embedding;
- if (embedded->on_expr)
- update_const_equal_items(embedded->on_expr, tab);
- embedding= embedded->embedding;
+ TABLE_LIST *embedded;
+ TABLE_LIST *embedding= tbl;
+ do
+ {
+ embedded= embedding;
+ if (embedded->on_expr)
+ update_const_equal_items(embedded->on_expr, tab);
+ embedding= embedded->embedding;
+ }
+ while (embedding &&
+ embedding->nested_join->join_list.head() == embedded);
}
- while (embedding &&
- embedding->nested_join->join_list.head() == embedded);
}
-
DBUG_RETURN(0);
}
@@ -13576,6 +13843,9 @@ int join_init_read_record(JOIN_TAB *tab)
{
if (tab->select && tab->select->quick && tab->select->quick->reset())
return 1;
+ if (!tab->preread_init_done && tab->preread_init())
+ return 1;
+
init_read_record(&tab->read_record, tab->join->thd, tab->table,
tab->select,1,1, FALSE);
return (*tab->read_record.read_record)(&tab->read_record);
@@ -15500,6 +15770,8 @@ create_sort_index(THD *thd, JOIN *join,
get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX))
goto err;
+ if (!tab->preread_init_done && tab->preread_init())
+ goto err;
if (table->s->tmp_table)
table->file->info(HA_STATUS_VARIABLE); // Get record count
table->sort.found_records=filesort(thd, table,join->sortorder, length,
@@ -16053,7 +16325,7 @@ find_order_in_list(THD *thd, Item **ref_
order->in_field_list= 1;
order->counter= count;
order->counter_used= 1;
- return FALSE;
+ return FALSE;
}
/* Lookup the current GROUP/ORDER field in the SELECT clause. */
select_item= find_item_in_list(order_item, fields, &counter,
@@ -16494,8 +16766,10 @@ test_if_subpart(ORDER *a,ORDER *b)
*/
static TABLE *
-get_sort_by_table(ORDER *a,ORDER *b,TABLE_LIST *tables)
+get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables)
{
+ TABLE_LIST *table;
+ List_iterator<TABLE_LIST> ti(tables);
table_map map= (table_map) 0;
DBUG_ENTER("get_sort_by_table");
@@ -16513,11 +16787,11 @@ get_sort_by_table(ORDER *a,ORDER *b,TABL
if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
DBUG_RETURN(0);
- for (; !(map & tables->table->map); tables= tables->next_leaf) ;
- if (map != tables->table->map)
+ while ((table= ti++) && !(map & table->table->map));
+ if (map != table->table->map)
DBUG_RETURN(0); // More than one table
- DBUG_PRINT("exit",("sort by table: %d",tables->table->tablenr));
- DBUG_RETURN(tables->table);
+ DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
+ DBUG_RETURN(table->table);
}
@@ -17886,7 +18160,8 @@ static void select_describe(JOIN *join,
if (result->send_data(item_list))
join->error= 1;
}
- else
+ else if (!join->select_lex->master_unit()->derived ||
+ join->select_lex->master_unit()->derived->is_materialized_derived())
{
table_map used_tables=0;
@@ -18194,6 +18469,7 @@ static void select_describe(JOIN *join,
if (examined_rows)
f= (float) (100.0 * join->best_positions[i].records_read /
examined_rows);
+ set_if_smaller(f, 100.0);
item_list.push_back(new Item_float(f, 2));
}
}
@@ -18456,11 +18732,32 @@ bool mysql_explain_union(THD *thd, SELEC
sl;
sl= sl->next_select())
{
+ bool is_primary= FALSE;
+ if (sl->next_select())
+ is_primary= TRUE;
+
+ if (!is_primary && sl->first_inner_unit())
+ {
+ /*
+ If there is at least one materialized derived table or view, then it's a PRIMARY select.
+ Otherwise, all derived tables/views were merged and this select is a SIMPLE one.
+ */
+ for (SELECT_LEX_UNIT *un= sl->first_inner_unit();
+ un;
+ un= un->next_unit())
+ {
+ if ((!un->derived ||
+ un->derived->is_materialized_derived()))
+ {
+ is_primary= TRUE;
+ break;
+ }
+ }
+ }
// drop UNCACHEABLE_EXPLAIN, because it is for internal usage only
uint8 uncacheable= (sl->uncacheable & ~UNCACHEABLE_EXPLAIN);
sl->type= (((&thd->lex->select_lex)==sl)?
- (sl->first_inner_unit() || sl->next_select() ?
- "PRIMARY" : "SIMPLE"):
+ (is_primary ? "PRIMARY" : "SIMPLE"):
((sl == first)?
((sl->linkage == DERIVED_TABLE_TYPE) ?
"DERIVED":
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-03-20 12:01:47 +0000
+++ b/sql/sql_select.h 2010-05-26 20:18:18 +0000
@@ -37,6 +37,7 @@
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
#define KEY_OPTIMIZE_REF_OR_NULL 2
+#define KEY_OPTIMIZE_EQ 4
typedef struct keyuse_t {
TABLE *table;
@@ -293,6 +294,8 @@ typedef struct st_join_table {
*/
uint sj_strategy;
+ bool preread_init_done;
+
void cleanup();
inline bool is_using_loose_index_scan()
{
@@ -364,6 +367,22 @@ typedef struct st_join_table {
select->cond= new_cond;
return tmp_select_cond;
}
+ double scan_time()
+ {
+ double res;
+ if (table->created)
+ {
+ res= table->file->scan_time();
+ read_time=(ha_rows) res;
+ }
+ else
+ {
+ read_time= found_records ? found_records: 10;// TODO:fix this stub
+ res= (double)read_time;
+ }
+ return res;
+ }
+ bool preread_init();
} JOIN_TAB;
@@ -1551,6 +1570,7 @@ public:
bool union_part; ///< this subselect is part of union
bool optimized; ///< flag to avoid double optimization in EXPLAIN
+
Array<Item_in_subselect> sj_subselects;
/* Temporary tables used to weed-out semi-join duplicates */
@@ -1700,6 +1720,7 @@ public:
{
return (table_map(1) << tables) - 1;
}
+ void drop_unused_derived_keys();
/*
Return the table for which an index scan can be used to satisfy
the sort order needed by the ORDER BY/(implicit) GROUP BY clause
@@ -1744,7 +1765,7 @@ Field* create_tmp_field_from_field(THD *
/* functions from opt_sum.cc */
bool simple_pred(Item_func *func_item, Item **args, bool *inv_order);
-int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds);
+int opt_sum_query(List<TABLE_LIST> &tables, List<Item> &all_fields,COND *conds);
/* from sql_delete.cc, used by opt_range.cc */
extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b);
@@ -1964,7 +1985,7 @@ void push_index_cond(JOIN_TAB *tab, uint
TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
- char* alias);
+ char* alias, bool do_not_open=FALSE);
void free_tmp_table(THD *thd, TABLE *entry);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
=== modified file 'sql/sql_show.cc'
--- a/sql/sql_show.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_show.cc 2010-05-26 20:18:18 +0000
@@ -719,7 +719,8 @@ mysqld_show_create(THD *thd, TABLE_LIST
{
Show_create_error_handler view_error_suppressor(thd, table_list);
thd->push_internal_handler(&view_error_suppressor);
- bool error= open_normal_and_derived_tables(thd, table_list, 0);
+ bool error= open_normal_and_derived_tables(thd, table_list, 0,
+ DT_PREPARE | DT_CREATE);
thd->pop_internal_handler();
if (error && (thd->killed || thd->main_da.is_error()))
DBUG_RETURN(TRUE);
@@ -894,7 +895,8 @@ mysqld_list_fields(THD *thd, TABLE_LIST
DBUG_ENTER("mysqld_list_fields");
DBUG_PRINT("enter",("table: %s",table_list->table_name));
- if (open_normal_and_derived_tables(thd, table_list, 0))
+ if (open_normal_and_derived_tables(thd, table_list, 0,
+ DT_PREPARE | DT_CREATE))
DBUG_VOID_RETURN;
table= table_list->table;
@@ -1680,7 +1682,7 @@ view_store_options(THD *thd, TABLE_LIST
static void append_algorithm(TABLE_LIST *table, String *buff)
{
buff->append(STRING_WITH_LEN("ALGORITHM="));
- switch ((int8)table->algorithm) {
+ switch ((int16)table->algorithm) {
case VIEW_ALGORITHM_UNDEFINED:
buff->append(STRING_WITH_LEN("UNDEFINED "));
break;
@@ -3360,8 +3362,9 @@ fill_schema_show_cols_or_idxs(THD *thd,
SQLCOM_SHOW_FIELDS is used because it satisfies 'only_view_structure()'
*/
lex->sql_command= SQLCOM_SHOW_FIELDS;
- res= open_normal_and_derived_tables(thd, show_table_list,
- MYSQL_LOCK_IGNORE_FLUSH);
+ res= (open_normal_and_derived_tables(thd, show_table_list,
+ MYSQL_LOCK_IGNORE_FLUSH,
+ DT_PREPARE | DT_CREATE));
lex->sql_command= save_sql_command;
/*
get_all_tables() returns 1 on failure and 0 on success thus
@@ -3792,8 +3795,9 @@ int get_all_tables(THD *thd, TABLE_LIST
lex->sql_command= SQLCOM_SHOW_FIELDS;
show_table_list->i_s_requested_object=
schema_table->i_s_requested_object;
- res= open_normal_and_derived_tables(thd, show_table_list,
- MYSQL_LOCK_IGNORE_FLUSH);
+ res= (open_normal_and_derived_tables(thd, show_table_list,
+ MYSQL_LOCK_IGNORE_FLUSH,
+ DT_PREPARE | DT_CREATE));
lex->sql_command= save_sql_command;
/*
XXX: show_table_list has a flag i_is_requested,
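
For reference, a sketch of statements that go through the
open_normal_and_derived_tables() calls patched above, now with
DT_PREPARE | DT_CREATE (the view name v1 is invented):

    SHOW CREATE VIEW v1;
    SHOW COLUMNS FROM v1;
    SELECT column_name FROM information_schema.columns
    WHERE table_name = 'v1';
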
=== modified file 'sql/sql_table.cc'
--- a/sql/sql_table.cc 2010-03-15 11:51:23 +0000
+++ b/sql/sql_table.cc 2010-05-26 20:18:18 +0000
@@ -4623,8 +4623,13 @@ static bool mysql_admin_table(THD* thd,
thd->no_warnings_for_error= no_warnings_for_error;
if (view_operator_func == NULL)
table->required_type=FRMTYPE_TABLE;
-
+ if (lex->sql_command == SQLCOM_CHECK ||
+ lex->sql_command == SQLCOM_REPAIR ||
+ lex->sql_command == SQLCOM_ANALYZE ||
+ lex->sql_command == SQLCOM_OPTIMIZE)
+ thd->prepare_derived_at_open= TRUE;
open_and_lock_tables(thd, table);
+ thd->prepare_derived_at_open= FALSE;
thd->no_warnings_for_error= 0;
table->next_global= save_next_global;
table->next_local= save_next_local;
@@ -4722,7 +4727,7 @@ static bool mysql_admin_table(THD* thd,
else
/* Default failure code is corrupt table */
result_code= HA_ADMIN_CORRUPT;
- goto send_result;
+ goto send_result;
}
if (table->view)
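
A sketch of the four admin statements for which
thd->prepare_derived_at_open is now set, per the hunk above (object names
invented; which of them accept a view varies):

    CHECK TABLE v1;
    REPAIR TABLE t1;
    ANALYZE TABLE t1;
    OPTIMIZE TABLE t1;
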
=== modified file 'sql/sql_union.cc'
--- a/sql/sql_union.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_union.cc 2010-05-26 20:18:18 +0000
@@ -105,6 +105,7 @@ bool select_union::flush()
options create options
table_alias name of the temporary table
bit_fields_as_long convert bit fields to ulonglong
+ create_table whether to physically create result table
DESCRIPTION
Create a temporary table that is used to store the result of a UNION,
@@ -119,7 +120,7 @@ bool
select_union::create_result_table(THD *thd_arg, List<Item> *column_types,
bool is_union_distinct, ulonglong options,
const char *alias,
- bool bit_fields_as_long)
+ bool bit_fields_as_long, bool create_table)
{
DBUG_ASSERT(table == 0);
tmp_table_param.init();
@@ -128,10 +129,14 @@ select_union::create_result_table(THD *t
if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
(ORDER*) 0, is_union_distinct, 1,
- options, HA_POS_ERROR, (char*) alias)))
+ options, HA_POS_ERROR, (char*) alias,
+ !create_table)))
return TRUE;
- table->file->extra(HA_EXTRA_WRITE_CACHE);
- table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+ if (create_table)
+ {
+ table->file->extra(HA_EXTRA_WRITE_CACHE);
+ table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+ }
return FALSE;
}
@@ -269,6 +274,7 @@ bool st_select_lex_unit::prepare(THD *th
(is_union_select ? (ORDER*) 0 :
(ORDER*) thd_arg->lex->proc_list.first),
sl, this);
+
/* There are no * in the statement anymore (for PS) */
sl->with_wild= 0;
last_procedure= join->procedure;
@@ -331,6 +337,8 @@ bool st_select_lex_unit::prepare(THD *th
List_iterator_fast<Item> tp(types);
Item *type;
ulonglong create_options;
+ uint save_tablenr;
+ table_map save_map;
while ((type= tp++))
{
@@ -383,12 +391,22 @@ bool st_select_lex_unit::prepare(THD *th
create_options= create_options | TMP_TABLE_FORCE_MYISAM;
if (union_result->create_result_table(thd, &types, test(union_distinct),
- create_options, "", FALSE))
+ create_options, "", FALSE, TRUE))
goto err;
+ if (fake_select_lex && !fake_select_lex->first_cond_optimization)
+ {
+ save_tablenr= result_table_list.tablenr_exec;
+ save_map= result_table_list.map_exec;
+ }
bzero((char*) &result_table_list, sizeof(result_table_list));
result_table_list.db= (char*) "";
result_table_list.table_name= result_table_list.alias= (char*) "union";
result_table_list.table= table= union_result->table;
+ if (fake_select_lex && !fake_select_lex->first_cond_optimization)
+ {
+ result_table_list.tablenr_exec= save_tablenr;
+ result_table_list.map_exec= save_map;
+ }
thd_arg->lex->current_select= lex_select_save;
if (!item_list.elements)
@@ -453,18 +471,21 @@ err:
}
-bool st_select_lex_unit::exec()
+/**
+ Run optimization phase.
+
+ @return FALSE unit successfully passed optimization phase.
+ @return TRUE an error occurred.
+*/
+bool st_select_lex_unit::optimize()
{
SELECT_LEX *lex_select_save= thd->lex->current_select;
SELECT_LEX *select_cursor=first_select();
- ulonglong add_rows=0;
- ha_rows examined_rows= 0;
- DBUG_ENTER("st_select_lex_unit::exec");
+ DBUG_ENTER("st_select_lex_unit::optimize");
- if (executed && !uncacheable && !describe)
+ if (optimized && !uncacheable && !describe)
DBUG_RETURN(FALSE);
- executed= 1;
-
+
if (uncacheable || !item || !item->assigned() || describe)
{
if (item)
@@ -485,7 +506,6 @@ bool st_select_lex_unit::exec()
}
for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
{
- ha_rows records_at_start= 0;
thd->lex->current_select= sl;
if (optimized)
@@ -512,6 +532,66 @@ bool st_select_lex_unit::exec()
sl->join->select_options=
(select_limit_cnt == HA_POS_ERROR || sl->braces) ?
sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
+
+ saved_error= sl->join->optimize();
+ }
+
+ if (saved_error)
+ {
+ thd->lex->current_select= lex_select_save;
+ DBUG_RETURN(saved_error);
+ }
+ }
+ }
+ optimized= 1;
+
+ thd->lex->current_select= lex_select_save;
+ DBUG_RETURN(saved_error);
+}
+
+
+bool st_select_lex_unit::exec()
+{
+ SELECT_LEX *lex_select_save= thd->lex->current_select;
+ SELECT_LEX *select_cursor=first_select();
+ ulonglong add_rows=0;
+ ha_rows examined_rows= 0;
+ DBUG_ENTER("st_select_lex_unit::exec");
+
+ if (executed && !uncacheable && !describe)
+ DBUG_RETURN(FALSE);
+ executed= 1;
+
+ saved_error= optimize();
+
+ if (uncacheable || !item || !item->assigned() || describe)
+ {
+ for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+ {
+ ha_rows records_at_start= 0;
+ thd->lex->current_select= sl;
+
+ {
+ set_limit(sl);
+ if (sl == global_parameters || describe)
+ {
+ offset_limit_cnt= 0;
+ /*
+ We can't use LIMIT at this stage if we are using ORDER BY for the
+ whole query
+ */
+ if (sl->order_list.first || describe)
+ select_limit_cnt= HA_POS_ERROR;
+ }
+
+ /*
+ When using braces, SQL_CALC_FOUND_ROWS affects the whole query:
+ we don't calculate found_rows() per union part.
+ Otherwise, SQL_CALC_FOUND_ROWS should be done on all sub parts.
+ */
+ sl->join->select_options=
+ (select_limit_cnt == HA_POS_ERROR || sl->braces) ?
+ sl->options & ~OPTION_FOUND_ROWS : sl->options | found_rows_for_union;
saved_error= sl->join->optimize();
}
if (!saved_error)
@@ -564,7 +644,6 @@ bool st_select_lex_unit::exec()
}
}
}
- optimized= 1;
/* Send result to 'result' */
saved_error= TRUE;
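
A hedged example of a union the split optimize()/exec() phases above have to
handle: the ORDER BY ... LIMIT applies to the whole union, so the per-part
LIMIT is disabled while each part is optimized (table names invented):

    SELECT f1 FROM t1
    UNION
    SELECT f1 FROM t2
    ORDER BY f1 LIMIT 10;
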
=== modified file 'sql/sql_update.cc'
--- a/sql/sql_update.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_update.cc 2010-05-26 20:18:18 +0000
@@ -212,7 +212,11 @@ int mysql_update(THD *thd,
if (open_tables(thd, &table_list, &table_count, 0))
DBUG_RETURN(1);
- if (table_list->multitable_view)
+ //Prepare views so they are handled correctly.
+ if (mysql_handle_derived(thd->lex, DT_INIT))
+ DBUG_RETURN(1);
+
+ if (table_list->is_multitable())
{
DBUG_ASSERT(table_list->view != 0);
DBUG_PRINT("info", ("Switch to multi-update"));
@@ -227,15 +231,19 @@ int mysql_update(THD *thd,
DBUG_RETURN(1);
close_tables_for_reopen(thd, &table_list);
}
-
- if (mysql_handle_derived(thd->lex, &mysql_derived_prepare) ||
- (thd->fill_derived_tables() &&
- mysql_handle_derived(thd->lex, &mysql_derived_filling)))
+ if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
+ DBUG_RETURN(1);
+ if (table_list->handle_derived(thd->lex, DT_PREPARE))
DBUG_RETURN(1);
thd_proc_info(thd, "init");
table= table_list->table;
+ if (!table_list->updatable)
+ {
+ my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
+ DBUG_RETURN(1);
+ }
/* Calculate "table->covering_keys" based on the WHERE */
table->covering_keys= table->s->keys_in_use;
table->quick_keys.clear_all();
@@ -254,13 +262,17 @@ int mysql_update(THD *thd,
table_list->grant.want_privilege= table->grant.want_privilege= want_privilege;
table_list->register_want_access(want_privilege);
#endif
+ /* 'Unfix' fields to allow correct marking by the setup_fields function. */
+ if (table_list->is_view())
+ unfix_fields(fields);
+
if (setup_fields_with_no_wrap(thd, 0, fields, MARK_COLUMNS_WRITE, 0, 0))
DBUG_RETURN(1); /* purecov: inspected */
if (table_list->view && check_fields(thd, fields))
{
DBUG_RETURN(1);
}
- if (!table_list->updatable || check_key_in_view(thd, table_list))
+ if (check_key_in_view(thd, table_list))
{
my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
DBUG_RETURN(1);
@@ -839,6 +851,11 @@ int mysql_update(THD *thd,
}
thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */
thd->abort_on_warning= 0;
+ if (thd->lex->current_select->first_cond_optimization)
+ {
+ thd->lex->current_select->save_leaf_tables(thd);
+ thd->lex->current_select->first_cond_optimization= 0;
+ }
DBUG_RETURN((error >= 0 || thd->is_error()) ? 1 : 0);
err:
@@ -903,8 +920,8 @@ bool mysql_prepare_update(THD *thd, TABL
if (setup_tables_and_check_access(thd, &select_lex->context,
&select_lex->top_join_list,
table_list,
- &select_lex->leaf_tables,
- FALSE, UPDATE_ACL, SELECT_ACL) ||
+ select_lex->leaf_tables,
+ FALSE, UPDATE_ACL, SELECT_ACL, TRUE) ||
setup_conds(thd, table_list, select_lex->leaf_tables, conds) ||
select_lex->setup_ref_array(thd, order_num) ||
setup_order(thd, select_lex->ref_pointer_array,
@@ -941,8 +958,8 @@ static table_map get_table_map(List<Item
Item_field *item;
table_map map= 0;
- while ((item= (Item_field *) item_it++))
- map|= item->used_tables();
+ while ((item= (Item_field *) item_it++))
+ map|= item->all_used_tables();
DBUG_PRINT("info", ("table_map: 0x%08lx", (long) map));
return map;
}
@@ -964,7 +981,7 @@ int mysql_multi_update_prepare(THD *thd)
{
LEX *lex= thd->lex;
TABLE_LIST *table_list= lex->query_tables;
- TABLE_LIST *tl, *leaves;
+ TABLE_LIST *tl;
List<Item> *fields= &lex->select_lex.item_list;
table_map tables_for_update;
bool update_view= 0;
@@ -987,19 +1004,24 @@ reopen_tables:
/* open tables and create derived ones, but do not lock and fill them */
if (((original_multiupdate || need_reopen) &&
open_tables(thd, &table_list, &table_count, 0)) ||
- mysql_handle_derived(lex, &mysql_derived_prepare))
+ mysql_handle_derived(lex, DT_INIT))
DBUG_RETURN(TRUE);
/*
setup_tables() need for VIEWs. JOIN::prepare() will call setup_tables()
second time, but this call will do nothing (there are check for second
call in setup_tables()).
*/
+ //We need to merge for insert prior to prepare.
+ if (mysql_handle_list_of_derived(lex, table_list, DT_MERGE_FOR_INSERT))
+ DBUG_RETURN(1);
+ if (mysql_handle_list_of_derived(lex, table_list, DT_PREPARE))
+ DBUG_RETURN(1);
if (setup_tables_and_check_access(thd, &lex->select_lex.context,
&lex->select_lex.top_join_list,
table_list,
- &lex->select_lex.leaf_tables, FALSE,
- UPDATE_ACL, SELECT_ACL))
+ lex->select_lex.leaf_tables, FALSE,
+ UPDATE_ACL, SELECT_ACL, TRUE))
DBUG_RETURN(TRUE);
if (setup_fields_with_no_wrap(thd, 0, *fields, MARK_COLUMNS_WRITE, 0, 0))
@@ -1024,8 +1046,8 @@ reopen_tables:
/*
Setup timestamp handling and locking mode
*/
- leaves= lex->select_lex.leaf_tables;
- for (tl= leaves; tl; tl= tl->next_leaf)
+ List_iterator<TABLE_LIST> ti(lex->select_lex.leaf_tables);
+ while ((tl= ti++))
{
TABLE *table= tl->table;
/* Only set timestamp column if this is not modified */
@@ -1067,7 +1089,7 @@ reopen_tables:
for (tl= table_list; tl; tl= tl->next_local)
{
/* Check access privileges for table */
- if (!tl->derived)
+ if (!tl->is_derived())
{
uint want_privilege= tl->updating ? UPDATE_ACL : SELECT_ACL;
if (check_access(thd, want_privilege,
@@ -1081,7 +1103,7 @@ reopen_tables:
/* check single table update for view compound from several tables */
for (tl= table_list; tl; tl= tl->next_local)
{
- if (tl->effective_algorithm == VIEW_ALGORITHM_MERGE)
+ if (tl->is_merged_derived())
{
TABLE_LIST *for_update= 0;
if (tl->check_single_table(&for_update, tables_for_update, tl))
@@ -1136,6 +1158,8 @@ reopen_tables:
*/
unit->unclean();
}
+ // Reset 'prepared' flags for all derived tables/views
+ mysql_handle_list_of_derived(thd->lex, table_list, DT_REINIT);
/*
Also we need to cleanup Natural_join_column::table_field items.
@@ -1158,7 +1182,8 @@ reopen_tables:
*/
lex->select_lex.exclude_from_table_unique_test= TRUE;
/* We only need SELECT privilege for columns in the values list */
- for (tl= leaves; tl; tl= tl->next_leaf)
+ ti.rewind();
+ while ((tl= ti++))
{
TABLE *table= tl->table;
TABLE_LIST *tlist;
@@ -1187,10 +1212,6 @@ reopen_tables:
*/
lex->select_lex.exclude_from_table_unique_test= FALSE;
- if (thd->fill_derived_tables() &&
- mysql_handle_derived(lex, &mysql_derived_filling))
- DBUG_RETURN(TRUE);
-
DBUG_RETURN (FALSE);
}
@@ -1213,7 +1234,7 @@ bool mysql_multi_update(THD *thd,
DBUG_ENTER("mysql_multi_update");
if (!(result= new multi_update(table_list,
- thd->lex->select_lex.leaf_tables,
+ &thd->lex->select_lex.leaf_tables,
fields, values,
handle_duplicates, ignore)))
DBUG_RETURN(TRUE);
@@ -1247,7 +1268,7 @@ bool mysql_multi_update(THD *thd,
multi_update::multi_update(TABLE_LIST *table_list,
- TABLE_LIST *leaves_list,
+ List<TABLE_LIST> *leaves_list,
List<Item> *field_list, List<Item> *value_list,
enum enum_duplicates handle_duplicates_arg,
bool ignore_arg)
@@ -1265,6 +1286,7 @@ multi_update::multi_update(TABLE_LIST *t
int multi_update::prepare(List<Item> &not_used_values,
SELECT_LEX_UNIT *lex_unit)
+
{
TABLE_LIST *table_ref;
SQL_LIST update;
@@ -1274,12 +1296,20 @@ int multi_update::prepare(List<Item> &no
List_iterator_fast<Item> value_it(*values);
uint i, max_fields;
uint leaf_table_count= 0;
+ List_iterator<TABLE_LIST> ti(*leaves);
DBUG_ENTER("multi_update::prepare");
thd->count_cuted_fields= CHECK_FIELD_WARN;
thd->cuted_fields=0L;
thd_proc_info(thd, "updating main table");
+ SELECT_LEX *select_lex= lex_unit->first_select();
+ if (select_lex->first_cond_optimization)
+ {
+ if (select_lex->handle_derived(thd->lex, DT_MERGE))
+ DBUG_RETURN(TRUE);
+ }
+
tables_to_update= get_table_map(fields);
if (!tables_to_update)
@@ -1293,7 +1323,7 @@ int multi_update::prepare(List<Item> &no
TABLE::tmp_set by pointing TABLE::read_set to it and then restore it after
setup_fields().
*/
- for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+ while ((table_ref= ti++))
{
TABLE *table= table_ref->table;
if (tables_to_update & table->map)
@@ -1311,7 +1341,8 @@ int multi_update::prepare(List<Item> &no
int error= setup_fields(thd, 0, *values, MARK_COLUMNS_READ, 0, 0);
- for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+ ti.rewind();
+ while ((table_ref= ti++))
{
TABLE *table= table_ref->table;
if (tables_to_update & table->map)
@@ -1331,7 +1362,8 @@ int multi_update::prepare(List<Item> &no
*/
update.empty();
- for (table_ref= leaves; table_ref; table_ref= table_ref->next_leaf)
+ ti.rewind();
+ while ((table_ref= ti++))
{
/* TODO: add support of view of join support */
TABLE *table=table_ref->table;
@@ -1557,9 +1589,9 @@ loop_end:
{
table_map unupdated_tables= table_ref->check_option->used_tables() &
~first_table_for_update->map;
- for (TABLE_LIST *tbl_ref =leaves;
- unupdated_tables && tbl_ref;
- tbl_ref= tbl_ref->next_leaf)
+ List_iterator<TABLE_LIST> ti(*leaves);
+ TABLE_LIST *tbl_ref;
+ while ((tbl_ref= ti++) && unupdated_tables)
{
if (unupdated_tables & tbl_ref->table->map)
unupdated_tables&= ~tbl_ref->table->map;
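
Two hedged examples of the UPDATE paths touched above (view and column names
invented): a single-table update through a mergeable view, and a view over a
join that triggers the switch to multi-update:

    CREATE VIEW v1 AS SELECT f1, f2 FROM t1;
    UPDATE v1 SET f2 = f2 + 1 WHERE f1 > 0;   -- single-table path

    CREATE VIEW v2 AS SELECT t1.f1, t2.f3 FROM t1, t2 WHERE t1.f1 = t2.f3;
    UPDATE v2 SET f3 = 0 WHERE f1 = 5;        -- multi-update path
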
=== modified file 'sql/sql_view.cc'
--- a/sql/sql_view.cc 2010-03-04 08:03:07 +0000
+++ b/sql/sql_view.cc 2010-05-26 20:18:18 +0000
@@ -219,7 +219,7 @@ fill_defined_view_parts (THD *thd, TABLE
view->definer.user= decoy.definer.user;
lex->definer= &view->definer;
}
- if (lex->create_view_algorithm == VIEW_ALGORITHM_UNDEFINED)
+ if (lex->create_view_algorithm == DTYPE_ALGORITHM_UNDEFINED)
lex->create_view_algorithm= (uint8) decoy.algorithm;
if (lex->create_view_suid == VIEW_SUID_DEFAULT)
lex->create_view_suid= decoy.view_suid ?
@@ -814,7 +814,7 @@ static int mysql_register_view(THD *thd,
ulong sql_mode= thd->variables.sql_mode & MODE_ANSI_QUOTES;
thd->variables.sql_mode&= ~MODE_ANSI_QUOTES;
- lex->unit.print(&view_query, QT_ORDINARY);
+ lex->unit.print(&view_query, QT_VIEW_INTERNAL);
lex->unit.print(&is_query, QT_IS);
thd->variables.sql_mode|= sql_mode;
@@ -847,7 +847,7 @@ static int mysql_register_view(THD *thd,
{
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_VIEW_MERGE,
ER(ER_WARN_VIEW_MERGE));
- lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+ lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
}
view->algorithm= lex->create_view_algorithm;
view->definer.user= lex->definer->user;
@@ -1415,7 +1415,7 @@ bool mysql_make_view(THD *thd, File_pars
List_iterator_fast<TABLE_LIST> ti(view_select->top_join_list);
- table->effective_algorithm= VIEW_ALGORITHM_MERGE;
+ table->derived_type= VIEW_ALGORITHM_MERGE;
DBUG_PRINT("info", ("algorithm: MERGE"));
table->updatable= (table->updatable_view != 0);
table->effective_with_check=
@@ -1429,67 +1429,10 @@ bool mysql_make_view(THD *thd, File_pars
/* prepare view context */
lex->select_lex.context.resolve_in_table_list_only(view_main_select_tables);
lex->select_lex.context.outer_context= 0;
- lex->select_lex.context.select_lex= table->select_lex;
lex->select_lex.select_n_having_items+=
table->select_lex->select_n_having_items;
- /*
- Tables of the main select of the view should be marked as belonging
- to the same select as original view (again we can use LEX::select_lex
- for this purprose because we don't support MERGE algorithm for views
- with unions).
- */
- for (tbl= lex->select_lex.get_table_list(); tbl; tbl= tbl->next_local)
- tbl->select_lex= table->select_lex;
-
- {
- if (view_main_select_tables->next_local)
- {
- table->multitable_view= TRUE;
- if (table->belong_to_view)
- table->belong_to_view->multitable_view= TRUE;
- }
- /* make nested join structure for view tables */
- NESTED_JOIN *nested_join;
- if (!(nested_join= table->nested_join=
- (NESTED_JOIN *) thd->calloc(sizeof(NESTED_JOIN))))
- goto err;
- nested_join->join_list= view_select->top_join_list;
-
- /* re-nest tables of VIEW */
- ti.rewind();
- while ((tbl= ti++))
- {
- tbl->join_list= &nested_join->join_list;
- tbl->embedding= table;
- }
- }
-
- /* Store WHERE clause for post-processing in setup_underlying */
table->where= view_select->where;
- /*
- Add subqueries units to SELECT into which we merging current view.
- unit(->next)* chain starts with subqueries that are used by this
- view and continues with subqueries that are used by other views.
- We must not add any subquery twice (otherwise we'll form a loop),
- to do this we remember in end_unit the first subquery that has
- been already added.
-
- NOTE: we do not support UNION here, so we take only one select
- */
- SELECT_LEX_NODE *end_unit= table->select_lex->slave;
- SELECT_LEX_UNIT *next_unit;
- for (SELECT_LEX_UNIT *unit= lex->select_lex.first_inner_unit();
- unit;
- unit= next_unit)
- {
- if (unit == end_unit)
- break;
- SELECT_LEX_NODE *save_slave= unit->slave;
- next_unit= unit->next_unit();
- unit->include_down(table->select_lex);
- unit->slave= save_slave; // fix include_down initialisation
- }
/*
We can safely ignore the VIEW's ORDER BY if we merge into union
@@ -1506,23 +1449,22 @@ bool mysql_make_view(THD *thd, File_pars
goto ok;
}
- table->effective_algorithm= VIEW_ALGORITHM_TMPTABLE;
+ table->derived_type= VIEW_ALGORITHM_TMPTABLE;
DBUG_PRINT("info", ("algorithm: TEMPORARY TABLE"));
view_select->linkage= DERIVED_TABLE_TYPE;
table->updatable= 0;
table->effective_with_check= VIEW_CHECK_NONE;
old_lex->subqueries= TRUE;
- /* SELECT tree link */
- lex->unit.include_down(table->select_lex);
- lex->unit.slave= view_select; // fix include_down initialisation
-
table->derived= &lex->unit;
}
else
goto err;
ok:
+ /* SELECT tree link */
+ lex->unit.include_down(table->select_lex);
+ lex->unit.slave= view_select; // fix include_down initialisation
/* global SELECT list linking */
end= view_select; // primary SELECT_LEX is always last
end->link_next= old_lex->all_selects_list;
=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy 2010-03-15 11:51:23 +0000
+++ b/sql/sql_yacc.yy 2010-05-26 20:18:18 +0000
@@ -1920,7 +1920,7 @@ create:
| CREATE
{
Lex->create_view_mode= VIEW_CREATE_NEW;
- Lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+ Lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
Lex->create_view_suid= TRUE;
}
view_or_trigger_or_sp_or_event
@@ -5858,7 +5858,7 @@ alter:
my_error(ER_SP_BADSTATEMENT, MYF(0), "ALTER VIEW");
MYSQL_YYABORT;
}
- lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED;
+ lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED;
lex->create_view_mode= VIEW_ALTER;
}
view_tail
@@ -13369,7 +13369,7 @@ view_replace:
view_algorithm:
ALGORITHM_SYM EQ UNDEFINED_SYM
- { Lex->create_view_algorithm= VIEW_ALGORITHM_UNDEFINED; }
+ { Lex->create_view_algorithm= DTYPE_ALGORITHM_UNDEFINED; }
| ALGORITHM_SYM EQ MERGE_SYM
{ Lex->create_view_algorithm= VIEW_ALGORITHM_MERGE; }
| ALGORITHM_SYM EQ TEMPTABLE_SYM
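
The grammar rules above keep accepting the same syntax; only the internal
constant for UNDEFINED changes. A sketch (view names invented):

    CREATE ALGORITHM=UNDEFINED VIEW v1 AS SELECT f1 FROM t1;
    CREATE ALGORITHM=MERGE     VIEW v2 AS SELECT f1 FROM t1;
    CREATE ALGORITHM=TEMPTABLE VIEW v3 AS SELECT f1 FROM t1;
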
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-03-20 12:01:47 +0000
+++ b/sql/table.cc 2010-05-26 20:18:18 +0000
@@ -20,7 +20,7 @@
#include "sql_trigger.h"
#include <m_ctype.h>
#include "my_md5.h"
-
+#include "my_bit.h"
/* INFORMATION_SCHEMA name */
LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")};
@@ -3442,129 +3442,118 @@ void TABLE_LIST::calc_md5(char *buffer)
/**
- @brief Set underlying table for table place holder of view.
-
- @details
-
- Replace all views that only use one table with the table itself. This
- allows us to treat the view as a simple table and even update it (it is a
- kind of optimization).
+ @brief
+ Create field translation for mergeable derived table/view.
- @note
+ @param thd Thread handle
- This optimization is potentially dangerous as it makes views
- masquerade as base tables: Views don't have the pointer TABLE_LIST::table
- set to non-@c NULL.
+ @details
+ Create field translation for mergeable derived table/view.
- We may have the case where a view accesses tables not normally accessible
- in the current Security_context (only in the definer's
- Security_context). According to the table's GRANT_INFO (TABLE::grant),
- access is fulfilled, but this is implicitly meant in the definer's security
- context. Hence we must never look at only a TABLE's GRANT_INFO without
- looking at the one of the referring TABLE_LIST.
+ @return FALSE ok.
+ @return TRUE an error occurred.
*/
-void TABLE_LIST::set_underlying_merge()
+bool TABLE_LIST::create_field_translation(THD *thd)
{
- TABLE_LIST *tbl;
+ Item *item;
+ Field_translator *transl;
+ SELECT_LEX *select= get_single_select();
+ List_iterator_fast<Item> it(select->item_list);
+ uint field_count= 0;
+ Query_arena *arena= thd->stmt_arena, backup;
+ bool res= FALSE;
+
+ used_items.empty();
- if ((tbl= merge_underlying_list))
+ if (field_translation)
{
- /* This is a view. Process all tables of view */
- DBUG_ASSERT(view && effective_algorithm == VIEW_ALGORITHM_MERGE);
- do
+ /*
+ Update items in the field translation after the view has been prepared.
+ It's needed because some items in the select list, like IN subselects,
+ might be substituted for optimized ones.
+ */
+ if (is_view() && get_unit()->prepared && !field_translation_updated)
{
- if (tbl->merge_underlying_list) // This is a view
+ while ((item= it++))
{
- DBUG_ASSERT(tbl->view &&
- tbl->effective_algorithm == VIEW_ALGORITHM_MERGE);
- /*
- This is the only case where set_ancestor is called on an object
- that may not be a view (in which case ancestor is 0)
- */
- tbl->merge_underlying_list->set_underlying_merge();
+ field_translation[field_count++].item= item;
}
- } while ((tbl= tbl->next_local));
-
- if (!multitable_view)
- {
- table= merge_underlying_list->table;
- schema_table= merge_underlying_list->schema_table;
+ field_translation_updated= TRUE;
}
+
+ return FALSE;
}
+
+ if (arena->is_conventional())
+ arena= 0; // For easier test
+ else
+ thd->set_n_backup_active_arena(arena, &backup);
+
+ /* Create view fields translation table */
+
+ if (!(transl=
+ (Field_translator*)(thd->stmt_arena->
+ alloc(select->item_list.elements *
+ sizeof(Field_translator)))))
+ {
+ res= TRUE;
+ goto exit;
+ }
+
+ while ((item= it++))
+ {
+ transl[field_count].name= item->name;
+ transl[field_count++].item= item;
+ }
+ field_translation= transl;
+ field_translation_end= transl + field_count;
+
+exit:
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+
+ return res;
}
-/*
- setup fields of placeholder of merged VIEW
+/**
+ @brief
+ Set up fields of a placeholder of a merged derived table/view.
- SYNOPSIS
- TABLE_LIST::setup_underlying()
- thd - thread handler
+ @param thd Thread handle
- DESCRIPTION
- It is:
- - preparing translation table for view columns
- If there are underlying view(s) procedure first will be called for them.
+ @details
+ Create the field translation for this merged derived table/view and move
+ its fulltext functions to the current select.
- RETURN
- FALSE - OK
- TRUE - error
+ @return FALSE ok.
+ @return TRUE an error occurred.
*/
bool TABLE_LIST::setup_underlying(THD *thd)
{
DBUG_ENTER("TABLE_LIST::setup_underlying");
- if (!field_translation && merge_underlying_list)
+ if (!view || (!field_translation && merge_underlying_list))
{
- Field_translator *transl;
- SELECT_LEX *select= &view->select_lex;
- Item *item;
- TABLE_LIST *tbl;
+ SELECT_LEX *select= get_single_select();
List_iterator_fast<Item> it(select->item_list);
- uint field_count= 0;
+ TABLE_LIST *tbl;
- if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*) &field_count))
+ if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*) &tbl))
{
DBUG_RETURN(TRUE);
}
-
- for (tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
- {
- if (tbl->merge_underlying_list &&
- tbl->setup_underlying(thd))
- {
- DBUG_RETURN(TRUE);
- }
- }
-
- /* Create view fields translation table */
-
- if (!(transl=
- (Field_translator*)(thd->stmt_arena->
- alloc(select->item_list.elements *
- sizeof(Field_translator)))))
- {
+ if (create_field_translation(thd))
DBUG_RETURN(TRUE);
- }
-
- while ((item= it++))
- {
- transl[field_count].name= item->name;
- transl[field_count++].item= item;
- }
- field_translation= transl;
- field_translation_end= transl + field_count;
- /* TODO: use hash for big number of fields */
/* full text function moving to current select */
- if (view->select_lex.ftfunc_list->elements)
+ if (select->ftfunc_list->elements)
{
Item_func_match *ifm;
SELECT_LEX *current_select= thd->lex->current_select;
List_iterator_fast<Item_func_match>
- li(*(view->select_lex.ftfunc_list));
+ li(*(select_lex->ftfunc_list));
while ((ifm= li++))
current_select->ftfunc_list->push_front(ifm);
}
@@ -3574,7 +3563,7 @@ bool TABLE_LIST::setup_underlying(THD *t
/*
- Prepare where expression of view
+ Prepare where expression of derived table/view
SYNOPSIS
TABLE_LIST::prep_where()
@@ -3598,7 +3587,8 @@ bool TABLE_LIST::prep_where(THD *thd, It
for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
{
- if (tbl->view && tbl->prep_where(thd, conds, no_where_clause))
+ if (tbl->is_view_or_derived() &&
+ tbl->prep_where(thd, conds, no_where_clause))
{
DBUG_RETURN(TRUE);
}
@@ -3606,6 +3596,8 @@ bool TABLE_LIST::prep_where(THD *thd, It
if (where)
{
+ if (where->fixed)
+ where->update_used_tables();
if (!where->fixed && where->fix_fields(thd, &where))
{
DBUG_RETURN(TRUE);
@@ -3638,7 +3630,13 @@ bool TABLE_LIST::prep_where(THD *thd, It
}
}
if (tbl == 0)
+ {
+ if (*conds && !(*conds)->fixed)
+ (*conds)->fix_fields(thd, conds);
*conds= and_conds(*conds, where->copy_andor_structure(thd));
+ if (*conds && !(*conds)->fixed)
+ (*conds)->fix_fields(thd, conds);
+ }
if (arena)
thd->restore_active_arena(arena, &backup);
where_processed= TRUE;
@@ -3677,10 +3675,11 @@ merge_on_conds(THD *thd, TABLE_LIST *tab
DBUG_PRINT("info", ("alias: %s", table->alias));
if (table->on_expr)
cond= table->on_expr->copy_andor_structure(thd);
- if (!table->nested_join)
+ if (!table->view)
DBUG_RETURN(cond);
- List_iterator<TABLE_LIST> li(table->nested_join->join_list);
- while (TABLE_LIST *tbl= li++)
+ for (TABLE_LIST *tbl= (TABLE_LIST*)table->view->select_lex.table_list.first;
+ tbl;
+ tbl= tbl->next_local)
{
if (tbl->view && !is_cascaded)
continue;
@@ -3720,7 +3719,7 @@ bool TABLE_LIST::prep_check_option(THD *
{
DBUG_ENTER("TABLE_LIST::prep_check_option");
bool is_cascaded= check_opt_type == VIEW_CHECK_CASCADED;
-
+ TABLE_LIST *merge_underlying_list= view->select_lex.get_table_list();
for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
{
/* see comment of check_opt_type parameter */
@@ -3833,10 +3832,14 @@ void TABLE_LIST::hide_view_error(THD *th
TABLE_LIST *TABLE_LIST::find_underlying_table(TABLE *table_to_find)
{
/* is this real table and table which we are looking for? */
- if (table == table_to_find && merge_underlying_list == 0)
+ if (table == table_to_find && view == 0)
return this;
+ if (!view)
+ return 0;
- for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+ for (TABLE_LIST *tbl= view->select_lex.get_table_list();
+ tbl;
+ tbl= tbl->next_local)
{
TABLE_LIST *result;
if ((result= tbl->find_underlying_table(table_to_find)))
@@ -3918,7 +3921,12 @@ bool TABLE_LIST::check_single_table(TABL
table_map map,
TABLE_LIST *view_arg)
{
- for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+ if (!select_lex)
+ return FALSE;
+ DBUG_ASSERT(is_merged_derived());
+ for (TABLE_LIST *tbl= get_single_select()->get_table_list();
+ tbl;
+ tbl= tbl->next_local)
{
if (tbl->table)
{
@@ -3960,8 +3968,10 @@ bool TABLE_LIST::set_insert_values(MEM_R
}
else
{
- DBUG_ASSERT(view && merge_underlying_list);
- for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+ DBUG_ASSERT(is_view_or_derived() && is_merged_derived());
+ for (TABLE_LIST *tbl= (TABLE_LIST*)view->select_lex.table_list.first;
+ tbl;
+ tbl= tbl->next_local)
if (tbl->set_insert_values(mem_root))
return TRUE;
}
@@ -3987,7 +3997,7 @@ bool TABLE_LIST::set_insert_values(MEM_R
*/
bool TABLE_LIST::is_leaf_for_name_resolution()
{
- return (view || is_natural_join || is_join_columns_complete ||
+ return (is_merged_derived() || is_natural_join || is_join_columns_complete ||
!nested_join);
}
@@ -4125,7 +4135,11 @@ void TABLE_LIST::register_want_access(ul
if (table)
table->grant.want_privilege= want_access;
}
- for (TABLE_LIST *tbl= merge_underlying_list; tbl; tbl= tbl->next_local)
+ if (!view)
+ return;
+ for (TABLE_LIST *tbl= view->select_lex.get_table_list();
+ tbl;
+ tbl= tbl->next_local)
tbl->register_want_access(want_access);
}
@@ -4358,14 +4372,23 @@ const char *Natural_join_column::db_name
DBUG_ASSERT(!strcmp(table_ref->db,
table_ref->table->s->db.str) ||
(table_ref->schema_table &&
- table_ref->table->s->db.str[0] == 0));
+ table_ref->table->s->db.str[0] == 0) ||
+ table_ref->is_materialized_derived());
return table_ref->db;
}
GRANT_INFO *Natural_join_column::grant()
{
- if (view_field)
+/* if (view_field)
+ return &(table_ref->grant);
+ return &(table_ref->table->grant);*/
+ /*
+ Have to check algorithm because merged derived also has
+ field_translation.
+ */
+//if (table_ref->effective_algorithm == DTYPE_ALGORITHM_MERGE)
+ if (table_ref->is_merged_derived())
return &(table_ref->grant);
return &(table_ref->table->grant);
}
@@ -4448,7 +4471,15 @@ Item *create_view_field(THD *thd, TABLE_
}
Item *item= new Item_direct_view_ref(&view->view->select_lex.context,
field_ref, view->alias,
- name);
+ name, view);
+ /*
+ Force creation of nullable item for the result tmp table for outer joined
+ views/derived tables.
+ */
+ if (view->outer_join)
+ item->maybe_null= TRUE;
+ /* Save item in case we will need to fall back to materialization. */
+ view->used_items.push_back(item);
DBUG_RETURN(item);
}
@@ -4502,8 +4533,7 @@ void Field_iterator_table_ref::set_field
/* This is a merge view, so use field_translation. */
else if (table_ref->field_translation)
{
- DBUG_ASSERT(table_ref->view &&
- table_ref->effective_algorithm == VIEW_ALGORITHM_MERGE);
+ DBUG_ASSERT(table_ref->is_merged_derived());
field_it= &view_field_it;
DBUG_PRINT("info", ("field_it for '%s' is Field_iterator_view",
table_ref->alias));
@@ -5096,6 +5126,142 @@ void st_table::mark_virtual_columns_for_
file->column_bitmaps_signal();
}
+
+/**
+ @brief
+ Allocate space for keys
+
+ @param key_count number of keys to allocate.
+
+ @details
+ Allocate enough space to fit 'key_count' keys for this table.
+
+ @return FALSE space was successfully allocated.
+ @return TRUE an error occurred.
+*/
+
+bool TABLE::alloc_keys(uint key_count)
+{
+ DBUG_ASSERT(!s->keys);
+ key_info= s->key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*key_count);
+ max_keys= key_count;
+ return !(key_info);
+}
+
+/**
+ @brief Adds one key to a temporary table.
+
+ @param key index of the key to add
+ @param key_parts number of fields taking part in the key
+ @param next_field_no function returning the number of the next key field
+ @param arg argument passed to the next_field_no function
+
+ @details
+ Creates key number 'key' for this table from 'key_parts' fields, obtained
+ by successive calls to next_field_no(arg).
+
+ @return TRUE an error occurred.
+ @return FALSE the key was successfully added.
+*/
+
+bool TABLE::add_tmp_key(uint key, uint key_parts,
+ uint (*next_field_no) (uchar *), uchar *arg)
+{
+ DBUG_ASSERT(!created && key < max_keys);
+
+ char buf[NAME_CHAR_LEN];
+ KEY* keyinfo;
+ Field **reg_field;
+ uint i;
+ bool key_start= TRUE;
+ KEY_PART_INFO* key_part_info=
+ (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts);
+ if (!key_part_info)
+ return TRUE;
+ keyinfo= key_info + key;
+ keyinfo->key_part= key_part_info;
+ keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
+ keyinfo->key_length=0;
+ keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+ keyinfo->flags= HA_GENERATED_KEY;
+ sprintf(buf, "key%i", key);
+ if (!(keyinfo->name= strdup_root(&mem_root, buf)))
+ return TRUE;
+ keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root,
+ sizeof(ulong)*key_parts);
+ if (!keyinfo->rec_per_key)
+ return TRUE;
+ bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
+ for (i= 0; i < key_parts; i++)
+ {
+ reg_field= field + next_field_no(arg);
+ if (key_start)
+ (*reg_field)->key_start.set_bit(key);
+ key_start= FALSE;
+ (*reg_field)->part_of_key.set_bit(key);
+ (*reg_field)->flags|= PART_KEY_FLAG;
+ key_part_info->null_bit= (*reg_field)->null_bit;
+ key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
+ (uchar*) record[0]);
+ key_part_info->field= *reg_field;
+ key_part_info->offset= (*reg_field)->offset(record[0]);
+ key_part_info->length= (uint16) (*reg_field)->pack_length();
+ keyinfo->key_length+= key_part_info->length;
+ /* TODO:
+ The below method of computing the key format length of the
+ key part is a copy/paste from opt_range.cc, and table.cc.
+ This should be factored out, e.g. as a method of Field.
+ In addition it is not clear if any of the Field::*_length
+ methods is supposed to compute the same length. If so, it
+ might be reused.
+ */
+ key_part_info->store_length= key_part_info->length;
+
+ if ((*reg_field)->real_maybe_null())
+ key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+ if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
+ (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR)
+ key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+
+ key_part_info->type= (uint8) (*reg_field)->key_type();
+ key_part_info->key_type =
+ ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
+ (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
+ (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
+ 0 : FIELDFLAG_BINARY;
+ key_part_info++;
+ }
+ set_if_bigger(s->max_key_length, keyinfo->key_length);
+ s->keys++;
+ return FALSE;
+}
+
+/*
+ @brief
+ Drop all indexes except the specified one.
+
+ @param key_to_save the key to save
+
+ @details
+ Drop all indexes on this table except 'key_to_save'. The saved key becomes
+ key #0. Memory occupied by key parts of dropped keys is freed.
+ If 'key_to_save' is negative then all keys are freed.
+*/
+
+void TABLE::use_index(int key_to_save)
+{
+ uint i= 1;
+ DBUG_ASSERT(!created && key_to_save < (int)s->keys);
+ if (key_to_save >= 0)
+ /* Save the given key. */
+ memcpy(key_info, key_info + key_to_save, sizeof(KEY));
+ else
+ /* Drop all keys; */
+ i= 0;
+
+ s->keys= (key_to_save < 0) ? 0 : 1;
+}
+
+
/**
@brief Check if this is part of a MERGE table with attached children.
@@ -5144,6 +5310,7 @@ void TABLE_LIST::reinit_before_use(THD *
parent_embedding->nested_join->join_list.head() == embedded);
}
+
/*
Return subselect that contains the FROM list this table is taken from
@@ -5412,6 +5579,296 @@ int update_virtual_fields(TABLE *table,
DBUG_RETURN(0);
}
+/*
+ @brief Reset const_table flag
+
+ @detail
+ Reset const_table flag for this table. If this table is a merged derived
+ table/view the flag is recursively reset for all tables of the underlying
+ select.
+*/
+
+void TABLE_LIST::reset_const_table()
+{
+ table->const_table= 0;
+ if (is_merged_derived())
+ {
+ SELECT_LEX *select_lex= get_unit()->first_select();
+ TABLE_LIST *tl;
+ List_iterator<TABLE_LIST> ti(select_lex->leaf_tables);
+ while ((tl= ti++))
+ tl->reset_const_table();
+ }
+}
+
+
+/*
+ @brief Run derived tables/view handling phases on underlying select_lex.
+
+ @param lex LEX for this thread
+ @param phases derived tables/views handling phases to run
+ (set of DT_XXX constants)
+ @details
+ This function runs this derived table through the specified 'phases'.
+ Underlying derived tables of this select are handled prior to this one.
+ 'lex' is passed as an argument to called functions.
+
+ @return TRUE on error
+ @return FALSE ok
+*/
+
+bool TABLE_LIST::handle_derived(struct st_lex *lex, uint phases)
+{
+ SELECT_LEX_UNIT *unit= get_unit();
+ if (unit)
+ {
+ for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select())
+ if (sl->handle_derived(lex, phases))
+ return TRUE;
+ return mysql_handle_single_derived(lex, this, phases);
+ }
+ return FALSE;
+}
+
+
+/**
+ @brief
+ Return unit of this derived table/view
+
+ @return reference to a unit if it's a derived table/view.
+ @return 0 when it's not a derived table/view.
+*/
+
+st_select_lex_unit *TABLE_LIST::get_unit()
+{
+ return (view ? &view->unit : derived);
+}
+
+
+/**
+ @brief
+ Return select_lex of this derived table/view
+
+ @return select_lex of this derived table/view.
+ @return 0 when it's not a derived table.
+*/
+
+st_select_lex *TABLE_LIST::get_single_select()
+{
+ SELECT_LEX_UNIT *unit= get_unit();
+ return (unit ? unit->first_select() : 0);
+}
+
+
+/**
+ @brief
+ Attach a join table list as a nested join to this TABLE_LIST.
+
+ @param join_list join table list to attach
+
+ @details
+ This function wraps 'join_list' into a nested_join of this table, thus
+ turning it into a nested join leaf.
+*/
+
+void TABLE_LIST::wrap_into_nested_join(List<TABLE_LIST> &join_list)
+{
+ TABLE_LIST *tl;
+ /*
+ Walk through derived table top list and set 'embedding' to point to
+ the nesting table.
+ */
+ nested_join->join_list.empty();
+ List_iterator_fast<TABLE_LIST> li(join_list);
+ nested_join->join_list= join_list;
+ while ((tl= li++))
+ {
+ tl->embedding= this;
+ tl->join_list= &nested_join->join_list;
+ }
+}
+
+
+/**
+ @brief
+ Initialize this derived table/view
+
+ @param thd Thread handle
+
+ @details
+ This function makes initial preparations of this derived table/view for
+ further processing:
+ - if it's a derived table, marks it either as mergeable or materializable
+ - creates a temporary table for name resolution purposes
+ - creates the field translation for a mergeable derived table/view
+
+ @return TRUE an error occurred
+ @return FALSE ok
+*/
+
+bool TABLE_LIST::init_derived(THD *thd, bool init_view)
+{
+ SELECT_LEX *first_select= get_single_select();
+ SELECT_LEX_UNIT *unit= get_unit();
+
+ if (!unit)
+ return FALSE;
+ /*
+ Check whether we can merge this derived table into main select.
+ Depending on the result field translation will or will not
+ be created.
+ */
+ TABLE_LIST *first_table= (TABLE_LIST *) first_select->table_list.first;
+ if (first_select->table_list.elements > 1 ||
+ first_table && first_table->is_multitable())
+ set_multitable();
+
+ unit->derived= this;
+ if (init_view && !view)
+ {
+ /* This is all we can do for a derived table for now. */
+ set_derived();
+ }
+
+ if (!is_view())
+ {
+ /* A subquery might be forced to be materialized due to a side-effect. */
+ if (!is_materialized_derived() && first_select->is_mergeable())
+ set_merged_derived();
+ else
+ set_materialized_derived();
+ }
+ /*
+ Derived tables/views are materialized prior to UPDATE, thus we can exclude
+ them from the table uniqueness check.
+ */
+ if (is_materialized_derived())
+ {
+ SELECT_LEX *sl;
+ for (sl= first_select ;sl ; sl= sl->next_select())
+ sl->exclude_from_table_unique_test= TRUE;
+ }
+ /*
+ Create field translation for mergeable derived tables/views.
+ For derived tables the field translation can be created only after
+ the unit is prepared, so that all '*' have been unrolled.
+ */
+ if (is_merged_derived())
+ {
+ if (is_view() || unit->prepared)
+ create_field_translation(thd);
+ }
+
+ return FALSE;
+}
+
+
+/**
+ @brief
+ Retrieve number of rows in the table
+
+ @details
+ Retrieve the number of rows in the table referred to by this TABLE_LIST and
+ store it in the table's stats.records variable. If this TABLE_LIST refers
+ to a materialized derived table/view then the estimated number of rows of
+ the derived table/view is used instead.
+
+ @return 0 ok
+ @return non-zero error
+*/
+
+int TABLE_LIST::fetch_number_of_rows()
+{
+ int error= 0;
+ if (is_materialized_derived() && !fill_me)
+
+ {
+ table->file->stats.records= ((select_union*)derived->result)->records;
+ set_if_bigger(table->file->stats.records, 2);
+ }
+ else
+ error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ return error;
+}
+
+/*
+ Procedure of keys generation for result tables of materialized derived
+ tables/views.
+
+ A key is generated for each equi-join pair (derived table, other table).
+ Each generated key consists of fields of derived table used in equi-join.
+ Example:
+
+ SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+ t1 ON tt.f1=t1.f3 and tt.f2=t1.f4;
+ In this case for the derived table tt one key will be generated. It will
+ consist of two parts f1 and f2.
+ Example:
+
+ SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+ t1 ON tt.f1=t1.f3 JOIN
+ t2 ON tt.f2=t2.f4;
+ In this case for the derived table tt two keys will be generated.
+ One key over f1 field, and another key over f2 field.
+ Currently the optimizer may choose to use only one such key, thus the second
+ one will be dropped after the range optimizer has finished.
+ See also JOIN::drop_unused_derived_keys function.
+ Example:
+
+ SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN
+ t1 ON tt.f1=a_function(t1.f3);
+ In this case for the derived table tt one key will be generated. It will
+ consist of one field - f1.
+*/
+
+
+
+/*
+ @brief
+ Change references to underlying items of a merged derived table/view
+ for fields in derived table's result table.
+
+ @return FALSE ok
+ @return TRUE Out of memory
+*/
+bool TABLE_LIST::change_refs_to_fields()
+{
+ List_iterator<Item> li(used_items);
+ Item_direct_ref *ref;
+ Field_iterator_view field_it;
+ THD *thd= table->in_use;
+ DBUG_ASSERT(is_merged_derived());
+
+ if (!used_items.elements)
+ return FALSE;
+
+ materialized_items= (Item**)thd->calloc(sizeof(void*) * table->s->fields);
+
+ while ((ref= (Item_direct_ref*)li++))
+ {
+ uint idx;
+ Item *orig_item= *ref->ref;
+ field_it.set(this);
+ for (idx= 0; !field_it.end_of_fields(); field_it.next(), idx++)
+ {
+ if (field_it.item() == orig_item)
+ break;
+ }
+ DBUG_ASSERT(!field_it.end_of_fields());
+ if (!materialized_items[idx])
+ {
+ materialized_items[idx]= new Item_field(table->field[idx]);
+ if (!materialized_items[idx])
+ return TRUE;
+ }
+ ref->ref= materialized_items + idx;
+ }
+
+ return FALSE;
+}
+
+
/*****************************************************************************
** Instansiate templates
*****************************************************************************/
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-03-20 12:01:47 +0000
+++ b/sql/table.h 2010-05-26 20:18:18 +0000
@@ -858,6 +858,7 @@ struct st_table {
my_bool insert_or_update; /* Can be used by the handler */
my_bool alias_name_used; /* true if table_name is alias */
my_bool get_fields_in_item_tree; /* Signal to fix_field */
+ my_bool created; /* For tmp tables. TRUE <=> tmp table was actually created.*/
/* If MERGE children attached to parent. See top comment in ha_myisammrg.cc */
my_bool children_attached;
@@ -870,6 +871,7 @@ struct st_table {
bool no_partitions_used; /* If true, all partitions have been pruned away */
#endif
+ uint max_keys; /* Size of allocated key_info array. */
bool fill_item_list(List<Item> *item_list) const;
void reset_item_list(List<Item> *item_list) const;
void clear_column_bitmaps(void);
@@ -913,6 +915,15 @@ struct st_table {
*/
inline bool needs_reopen_or_name_lock()
{ return s->version != refresh_version; }
+ bool alloc_keys(uint key_count);
+ bool add_tmp_key(uint key, uint key_parts,
+ uint (*next_field_no) (uchar *), uchar *arg);
+ void use_index(int key_to_save);
+ void set_table_map(table_map map_arg, uint tablenr_arg)
+ {
+ map= map_arg;
+ tablenr= tablenr_arg;
+ }
bool is_children_attached(void);
};
@@ -1045,13 +1056,52 @@ typedef struct st_schema_table
} ST_SCHEMA_TABLE;
+/*
+ Types of derived tables. The ending part is a bitmap of phases that are
+ applicable to a derived table of the type.
+ * /
+#define VIEW_ALGORITHM_UNDEFINED 0
+#define VIEW_ALGORITHM_MERGE 1 + DT_COMMON + DT_MERGE
+#define DERIVED_ALGORITHM_MERGE 2 + DT_COMMON + DT_MERGE
+#define VIEW_ALGORITHM_TMPTABLE 3 + DT_COMMON + DT_MATERIALIZE
+#define DERIVED_ALGORITHM_MATERIALIZE 4 + DT_COMMON + DT_MATERIALIZE
+*/
+#define DTYPE_ALGORITHM_UNDEFINED 0
+#define DTYPE_VIEW 1
+#define DTYPE_TABLE 2
+#define DTYPE_MERGE 4
+#define DTYPE_MATERIALIZE 8
+#define DTYPE_MULTITABLE 16
+#define DTYPE_MASK 19
+
+/*
+ Phases of derived tables/views handling, see sql_derived.cc
+ Values are used as parts of a bitmap attached to derived table types.
+*/
+#define DT_INIT 1
+#define DT_PREPARE 2
+#define DT_OPTIMIZE 4
+#define DT_MERGE 8
+#define DT_MERGE_FOR_INSERT 16
+#define DT_CREATE 32
+#define DT_FILL 64
+#define DT_REINIT 128
+#define DT_PHASES 8
+/* Phases that are applicable to all derived tables. */
+#define DT_COMMON (DT_INIT + DT_PREPARE + DT_REINIT + DT_OPTIMIZE)
+/* Phases that are applicable only to materialized derived tables. */
+#define DT_MATERIALIZE (DT_CREATE + DT_FILL)
+
+#define DT_PHASES_MERGE (DT_COMMON | DT_MERGE | DT_MERGE_FOR_INSERT)
+#define DT_PHASES_MATERIALIZE (DT_COMMON | DT_MATERIALIZE)
+
+#define VIEW_ALGORITHM_UNDEFINED 0
+#define VIEW_ALGORITHM_MERGE (DTYPE_VIEW | DTYPE_MERGE)
+#define VIEW_ALGORITHM_TMPTABLE (DTYPE_VIEW + DTYPE_MATERIALIZE )
+
#define JOIN_TYPE_LEFT 1
#define JOIN_TYPE_RIGHT 2
-#define VIEW_ALGORITHM_UNDEFINED 0
-#define VIEW_ALGORITHM_TMPTABLE 1
-#define VIEW_ALGORITHM_MERGE 2
-
#define VIEW_SUID_INVOKER 0
#define VIEW_SUID_DEFINER 1
#define VIEW_SUID_DEFAULT 2
@@ -1141,6 +1191,7 @@ class Item_in_subselect;
also (TABLE_LIST::field_translation != NULL)
- tmptable (TABLE_LIST::effective_algorithm == VIEW_ALGORITHM_TMPTABLE)
also (TABLE_LIST::field_translation == NULL)
+ 2.5) TODO: Add derived tables description here
3) nested table reference (TABLE_LIST::nested_join != NULL)
- table sequence - e.g. (t1, t2, t3)
TODO: how to distinguish from a JOIN?
@@ -1153,6 +1204,7 @@ class Item_in_subselect;
*/
class Index_hint;
+struct st_lex;
struct TABLE_LIST
{
TABLE_LIST() {} /* Remove gcc warning */
@@ -1246,6 +1298,8 @@ struct TABLE_LIST
filling procedure
*/
select_union *derived_result;
+ /* Stub used for materialized derived tables. */
+ table_map map; /* ID bit of table (1,2,4,8,16...) */
/*
Reference from aux_tables to local list entry of main select of
multi-delete statement:
@@ -1290,6 +1344,7 @@ struct TABLE_LIST
Field_translator *field_translation; /* array of VIEW fields */
/* pointer to element after last one in translation table above */
Field_translator *field_translation_end;
+ bool field_translation_updated;
/*
List (based on next_local) of underlying tables of this view. I.e. it
does not include the tables of subqueries used in the view. Is set only
@@ -1304,11 +1359,18 @@ struct TABLE_LIST
List<TABLE_LIST> *view_tables;
/* most upper view this table belongs to */
TABLE_LIST *belong_to_view;
+ /* A derived table this table belongs to */
+ TABLE_LIST *belong_to_derived;
/*
The view directly referencing this table
(non-zero only for merged underlying tables of a view).
*/
TABLE_LIST *referencing_view;
+
+ table_map view_used_tables;
+ table_map map_exec;
+ uint tablenr_exec;
+
/* Ptr to parent MERGE table list item. See top comment in ha_myisammrg.cc */
TABLE_LIST *parent_l;
/*
@@ -1321,13 +1383,7 @@ struct TABLE_LIST
SQL SECURITY DEFINER)
*/
Security_context *view_sctx;
- /*
- List of all base tables local to a subquery including all view
- tables. Unlike 'next_local', this in this list views are *not*
- leaves. Created in setup_tables() -> make_leaves_list().
- */
bool allowed_show;
- TABLE_LIST *next_leaf;
Item *where; /* VIEW WHERE clause condition */
Item *check_option; /* WITH CHECK OPTION condition */
LEX_STRING select_stmt; /* text of (CREATE/SELECT) statement */
@@ -1363,7 +1419,7 @@ struct TABLE_LIST
- VIEW_ALGORITHM_MERGE
@to do Replace with an enum
*/
- uint8 effective_algorithm;
+ uint8 derived_type;
GRANT_INFO grant;
/* data need by some engines in query cache*/
ulonglong engine_data;
@@ -1390,7 +1446,6 @@ struct TABLE_LIST
bool skip_temporary; /* this table shouldn't be temporary */
/* TRUE if this merged view contain auto_increment field */
bool contain_auto_increment;
- bool multitable_view; /* TRUE iff this is multitable view */
bool compact_view_format; /* Use compact format for SHOW CREATE VIEW */
/* view where processed */
bool where_processed;
@@ -1414,6 +1469,17 @@ struct TABLE_LIST
bool internal_tmp_table;
bool deleting; /* going to delete this table */
+ /* TRUE <=> derived table should be filled right after optimization. */
+ bool fill_me;
+ /* TRUE <=> view/DT is merged. */
+ bool merged;
+ bool merged_for_insert;
+ /* TRUE <=> don't prepare this derived table/view as it should be merged.*/
+ bool skip_prepare_derived;
+
+ List<Item> used_items;
+ Item **materialized_items;
+
/* View creation context. */
View_creation_ctx *view_creation_ctx;
@@ -1451,9 +1517,10 @@ struct TABLE_LIST
bool has_table_lookup_value;
uint table_open_method;
enum enum_schema_table_state schema_table_state;
+
void calc_md5(char *buffer);
- void set_underlying_merge();
int view_check_option(THD *thd, bool ignore_failure);
+ bool create_field_translation(THD *thd);
bool setup_underlying(THD *thd);
void cleanup_items();
bool placeholder()
@@ -1483,7 +1550,7 @@ struct TABLE_LIST
inline bool prepare_where(THD *thd, Item **conds,
bool no_where_clause)
{
- if (effective_algorithm == VIEW_ALGORITHM_MERGE)
+ if (!view || is_merged_derived())
return prep_where(thd, conds, no_where_clause);
return FALSE;
}
@@ -1549,6 +1616,60 @@ struct TABLE_LIST
m_table_ref_version= s->get_table_ref_version();
}
+ /* Set of functions returning/setting state of a derived table/view. */
+ inline bool is_non_derived()
+ {
+ return (!derived_type);
+ }
+ inline bool is_view_or_derived()
+ {
+ return (derived_type);
+ }
+ inline bool is_view()
+ {
+ return (derived_type & DTYPE_VIEW);
+ }
+ inline bool is_derived()
+ {
+ return (derived_type & DTYPE_TABLE);
+ }
+ inline void set_view()
+ {
+ derived_type= DTYPE_VIEW;
+ }
+ inline void set_derived()
+ {
+ derived_type= DTYPE_TABLE;
+ }
+ inline bool is_merged_derived()
+ {
+ return (derived_type & DTYPE_MERGE);
+ }
+ inline void set_merged_derived()
+ {
+ derived_type= ((derived_type & DTYPE_MASK) |
+ DTYPE_TABLE | DTYPE_MERGE);
+ }
+ inline bool is_materialized_derived()
+ {
+ return (derived_type & DTYPE_MATERIALIZE);
+ }
+ inline void set_materialized_derived()
+ {
+ derived_type= ((derived_type & DTYPE_MASK) |
+ DTYPE_TABLE | DTYPE_MATERIALIZE);
+ }
+ inline bool is_multitable()
+ {
+ return (derived_type & DTYPE_MULTITABLE);
+ }
+ inline void set_multitable()
+ {
+ derived_type|= DTYPE_MULTITABLE;
+ }
+ void reset_const_table();
+ bool handle_derived(struct st_lex *lex, uint phases);
+
/**
@brief True if this TABLE_LIST represents an anonymous derived table,
i.e. the result of a subquery.
@@ -1568,6 +1689,12 @@ struct TABLE_LIST
respectively.
*/
char *get_table_name() { return view != NULL ? view_name.str : table_name; }
+ st_select_lex_unit *get_unit();
+ st_select_lex *get_single_select();
+ void wrap_into_nested_join(List<TABLE_LIST> &join_list);
+ bool init_derived(THD *thd, bool init_view);
+ int fetch_number_of_rows();
+ bool change_refs_to_fields();
private:
bool prep_check_option(THD *thd, uint8 check_opt_type);
[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2792)
by Igor Babaev 26 May '10
#At lp:maria/5.2 based on revid:igor@askmonty.org-20100518174632-e2xaeunykfmtyafm
2792 Igor Babaev 2010-05-26
MWL#106: creation of keys for materialized derived tables/views.
Also fixed several bugs in the backported code.
modified:
mysql-test/r/derived_view.result
sql/item_cmpfunc.cc
sql/item_subselect.cc
sql/sql_base.cc
sql/sql_delete.cc
sql/sql_insert.cc
sql/sql_join_cache.cc
sql/sql_lex.h
sql/sql_parse.cc
sql/sql_prepare.cc
sql/sql_select.cc
sql/sql_select.h
sql/table.cc
sql/table.h
=== modified file 'mysql-test/r/derived_view.result'
--- a/mysql-test/r/derived_view.result 2010-05-18 17:46:32 +0000
+++ b/mysql-test/r/derived_view.result 2010-05-26 20:04:58 +0000
@@ -556,7 +556,7 @@ test two keys
explain select * from t1 join (select * from t2 group by f2) tt on t1.f1=tt.f2 join t1 xx on tt.f22=xx.f1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 11
-1 PRIMARY <derived2> ALL key0 NULL NULL NULL 11 Using where; Using join buffer
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.f1 2 Using where
1 PRIMARY xx ALL NULL NULL NULL NULL 11 Using where; Using join buffer
2 DERIVED t2 ALL NULL NULL NULL NULL 11 Using temporary; Using filesort
select * from t1 join (select * from t2 group by f2) tt on t1.f1=tt.f2 join t1 xx on tt.f22=xx.f1;
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2010-05-18 17:46:32 +0000
+++ b/sql/item_cmpfunc.cc 2010-05-26 20:04:58 +0000
@@ -4232,10 +4232,6 @@ Item_cond::fix_fields(THD *thd, Item **r
(item= *li.ref())->check_cols(1))
return TRUE; /* purecov: inspected */
used_tables_cache|= item->used_tables();
-#if 0
- if (!item->const_item())
- const_item_cache= FALSE;
-#else
if (item->const_item())
and_tables_cache= (table_map) 0;
else
@@ -4245,7 +4241,6 @@ Item_cond::fix_fields(THD *thd, Item **r
and_tables_cache&= tmp_table_map;
const_item_cache= FALSE;
}
-#endif
with_sum_func= with_sum_func || item->with_sum_func;
with_subselect|= item->with_subselect;
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-04-29 21:10:39 +0000
+++ b/sql/item_subselect.cc 2010-05-26 20:04:58 +0000
@@ -2894,6 +2894,9 @@ int subselect_uniquesubquery_engine::exe
DBUG_RETURN(0);
}
+ if (!tab->preread_init_done && tab->preread_init())
+ DBUG_RETURN(1);
+
if (null_keypart)
DBUG_RETURN(scan_table());
@@ -3026,7 +3029,7 @@ subselect_uniquesubquery_engine::~subsel
int subselect_indexsubquery_engine::exec()
{
- DBUG_ENTER("subselect_indexsubquery_engine::exec");
+ DBUG_ENTER("subselect_indexsubquery_engine");
int error;
bool null_finding= 0;
TABLE *table= tab->table;
@@ -3057,6 +3060,9 @@ int subselect_indexsubquery_engine::exec
DBUG_RETURN(0);
}
+ if (!tab->preread_init_done && tab->preread_init())
+ DBUG_RETURN(1);
+
if (null_keypart)
DBUG_RETURN(scan_table());
=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc 2010-05-12 04:09:58 +0000
+++ b/sql/sql_base.cc 2010-05-26 20:04:58 +0000
@@ -6288,7 +6288,9 @@ find_field_in_tables(THD *thd, Item_iden
find_field_in_table even in the case of information schema tables
when table_ref->field_translation != NULL.
*/
- if (table_ref->table && !table_ref->is_merged_derived())
+ if (table_ref->table &&
+ (!table_ref->is_merged_derived() ||
+ (!table_ref->is_multitable() && table_ref->merged_for_insert)))
found= find_field_in_table(thd, table_ref->table, name, length,
TRUE, &(item->cached_field_index));
else
=== modified file 'sql/sql_delete.cc'
--- a/sql/sql_delete.cc 2010-05-12 04:09:58 +0000
+++ b/sql/sql_delete.cc 2010-05-26 20:04:58 +0000
@@ -559,6 +559,11 @@ int mysql_multi_delete_prepare(THD *thd)
TABLE_LIST *target_tbl;
DBUG_ENTER("mysql_multi_delete_prepare");
+ TABLE_LIST *tables= lex->query_tables;
+ if (mysql_handle_derived(lex, DT_INIT) ||
+ mysql_handle_list_of_derived(lex, tables, DT_MERGE_FOR_INSERT) ||
+ mysql_handle_list_of_derived(lex, tables, DT_PREPARE))
+ DBUG_RETURN(TRUE);
/*
setup_tables() need for VIEWs. JOIN::prepare() will not do it second
time.
=== modified file 'sql/sql_insert.cc'
--- a/sql/sql_insert.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_insert.cc 2010-05-26 20:04:58 +0000
@@ -1184,8 +1184,8 @@ static bool mysql_prepare_insert_check_t
if (insert_into_view && !fields.elements)
{
thd->lex->empty_field_list_on_rset= 1;
- if (table_list->is_multitable() && !table_list->table ||
- !table_list->table->created)
+ if (!thd->lex->select_lex.leaf_tables.head()->table ||
+ table_list->is_multitable())
{
my_error(ER_VIEW_NO_INSERT_FIELD_LIST, MYF(0),
table_list->view_db.str, table_list->view_name.str);
@@ -1276,8 +1276,10 @@ bool mysql_prepare_insert(THD *thd, TABL
/* INSERT should have a SELECT or VALUES clause */
DBUG_ASSERT (!select_insert || !values);
+ if (mysql_handle_derived(thd->lex, DT_INIT))
+ DBUG_RETURN(TRUE);
if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT))
- DBUG_RETURN(TRUE);
+ DBUG_RETURN(TRUE);
if (mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
DBUG_RETURN(TRUE);
/*
=== modified file 'sql/sql_join_cache.cc'
--- a/sql/sql_join_cache.cc 2010-03-07 15:41:45 +0000
+++ b/sql/sql_join_cache.cc 2010-05-26 20:04:58 +0000
@@ -2370,6 +2370,8 @@ JOIN_CACHE_BKA::init_join_matching_recor
init_mrr_buff();
+ if (!join_tab->preread_init_done && join_tab->preread_init())
+ return NESTED_LOOP_ERROR;
/*
Prepare to iterate over keys from the join buffer and to get
matching candidates obtained with MRR handler functions.
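The guard added here and twice in item_subselect.cc above is the same lazy
one-shot initialisation: a materialized derived table that has not been filled
yet gets populated on first access, and the flag makes every later access a
cheap test. A toy model of the pattern (JoinTabSketch and the driver are
hypothetical; only the shape of the guard comes from the patch):

#include <cstdio>

/* Hypothetical stand-in for JOIN_TAB: a table that must be materialized
   before the first row can be read. */
struct JoinTabSketch
{
  bool preread_init_done;

  JoinTabSketch() : preread_init_done(false) {}

  /* One-time preparation; returns true on error (MySQL convention). */
  bool preread_init()
  {
    std::puts("materializing derived table ...");
    preread_init_done= true;
    return false;
  }

  /* Every read path repeats the guard the patch adds in three places. */
  bool read_first_row()
  {
    if (!preread_init_done && preread_init())
      return true;                      /* propagate materialization error */
    std::puts("reading rows");
    return false;
  }
};

int main()
{
  JoinTabSketch tab;
  tab.read_first_row();                 /* triggers materialization */
  tab.read_first_row();                 /* guard is now a cheap flag test */
  return 0;
}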
=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h 2010-04-29 21:10:39 +0000
+++ b/sql/sql_lex.h 2010-05-26 20:04:58 +0000
@@ -1873,6 +1873,8 @@ typedef struct st_lex : public Query_tab
switch (sql_command) {
case SQLCOM_UPDATE:
case SQLCOM_UPDATE_MULTI:
+ case SQLCOM_DELETE:
+ case SQLCOM_DELETE_MULTI:
case SQLCOM_INSERT:
case SQLCOM_INSERT_SELECT:
case SQLCOM_REPLACE:
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_parse.cc 2010-05-26 20:04:58 +0000
@@ -3425,9 +3425,6 @@ end_with_restore_list:
thd_proc_info(thd, "init");
if ((res= open_and_lock_tables(thd, all_tables)))
break;
- if (mysql_handle_list_of_derived(lex, all_tables, DT_MERGE_FOR_INSERT) ||
- mysql_handle_list_of_derived(lex, all_tables, DT_PREPARE))
- DBUG_RETURN(1);
if ((res= mysql_multi_delete_prepare(thd)))
goto error;
=== modified file 'sql/sql_prepare.cc'
--- a/sql/sql_prepare.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_prepare.cc 2010-05-26 20:04:58 +0000
@@ -1133,9 +1133,7 @@ static bool mysql_test_insert(Prepared_s
If we would use locks, then we have to ensure we are not using
TL_WRITE_DELAYED as having two such locks can cause table corruption.
*/
- if (open_normal_and_derived_tables(thd, table_list, 0,
- DT_INIT | DT_PREPARE | DT_CREATE) ||
- mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT))
+ if (open_normal_and_derived_tables(thd, table_list, 0, DT_INIT))
goto error;
if ((values= its++))
@@ -1236,9 +1234,16 @@ static int mysql_test_update(Prepared_st
thd->fill_derived_tables() is false here for sure (because it is
preparation of PS, so we even do not check it).
*/
- if (mysql_handle_derived(thd->lex, DT_PREPARE))
+ if (table_list->handle_derived(thd->lex, DT_MERGE_FOR_INSERT) ||
+ table_list->handle_derived(thd->lex, DT_PREPARE))
goto error;
+ if (!table_list->updatable)
+ {
+ my_error(ER_NON_UPDATABLE_TABLE, MYF(0), table_list->alias, "UPDATE");
+ goto error;
+ }
+
#ifndef NO_EMBEDDED_ACCESS_CHECKS
/* Force privilege re-checking for views after they have been opened. */
want_privilege= (table_list->view ? UPDATE_ACL :
@@ -1291,13 +1296,18 @@ error:
static bool mysql_test_delete(Prepared_statement *stmt,
TABLE_LIST *table_list)
{
+ uint table_count= 0;
THD *thd= stmt->thd;
LEX *lex= stmt->lex;
DBUG_ENTER("mysql_test_delete");
if (delete_precheck(thd, table_list) ||
- open_normal_and_derived_tables(thd, table_list, 0,
- DT_PREPARE | DT_CREATE))
+ open_tables(thd, &table_list, &table_count, 0))
+ goto error;
+
+ if (mysql_handle_derived(thd->lex, DT_INIT) ||
+ mysql_handle_list_of_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) ||
+ mysql_handle_list_of_derived(thd->lex, table_list, DT_PREPARE))
goto error;
if (!table_list->table)
@@ -1561,6 +1571,7 @@ select_like_stmt_test_with_open(Prepared
int (*specific_prepare)(THD *thd),
ulong setup_tables_done_option)
{
+ uint table_count= 0;
DBUG_ENTER("select_like_stmt_test_with_open");
/*
@@ -1569,8 +1580,8 @@ select_like_stmt_test_with_open(Prepared
prepared EXPLAIN yet so derived tables will clean up after
themself.
*/
- if (open_normal_and_derived_tables(stmt->thd, tables, 0,
- DT_PREPARE | DT_CREATE))
+ THD *thd= stmt->thd;
+ if (open_tables(thd, &tables, &table_count, 0))
DBUG_RETURN(TRUE);
DBUG_RETURN(select_like_stmt_test(stmt, specific_prepare,
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-05-18 17:46:32 +0000
+++ b/sql/sql_select.cc 2010-05-26 20:04:58 +0000
@@ -238,7 +238,7 @@ static void add_group_and_distinct_keys(
void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg,
double *record_count_arg);
static uint make_join_orderinfo(JOIN *join);
-static bool generate_derived_keys(List<TABLE_LIST> &tables);
+static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
static int
join_read_record_no_init(JOIN_TAB *tab);
@@ -744,18 +744,6 @@ JOIN::optimize()
tables= select_lex->leaf_tables.elements;
-#if 0
- if (thd->lex->describe)
- {
- /*
- Force join->join_tmp creation, because we will use this JOIN
- twice for EXPLAIN and we have to have unchanged join for EXPLAINing
- */
- select_lex->uncacheable|= UNCACHEABLE_EXPLAIN;
- select_lex->master_unit()->uncacheable|= UNCACHEABLE_EXPLAIN;
- }
-#else
-#endif
if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
DBUG_RETURN(-1);
@@ -3311,18 +3299,20 @@ add_key_field(KEY_FIELD **key_fields,uin
Field *field, bool eq_func, Item **value, uint num_values,
table_map usable_tables, SARGABLE_PARAM **sargables)
{
- uint exists_optimize= 0;
- if (field->table->pos_in_table_list->is_materialized_derived() &&
+ uint optimize= 0;
+ if (eq_func &&
+ field->table->pos_in_table_list->is_materialized_derived() &&
!field->table->created)
- field->table->pos_in_table_list->update_derived_keys(field, value,
- num_values);
- if (!(field->flags & PART_KEY_FLAG))
+ {
+ optimize= KEY_OPTIMIZE_EQ;
+ }
+ else if (!(field->flags & PART_KEY_FLAG))
{
// Don't remove column IS NULL on a LEFT JOIN table
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
!field->table->maybe_null || field->null_ptr)
return; // Not a key. Skip it
- exists_optimize= KEY_OPTIMIZE_EXISTS;
+ optimize= KEY_OPTIMIZE_EXISTS;
DBUG_ASSERT(num_values == 1);
}
else
@@ -3342,7 +3332,7 @@ add_key_field(KEY_FIELD **key_fields,uin
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
!field->table->maybe_null || field->null_ptr)
return; // Can't use left join optimize
- exists_optimize= KEY_OPTIMIZE_EXISTS;
+ optimize= KEY_OPTIMIZE_EXISTS;
}
else
{
@@ -3441,7 +3431,7 @@ add_key_field(KEY_FIELD **key_fields,uin
(*key_fields)->eq_func= eq_func;
(*key_fields)->val= *value;
(*key_fields)->level= and_level;
- (*key_fields)->optimize= exists_optimize;
+ (*key_fields)->optimize= optimize;
/*
If the condition has form "tbl.keypart = othertbl.field" and
othertbl.field can be NULL, there will be no matches if othertbl.field
@@ -3760,6 +3750,34 @@ max_part_bit(key_part_map bits)
return found;
}
+static bool
+add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
+ uint key, uint part)
+{
+ KEYUSE keyuse;
+ Field *field= key_field->field;
+
+ keyuse.table= field->table;
+ keyuse.val= key_field->val;
+ keyuse.key= key;
+ if (key != MAX_KEY)
+ {
+ keyuse.keypart=part;
+ keyuse.keypart_map= (key_part_map) 1 << part;
+ }
+ else
+ {
+ keyuse.keypart= field->field_index;
+ keyuse.keypart_map= (key_part_map) 0;
+ }
+ keyuse.used_tables= key_field->val->used_tables();
+ keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
+ keyuse.null_rejecting= key_field->null_rejecting;
+ keyuse.cond_guard= key_field->cond_guard;
+ keyuse.sj_pred_no= key_field->sj_pred_no;
+ return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
+}
+
/*
Add all keys with uses 'field' for some keypart
If field->and_level != and_level then only mark key_part as const_part
@@ -3774,10 +3792,13 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array
{
Field *field=key_field->field;
TABLE *form= field->table;
- KEYUSE keyuse;
if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
{
+ if (key_field->eq_func && (key_field->optimize & KEY_OPTIMIZE_EQ))
+ {
+ return add_keyuse(keyuse_array, key_field, MAX_KEY, 0);
+ }
for (uint key=0 ; key < form->s->keys ; key++)
{
if (!(form->keys_in_use_for_query.is_set(key)))
@@ -3790,17 +3811,7 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array
{
if (field->eq(form->key_info[key].key_part[part].field))
{
- keyuse.table= field->table;
- keyuse.val = key_field->val;
- keyuse.key = key;
- keyuse.keypart=part;
- keyuse.keypart_map= (key_part_map) 1 << part;
- keyuse.used_tables=key_field->val->used_tables();
- keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
- keyuse.null_rejecting= key_field->null_rejecting;
- keyuse.cond_guard= key_field->cond_guard;
- keyuse.sj_pred_no= key_field->sj_pred_no;
- if (insert_dynamic(keyuse_array,(uchar*) &keyuse))
+ if (add_keyuse(keyuse_array, key_field, key, part))
return TRUE;
}
}
@@ -3885,6 +3896,9 @@ sort_keyuse(KEYUSE *a,KEYUSE *b)
return (int) (a->table->tablenr - b->table->tablenr);
if (a->key != b->key)
return (int) (a->key - b->key);
+ if (a->key == MAX_KEY && b->key == MAX_KEY &&
+ a->used_tables != b->used_tables)
+ return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
if (a->keypart != b->keypart)
return (int) (a->keypart - b->keypart);
// Place const values before other ones
@@ -4080,9 +4094,6 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
}
}
- /* Generate keys descriptions for derived tables */
- generate_derived_keys(select_lex->leaf_tables);
-
/* fill keyuse with found key parts */
for ( ; field != end ; field++)
{
@@ -4117,6 +4128,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
if (insert_dynamic(keyuse,(uchar*) &key_end))
return TRUE;
+ generate_derived_keys(keyuse);
+
use=save_pos=dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant=0;
@@ -6979,33 +6992,93 @@ make_join_select(JOIN *join,SQL_SELECT *
}
-/**
- @brief
- Add keys to derived tables'/views' result tables in a list
-
- @param tables list of tables to generate keys for
-
- @details
- This function generates keys for all derived tables/views in the 'tables'
- list with help of the TABLE_LIST:generate_keys function.
+static
+uint get_next_field_for_derived_key(uchar *arg)
+{
+ KEYUSE *keyuse= *(KEYUSE **) arg;
+ if (!keyuse)
+ return (uint) (-1);
+ uint key= keyuse->key;
+ uint fldno= keyuse->keypart;
+ uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
+ 0 : (keyuse-1)->keypart+1;
+ for ( ; keyuse->key == key && keyuse->keypart == fldno; keyuse++)
+ keyuse->keypart= keypart;
+ if (keyuse->key != key)
+ keyuse= 0;
+ return fldno;
+}
- @note currently this function can't fail because errors from the
- TABLE_LIST:generate_keys function is ignored as they aren't critical to the
- query execution.
- @return FALSE all keys were successfully added.
-*/
+static
+bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
+{
+ TABLE *table= keyuse->table;
+ if (table->alloc_keys(keys))
+ return TRUE;
+ uint keyno= 0;
+ KEYUSE *first_keyuse= keyuse;
+ uint prev_part= (uint) (-1);
+ uint parts= 0;
+ uint i= 0;
+ do
+ {
+ keyuse->key= keyno;
+ keyuse->keypart_map= (key_part_map) (1 << parts);
+ keyuse++;
+ if (++i == count || keyuse->used_tables != first_keyuse->used_tables)
+ {
+ if (table->add_tmp_key(keyno, ++parts,
+ get_next_field_for_derived_key,
+ (uchar *) &first_keyuse))
+ return TRUE;
+ first_keyuse= keyuse;
+ keyno++;
+ parts= 0;
+ }
+ else if (keyuse->keypart != prev_part)
+ {
+ parts++;
+ prev_part= keyuse->keypart;
+ }
+ } while (keyno < keys);
+ return FALSE;
+}
+
static
-bool generate_derived_keys(List<TABLE_LIST> &tables)
+bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
{
- TABLE_LIST *table;
- List_iterator<TABLE_LIST> ti(tables);
- while ((table= ti++))
+ KEYUSE *keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
+ uint elements= keyuse_array->elements;
+ TABLE *prev_table= 0;
+ for (uint i= 0; i < elements; i++, keyuse++)
{
- /* Process tables that aren't materialized yet. */
- if (table->is_materialized_derived() && !table->table->created)
- table->generate_keys();
+ KEYUSE *first_table_keyuse;
+ table_map last_used_tables;
+ uint count;
+ uint keys;
+ while (keyuse->key == MAX_KEY)
+ {
+ if (keyuse->table != prev_table)
+ {
+ prev_table= keyuse->table;
+ first_table_keyuse= keyuse;
+ last_used_tables= keyuse->used_tables;
+ count= 0;
+ keys= 0;
+ }
+ else if (keyuse->used_tables != last_used_tables)
+ {
+ keys++;
+ last_used_tables= keyuse->used_tables;
+ }
+ count++;
+ keyuse++;
+ if (keyuse->table != prev_table &&
+ generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
+ return TRUE;
+ }
}
return FALSE;
}
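To see what generate_derived_keys() is doing with the keyuse array: entries
that refer to a not-yet-created derived table are tagged with key == MAX_KEY,
sort_keyuse() additionally orders those by used_tables, and every run of
entries depending on the same set of outer tables then becomes one generated
key, with one key part per distinct field. A simplified standalone model of
that grouping (KeyuseSketch and the table ids are invented; the keypart
renumbering done through the next_field_no callback is omitted):

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

/* Trimmed stand-in for KEYUSE: the derived table, a field index in it, and
   the map of outer tables the compared value depends on. */
struct KeyuseSketch
{
  int table;                    /* table id; the real code uses TABLE * */
  unsigned field;               /* field index in the derived table */
  unsigned long used_tables;    /* dependency map of the value */
};

/* Mirrors sort_keyuse(): group by table, then by used_tables. */
static bool keyuse_less(const KeyuseSketch &a, const KeyuseSketch &b)
{
  if (a.table != b.table) return a.table < b.table;
  if (a.used_tables != b.used_tables) return a.used_tables < b.used_tables;
  return a.field < b.field;
}

int main()
{
  /* Equalities collected for derived table 1:
     dt.f0 = t1.x (t1 map 2), dt.f1 = t1.y (map 2), dt.f0 = t2.z (map 4). */
  KeyuseSketch init[]= { {1, 0, 2}, {1, 1, 2}, {1, 0, 4} };
  std::vector<KeyuseSketch> keyuse(init, init + 3);
  std::sort(keyuse.begin(), keyuse.end(), keyuse_less);

  /* One key per run of equal (table, used_tables); one part per field. */
  unsigned keyno= 0;
  for (size_t i= 0; i < keyuse.size(); )
  {
    size_t j= i;
    std::printf("key%u on table %d:", keyno, keyuse[i].table);
    while (j < keyuse.size() &&
           keyuse[j].table == keyuse[i].table &&
           keyuse[j].used_tables == keyuse[i].used_tables)
      std::printf(" f%u", keyuse[j++].field);
    std::printf("\n");
    keyno++;
    i= j;
  }
  return 0;   /* prints: key0 on table 1: f0 f1; key1 on table 1: f0 */
}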
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-05-18 17:46:32 +0000
+++ b/sql/sql_select.h 2010-05-26 20:04:58 +0000
@@ -37,6 +37,7 @@
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
#define KEY_OPTIMIZE_REF_OR_NULL 2
+#define KEY_OPTIMIZE_EQ 4
typedef struct keyuse_t {
TABLE *table;
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-05-12 04:09:58 +0000
+++ b/sql/table.cc 2010-05-26 20:04:58 +0000
@@ -5143,12 +5143,11 @@ void st_table::mark_virtual_columns_for_
bool TABLE::alloc_keys(uint key_count)
{
DBUG_ASSERT(!s->keys);
- key_info= s->key_info= (KEY*) my_malloc(sizeof(KEY)*key_count, MYF(0));
+ key_info= s->key_info= (KEY*) alloc_root(&mem_root, sizeof(KEY)*key_count);
max_keys= key_count;
return !(key_info);
}
-
/**
@brief Adds one key to a temporary table.
@@ -5164,40 +5163,41 @@ bool TABLE::alloc_keys(uint key_count)
@return >=0 number of newly added key.
*/
-int TABLE::add_tmp_key(ulonglong key_parts, char *key_name)
+bool TABLE::add_tmp_key(uint key, uint key_parts,
+ uint (*next_field_no) (uchar *), uchar *arg)
{
- DBUG_ASSERT(!created && s->keys< max_keys);
+ DBUG_ASSERT(!created && key < max_keys);
+ char buf[NAME_CHAR_LEN];
KEY* keyinfo;
Field **reg_field;
uint i;
bool key_start= TRUE;
- uint key_part_count= my_count_bits(key_parts);
KEY_PART_INFO* key_part_info=
- (KEY_PART_INFO*) my_malloc(sizeof(KEY_PART_INFO)* key_part_count, MYF(0));
+ (KEY_PART_INFO*) alloc_root(&mem_root, sizeof(KEY_PART_INFO)*key_parts);
if (!key_part_info)
- return -1;
- keyinfo= key_info + s->keys;
- keyinfo->key_part=key_part_info;
- keyinfo->usable_key_parts=keyinfo->key_parts= key_part_count;
+ return TRUE;
+ keyinfo= key_info + key;
+ keyinfo->key_part= key_part_info;
+ keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
keyinfo->key_length=0;
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
- keyinfo->name= key_name;
keyinfo->flags= HA_GENERATED_KEY;
- keyinfo->rec_per_key= (ulong*)my_malloc(sizeof(ulong)*key_part_count, MYF(0));
+ sprintf(buf, "key%i", key);
+ if (!(keyinfo->name= strdup_root(&mem_root, buf)))
+ return TRUE;
+ keyinfo->rec_per_key= (ulong*) alloc_root(&mem_root,
+ sizeof(ulong)*key_parts);
if (!keyinfo->rec_per_key)
- return -1;
- bzero(keyinfo->rec_per_key, sizeof(ulong)*key_part_count);
- for (i= 0, reg_field=field ;
- *reg_field;
- i++, reg_field++)
+ return TRUE;
+ bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts);
+ for (i= 0; i < key_parts; i++)
{
- if (!(key_parts & (1 << i)))
- continue;
+ reg_field= field + next_field_no(arg);
if (key_start)
- (*reg_field)->key_start.set_bit(s->keys);
+ (*reg_field)->key_start.set_bit(key);
key_start= FALSE;
- (*reg_field)->part_of_key.set_bit(s->keys);
+ (*reg_field)->part_of_key.set_bit(key);
(*reg_field)->flags|= PART_KEY_FLAG;
key_part_info->null_bit= (*reg_field)->null_bit;
key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
@@ -5231,10 +5231,10 @@ int TABLE::add_tmp_key(ulonglong key_par
key_part_info++;
}
set_if_bigger(s->max_key_length, keyinfo->key_length);
- return ++s->keys - 1;
+ s->keys++;
+ return FALSE;
}
-
/*
@brief
Drop all indexes except specified one.
@@ -5310,6 +5310,7 @@ void TABLE_LIST::reinit_before_use(THD *
parent_embedding->nested_join->join_list.head() == embedded);
}
+
/*
Return subselect that contains the FROM list this table is taken from
@@ -5819,143 +5820,8 @@ int TABLE_LIST::fetch_number_of_rows()
t1 ON tt.f1=a_function(t1.f3);
In this case for the derived table tt one key will be generated. It will
consist of one field - f1.
-
- Implementation is split in two steps:
- gathering information on all used fields of derived tables/view and
- store it in lists of possible keys, one per a derived table/view.
- add keys to result tables of derived tables/view using info from above
- lists.
-
- The above procedure is implemented in 4 functions:
- TABLE_LIST::update_derived_keys
- Create/extend list of possible keys for one derived
- table/view based on given field/used tables info.
- (Step one)
- generate_derived_keys This function is called at the moment when all
- possible info on keys is gathered and it's safe to
- add keys. Walk over list of derived tables/views and
- calls to TABLE_LIST::generate_keys to actually
- generate keys. (Step two)
- TABLE_LIST::generate_keys
- Walks over list of possible keys for this derived
- table/view to add keys to the result table.
- Calls to TABLE::add_tmp_index to actually add
- keys. (Step two)
- TABLE::add_tmp_index Creates one index description according to given
- bitmap of used fields. (Step two)
- There is also the fifth function called TABLE::use_index. It saves used
- key and frees others. It is called when the optimizer has chosen which key
- it will use, thus we don't need other keys anymore.
-*/
-
-
-/*
- @brief
- Update derived table's list of possible keys
-
- @param field derived table's field to take part in a key
- @param values array of values
- @param num_values number of elements in the array values
-
- @details
- This function creates/extends a list of possible keys for this derived
- table/view. For each table used by a value from the 'values' array the
- corresponding possible key is extended to include the 'field'.
- If there is no such possible key then it is created. field's
- key_start/part_of_key bitmaps are updated accordingly.
-
- @return TRUE new possible key can't be allocated.
- @return FALSE list of possible keys successfully updated.
-*/
-
-bool TABLE_LIST::update_derived_keys(Field *field, Item **values,
- uint num_values)
-{
- DERIVED_KEY_MAP *entry= 0;
- List_iterator<DERIVED_KEY_MAP> ki(derived_keymap_list);
- uint i;
-
- /* Allow all keys to be used. */
- if (!derived_keymap_list.elements)
- {
- table->keys_in_use_for_query.set_all();
- table->s->uniques= 0;
- derived_keymap_list.empty();
- }
-
- for (i= 0; i < num_values; i++)
- {
- uint tbl;
- table_map tables= values[i]->used_tables() & ~OUTER_REF_TABLE_BIT;
- for (tbl= 1; tables >= tbl; tbl<<= 1)
- {
- uint key= 0;
- if (! (tables & tbl))
- continue;
- ki.rewind();
- while ((entry= ki++))
- {
- key++;
- if (entry->referenced_by & tbl)
- break;
- }
- if (!entry)
- {
- key++;
- entry= (DERIVED_KEY_MAP*)my_malloc(sizeof(DERIVED_KEY_MAP), MYF(0));
- if (!entry)
- return TRUE;
- entry->referenced_by|= tbl;
- entry->used_fields.clear_all();
- derived_keymap_list.push_back(entry);
- field->key_start.set_bit(key);
- table->max_keys++;
- }
- field->part_of_key.set_bit(key - 1);
- field->flags|= PART_KEY_FLAG;
- entry->used_fields.set_bit(field->field_index);
- }
- }
- return FALSE;
-}
-
-
-/**
- @brief
- Generate keys for a materialized derived table/view
-
- @details
- This function adds keys to the result table by walking over the list of
- possible keys for this derived table/view and calling to the
- TABLE::add_tmp_index to actually add keys. A name "key" with a sequential
- number is given to each key to ease debugging.
-
- @return TRUE an error occur.
- @return FALSE all keys were successfully added.
*/
-bool TABLE_LIST::generate_keys()
-{
- List_iterator<DERIVED_KEY_MAP> it(derived_keymap_list);
- DERIVED_KEY_MAP *entry;
- uint key= 0;
- char buf[NAME_CHAR_LEN];
- DBUG_ASSERT(is_materialized_derived());
-
- if (!derived_keymap_list.elements)
- return FALSE;
-
- table->alloc_keys(table->max_keys);
- while ((entry= it++))
- {
- table->s->key_parts+= entry->used_fields.bits_set();
- sprintf(buf, "key%i", key++);
- if (table->add_tmp_key(entry->used_fields.to_ulonglong(),
- table->in_use->strdup(buf)) < 0)
- return TRUE;
- }
- return FALSE;
-}
/*
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-04-29 21:10:39 +0000
+++ b/sql/table.h 2010-05-26 20:04:58 +0000
@@ -916,7 +916,8 @@ struct st_table {
inline bool needs_reopen_or_name_lock()
{ return s->version != refresh_version; }
bool alloc_keys(uint key_count);
- int add_tmp_key(ulonglong key_parts, char *key_name);
+ bool add_tmp_key(uint key, uint key_parts,
+ uint (*next_field_no) (uchar *), uchar *arg);
void use_index(int key_to_save);
void set_table_map(table_map map_arg, uint tablenr_arg)
{
@@ -1202,20 +1203,6 @@ class Item_in_subselect;
(TABLE_LIST::join_using_fields != NULL)
*/
-/*
- This structure is used to keep info about possible key for the result table
- of a derived table/view.
- The 'referenced_by' is the table map of the table to which this possible
- key corresponds.
- The 'used_field' is a map of fields of which this key consists of.
- See also the comment for the TABLE_LIST::update_derived_keys function.
-*/
-struct st_derived_table_key_map {
- table_map referenced_by;
- key_map used_fields;
-};
-typedef st_derived_table_key_map DERIVED_KEY_MAP;
-
class Index_hint;
struct st_lex;
struct TABLE_LIST
@@ -1531,8 +1518,6 @@ struct TABLE_LIST
uint table_open_method;
enum enum_schema_table_state schema_table_state;
- List<DERIVED_KEY_MAP> derived_keymap_list;
-
void calc_md5(char *buffer);
int view_check_option(THD *thd, bool ignore_failure);
bool create_field_translation(THD *thd);
@@ -1709,8 +1694,6 @@ struct TABLE_LIST
void wrap_into_nested_join(List<TABLE_LIST> &join_list);
bool init_derived(THD *thd, bool init_view);
int fetch_number_of_rows();
- bool update_derived_keys(Field *field, Item **values, uint num_values);
- bool generate_keys();
bool change_refs_to_fields();
private:
Hi Adam,
We're upgrading the bzr repositories on launchpad to a new repository format
2a.
Because of this, you need to do some changes on your buildbot hosts.
These are the slaves mariadb-brs and adutko-ultrasparc3.
(The slave adutko-centos5-amd64 does not need any changes, as it is not using
bzr for the build).
You have to do two things:
1. make sure you are using at least bzr 1.16 (or better 2.x). If the bzr on
either slave is older, then you need to upgrade it.
2. Upgrade the shared repository on the slave to format 2a. These are the
following directories on the slaves:
/export/home/buildbot/maria-slave/adutko-ultrasparc3/.bzr
/var/lib/buildbot/maria-slave/.bzr
You need to move away/delete those .bzr directories and replace them with a
new format 2a repository .bzr directory.
You can obtain the new .bzr directory in two ways:
- Download http://hasky.askmonty.org/download/mariadb-shared-repo.tgz , there
is a suitable .bzr included (300Mb).
- Delete the old .bzr and run
bzr init-repo --format=2a /export/home/buildbot/maria-slave/adutko-ultrasparc3
respectively
bzr init-repo --format=2a /var/lib/buildbot/maria-slave
In the latter case, buildbot will need to download (once) all revisions
from launchpad again, so the first way is best on slow connections (around
200Mb of download).
Afterwards you can verify that things work by running bzr info in each of
/export/home/buildbot/maria-slave/adutko-ultrasparc3
/var/lib/buildbot/maria-slave
It should say something like
Shared repository with trees (format: 2a)
Just let me know if you need any help, and sorry for the inconvenience,
- Kristian.
[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2859: Fix short version number
by noreply@launchpad.net 26 May '10
------------------------------------------------------------
revno: 2859
committer: Bo Thorsen <bo(a)askmonty.org>
branch nick: trunk-work
timestamp: Wed 2010-05-26 10:40:01 +0200
message:
Fix short version number
modified:
win/make_mariadb_win_dist
[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2849)
by knielsen@knielsen-hq.org 26 May '10
#At lp:maria
2849 knielsen(a)knielsen-hq.org 2010-05-26
Preliminary commit of group commit patch.
added:
mysql-test/suite/binlog/r/binlog_ioerr.result
mysql-test/suite/binlog/t/binlog_ioerr.test
modified:
sql/handler.cc
sql/handler.h
sql/log.cc
sql/log.h
sql/log_event.h
sql/sql_class.cc
sql/sql_class.h
sql/sql_load.cc
sql/table.cc
sql/table.h
storage/xtradb/handler/ha_innodb.cc
=== added file 'mysql-test/suite/binlog/r/binlog_ioerr.result'
--- a/mysql-test/suite/binlog/r/binlog_ioerr.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/binlog/r/binlog_ioerr.result 2010-05-26 08:16:18 +0000
@@ -0,0 +1,28 @@
+CALL mtr.add_suppression("Error writing file 'master-bin'");
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES(0);
+SET SESSION debug='+d,fail_binlog_write_1';
+INSERT INTO t1 VALUES(1);
+ERROR HY000: Error writing file 'master-bin' (errno: 22)
+INSERT INTO t1 VALUES(2);
+ERROR HY000: Error writing file 'master-bin' (errno: 22)
+SET SESSION debug='';
+INSERT INTO t1 VALUES(3);
+SELECT * FROM t1;
+a
+0
+3
+SHOW BINLOG EVENTS;
+Log_name Pos Event_type Server_id End_log_pos Info
+BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: #
+BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0)
+BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3)
+BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
+DROP TABLE t1;
=== added file 'mysql-test/suite/binlog/t/binlog_ioerr.test'
--- a/mysql-test/suite/binlog/t/binlog_ioerr.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/binlog/t/binlog_ioerr.test 2010-05-26 08:16:18 +0000
@@ -0,0 +1,29 @@
+source include/have_debug.inc;
+source include/have_innodb.inc;
+source include/have_log_bin.inc;
+source include/have_binlog_format_mixed_or_statement.inc;
+
+CALL mtr.add_suppression("Error writing file 'master-bin'");
+
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES(0);
+SET SESSION debug='+d,fail_binlog_write_1';
+--error ER_ERROR_ON_WRITE
+INSERT INTO t1 VALUES(1);
+--error ER_ERROR_ON_WRITE
+INSERT INTO t1 VALUES(2);
+SET SESSION debug='';
+INSERT INTO t1 VALUES(3);
+SELECT * FROM t1;
+
+# Actually the output from this currently shows a bug.
+# The injected IO error leaves partially written transactions in the binlog in
+# the form of stray "BEGIN" events.
+# These should disappear from the output if binlog error handling is improved.
+--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
+--replace_column 1 BINLOG 2 POS 5 ENDPOS
+SHOW BINLOG EVENTS;
+
+DROP TABLE t1;
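For readers unfamiliar with the mechanism the test drives: SET SESSION
debug='+d,fail_binlog_write_1' arms a named keyword that a DBUG_EXECUTE_IF
site in the binlog write path checks, turning one write call into an injected
failure. A rough standalone imitation (the real DBUG package is per-thread and
far richer; everything below is a stand-in for illustration):

#include <cstdio>
#include <set>
#include <string>

/* Hypothetical miniature of the DBUG keyword switch that
   SET SESSION debug='+d,fail_binlog_write_1' toggles. */
static std::set<std::string> active_keywords;

static bool dbug_keyword_set(const char *keyword)
{
  return active_keywords.count(keyword) != 0;
}

/* The write path checks the keyword and fails on demand. */
static int binlog_write(const char *event)
{
  if (dbug_keyword_set("fail_binlog_write_1"))
    return 1;                           /* simulated I/O error */
  std::printf("wrote: %s\n", event);
  return 0;
}

int main()
{
  binlog_write("INSERT INTO t1 VALUES(0)");        /* succeeds */
  active_keywords.insert("fail_binlog_write_1");   /* +d,... */
  if (binlog_write("INSERT INTO t1 VALUES(1)"))
    std::puts("Error writing file 'master-bin'");  /* as in the result file */
  active_keywords.erase("fail_binlog_write_1");    /* debug='' */
  binlog_write("INSERT INTO t1 VALUES(3)");        /* succeeds again */
  return 0;
}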
=== modified file 'sql/handler.cc'
--- a/sql/handler.cc 2010-04-06 22:47:08 +0000
+++ b/sql/handler.cc 2010-05-26 08:16:18 +0000
@@ -76,6 +76,7 @@ TYPELIB tx_isolation_typelib= {array_ele
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
+static int commit_one_phase_2(THD *thd, bool all, bool do_commit_ordered);
static plugin_ref ha_default_plugin(THD *thd)
@@ -544,6 +545,26 @@ err:
DBUG_RETURN(1);
}
+/*
+ This is a queue of THDs waiting for being group committed with
+ tc_log->group_log_xid().
+*/
+static THD *group_commit_queue;
+/*
+ This mutex protects the group_commit_queue on platforms without native
+ atomic operations.
+ */
+static pthread_mutex_t LOCK_group_commit_queue;
+/* This mutex is used to serialize calls to handler prepare_ordered methods. */
+static pthread_mutex_t LOCK_prepare_ordered;
+/* This mutex is used to serialize calls to handler commit_ordered methods. */
+static pthread_mutex_t LOCK_commit_ordered;
+/* This mutex is used to serialize calls to group_log_xid(). */
+static pthread_mutex_t LOCK_group_commit;
+static pthread_cond_t COND_group_commit;
+
+static bool mutexes_inited= FALSE;
+
int ha_init()
{
int error= 0;
@@ -557,6 +578,19 @@ int ha_init()
*/
opt_using_transactions= total_ha>(ulong)opt_bin_log;
savepoint_alloc_size+= sizeof(SAVEPOINT);
+
+ group_commit_queue= NULL;
+ my_pthread_mutex_init(&LOCK_group_commit_queue, MY_MUTEX_INIT_FAST,
+ "LOCK_group_commit_queue", MYF(0));
+ my_pthread_mutex_init(&LOCK_prepare_ordered, MY_MUTEX_INIT_SLOW,
+ "LOCK_prepare_ordered", MYF(0));
+ my_pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_SLOW,
+ "LOCK_commit_ordered", MYF(0));
+ my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW,
+ "LOCK_group_commit", MYF(0));
+ pthread_cond_init(&COND_group_commit, 0);
+ mutexes_inited= TRUE;
+
DBUG_RETURN(error);
}
@@ -574,6 +608,15 @@ int ha_end()
if (ha_finish_errors())
error= 1;
+ if (mutexes_inited)
+ {
+ pthread_mutex_destroy(&LOCK_group_commit_queue);
+ pthread_mutex_destroy(&LOCK_prepare_ordered);
+ pthread_mutex_destroy(&LOCK_commit_ordered);
+ pthread_mutex_destroy(&LOCK_group_commit);
+ mutexes_inited= FALSE;
+ }
+
DBUG_RETURN(error);
}
@@ -1053,6 +1096,108 @@ ha_check_and_coalesce_trx_read_only(THD
return rw_ha_count;
}
+/*
+ Atomically enqueue a THD at the head of the queue of threads waiting to
+ group commit, and return the previous head of the queue.
+*/
+static THD *
+enqueue_atomic(THD *thd)
+{
+ my_atomic_rwlock_wrlock(&LOCK_group_commit_queue);
+ thd->next_commit_ordered= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&thd->next_commit_ordered),
+ thd))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_group_commit_queue);
+ return thd->next_commit_ordered;
+}
+
+static THD *
+atomic_grab_reverse_queue()
+{
+ my_atomic_rwlock_wrlock(&LOCK_group_commit_queue);
+ THD *queue= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&queue),
+ NULL))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_group_commit_queue);
+
+ /*
+ Since we enqueue at the head, the queue is actually in reverse order.
+ So reverse it back into correct commit order before returning.
+ */
+ THD *prev= NULL;
+ while (queue)
+ {
+ THD *next= queue->next_commit_ordered;
+ queue->next_commit_ordered= prev;
+ prev= queue;
+ queue= next;
+ }
+
+ return prev;
+}
+
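enqueue_atomic() and atomic_grab_reverse_queue() above form a lock-free LIFO
list: push at the head with a CAS loop, detach the whole list with an atomic
swap, which is why the grabbed queue has to be reversed to recover commit
order. The same idea in C++11 (ThdSketch stands in for THD, and std::atomic
replaces the my_atomic wrappers):

#include <atomic>
#include <cstdio>

/* Hypothetical stand-in for THD with its intrusive queue link. */
struct ThdSketch
{
  int id;
  ThdSketch *next_commit_ordered;
};

static std::atomic<ThdSketch*> group_commit_queue(NULL);

/* Push at the head with a CAS loop; returns the previous head.
   NULL means the caller just became the group commit leader. */
static ThdSketch *enqueue_atomic(ThdSketch *thd)
{
  thd->next_commit_ordered= group_commit_queue.load();
  while (!group_commit_queue.compare_exchange_weak(thd->next_commit_ordered,
                                                   thd))
    ;   /* on failure the expected value is reloaded automatically */
  return thd->next_commit_ordered;
}

/* Detach the whole list atomically, then reverse it into commit order. */
static ThdSketch *atomic_grab_reverse_queue()
{
  ThdSketch *queue= group_commit_queue.exchange(NULL);
  ThdSketch *prev= NULL;
  while (queue)
  {
    ThdSketch *next= queue->next_commit_ordered;
    queue->next_commit_ordered= prev;
    prev= queue;
    queue= next;
  }
  return prev;
}

int main()
{
  ThdSketch a= {1, NULL}, b= {2, NULL}, c= {3, NULL};
  std::printf("a is leader: %d\n", enqueue_atomic(&a) == NULL);
  enqueue_atomic(&b);
  enqueue_atomic(&c);
  for (ThdSketch *t= atomic_grab_reverse_queue(); t; t= t->next_commit_ordered)
    std::printf("commit order: thd %d\n", t->id);   /* 1, then 2, then 3 */
  return 0;
}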
+static void
+call_commit_ordered(Ha_trx_info *ha_info, THD *thd, bool all)
+{
+ for (; ha_info; ha_info= ha_info->next())
+ {
+ handlerton *ht= ha_info->ht();
+ if (!ht->commit_ordered)
+ continue;
+ ht->commit_ordered(ht, thd, all);
+ }
+}
+
+static void
+group_commit_wait_for_wakeup(THD *thd)
+{
+ pthread_mutex_lock(&thd->LOCK_commit_ordered);
+ while (!thd->group_commit_ready)
+ pthread_cond_wait(&thd->COND_commit_ordered,
+ &thd->LOCK_commit_ordered);
+ pthread_mutex_unlock(&thd->LOCK_commit_ordered);
+}
+
+static void
+group_commit_wakeup_other(THD *other_thd)
+{
+ pthread_mutex_lock(&other_thd->LOCK_commit_ordered);
+ other_thd->group_commit_ready= TRUE;
+ pthread_cond_signal(&other_thd->COND_commit_ordered);
+ pthread_mutex_unlock(&other_thd->LOCK_commit_ordered);
+}
+
+static bool group_commit_queue_busy= 0;
+
+static void
+group_commit_mark_queue_idle()
+{
+ pthread_mutex_lock(&LOCK_group_commit);
+ group_commit_queue_busy= FALSE;
+ pthread_cond_signal(&COND_group_commit);
+ pthread_mutex_unlock(&LOCK_group_commit);
+}
+
+static void
+group_commit_mark_queue_busy()
+{
+ safe_mutex_assert_owner(&LOCK_group_commit);
+ group_commit_queue_busy= TRUE;
+}
+
+static void
+group_commit_wait_queue_idle()
+{
+ /* Wait for any existing queue run to finish. */
+ safe_mutex_assert_owner(&LOCK_group_commit);
+ while (group_commit_queue_busy)
+ pthread_cond_wait(&COND_group_commit, &LOCK_group_commit);
+}
+
/**
@retval
@@ -1070,7 +1215,7 @@ ha_check_and_coalesce_trx_read_only(THD
*/
int ha_commit_trans(THD *thd, bool all)
{
- int error= 0, cookie= 0;
+ int error= 0;
/*
'all' means that this is either an explicit commit issued by
user, or an implicit commit issued by a DDL.
@@ -1085,7 +1230,10 @@ int ha_commit_trans(THD *thd, bool all)
*/
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list;
- my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+ bool need_prepare_ordered, need_commit_ordered;
+ bool need_enqueue;
+ bool is_group_commit_leader;
+ my_xid xid;
DBUG_ENTER("ha_commit_trans");
/*
@@ -1118,85 +1266,277 @@ int ha_commit_trans(THD *thd, bool all)
DBUG_RETURN(2);
}
#ifdef USING_TRANSACTIONS
- if (ha_info)
+ if (!ha_info)
{
- uint rw_ha_count;
- bool rw_trans;
+ /* Free resources and perform other cleanup even for 'empty' transactions. */
+ if (is_real_trans)
+ thd->transaction.cleanup();
+ DBUG_RETURN(0);
+ }
- DBUG_EXECUTE_IF("crash_commit_before", abort(););
+ DBUG_EXECUTE_IF("crash_commit_before", abort(););
- /* Close all cursors that can not survive COMMIT */
- if (is_real_trans) /* not a statement commit */
- thd->stmt_map.close_transient_cursors();
+ /* Close all cursors that can not survive COMMIT */
+ if (is_real_trans) /* not a statement commit */
+ thd->stmt_map.close_transient_cursors();
- rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
- /* rw_trans is TRUE when we in a transaction changing data */
- rw_trans= is_real_trans && (rw_ha_count > 0);
+ uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
+ /* rw_trans is TRUE when we in a transaction changing data */
+ bool rw_trans= is_real_trans && (rw_ha_count > 0);
- if (rw_trans &&
- wait_if_global_read_lock(thd, 0, 0))
- {
- ha_rollback_trans(thd, all);
- DBUG_RETURN(1);
- }
+ if (rw_trans &&
+ wait_if_global_read_lock(thd, 0, 0))
+ {
+ ha_rollback_trans(thd, all);
+ DBUG_RETURN(1);
+ }
+
+ if (rw_trans &&
+ opt_readonly &&
+ !(thd->security_ctx->master_access & SUPER_ACL) &&
+ !thd->slave_thread)
+ {
+ my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
+ goto err;
+ }
- if (rw_trans &&
- opt_readonly &&
- !(thd->security_ctx->master_access & SUPER_ACL) &&
- !thd->slave_thread)
+ if (trans->no_2pc || (rw_ha_count <= 1))
+ {
+ error= ha_commit_one_phase(thd, all);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+
+ need_prepare_ordered= FALSE;
+ need_commit_ordered= FALSE;
+ xid= thd->transaction.xid_state.xid.get_my_xid();
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+ {
+ int err;
+ handlerton *ht= hi->ht();
+ /*
+ Do not call two-phase commit if this particular
+ transaction is read-only. This allows for simpler
+ implementation in engines that are always read-only.
+ */
+ if (! hi->is_trx_read_write())
+ continue;
+ /*
+ Sic: we know that prepare() is not NULL since otherwise
+ trans->no_2pc would have been set.
+ */
+ if ((err= ht->prepare(ht, thd, all)))
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+ status_var_increment(thd->status_var.ha_prepare_count);
+
+ if (err)
+ goto err;
+
+ if (ht->prepare_ordered)
+ need_prepare_ordered= TRUE;
+ if (ht->commit_ordered)
+ need_commit_ordered= TRUE;
+ }
+ DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
+
+ if (!is_real_trans)
+ {
+ error= commit_one_phase_2(thd, all, FALSE);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+
+ /*
+ We can optimise away some of the thread synchronisation that may not be
+ needed.
+
+ If need_prepare_ordered, then we need to take LOCK_prepare_ordered.
+
+ If (xid && use_group_log_xid), then we need to enqueue (and this must
+ be done under LOCK_prepare_ordered if we take that lock).
+
+ Similarly, if (need_prepare_ordered && need_commit_ordered), then we
+ need to enqueue under the LOCK_prepare_ordered.
+
+ If (xid && use_group_log_xid), then we need to take LOCK_group_commit.
+
+ If need_commit_ordered, then we need to take LOCK_commit_ordered.
+
+ Cases not covered by the above can be skipped to optimise things a bit.
+ */
+ need_enqueue= (xid && tc_log->use_group_log_xid) ||
+ (need_prepare_ordered && need_commit_ordered);
+
+ thd->group_commit_ready= FALSE;
+ thd->group_commit_all= all;
+ if (need_prepare_ordered)
+ {
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
{
- my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
- ha_rollback_trans(thd, all);
- error= 1;
- goto end;
+ int err;
+ handlerton *ht= hi->ht();
+ if (! hi->is_trx_read_write())
+ continue;
+ if (ht->prepare_ordered && (err= ht->prepare_ordered(ht, thd, all)))
+ {
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+ goto err;
+ }
}
+ }
+ if (need_enqueue)
+ {
+ THD *previous_queue= enqueue_atomic(thd);
+ is_group_commit_leader= (previous_queue == NULL);
+ }
+ if (need_prepare_ordered)
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
- if (!trans->no_2pc && (rw_ha_count > 1))
+ int cookie;
+ if (tc_log->use_group_log_xid)
+ {
+ if (is_group_commit_leader)
{
- for (; ha_info && !error; ha_info= ha_info->next())
+ pthread_mutex_lock(&LOCK_group_commit);
+ group_commit_wait_queue_idle();
+
+ THD *queue= atomic_grab_reverse_queue();
+ /* The first in the queue is the leader. */
+ DBUG_ASSERT(queue == thd);
+
+ /*
+ This will set individual error codes in each thd->xid_error, as
+ well as set thd->xid_cookie for later unlog() call.
+ */
+ tc_log->group_log_xid(queue);
+
+ /*
+ Call commit_ordered methods for all transactions in the queue
+ (that did not get an error in group_log_xid()).
+
+ We do this under an additional global LOCK_commit_ordered; this is
+ so that transactions that do not need 2-phase commit do not have
+ to wait for the potentially long duration of LOCK_group_commit.
+ */
+ if (need_commit_ordered)
{
- int err;
- handlerton *ht= ha_info->ht();
- /*
- Do not call two-phase commit if this particular
- transaction is read-only. This allows for simpler
- implementation in engines that are always read-only.
- */
- if (! ha_info->is_trx_read_write())
- continue;
- /*
- Sic: we know that prepare() is not NULL since otherwise
- trans->no_2pc would have been set.
- */
- if ((err= ht->prepare(ht, thd, all)))
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ for (THD *thd2= queue; thd2 != NULL; thd2= thd2->next_commit_ordered)
{
- my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
- error= 1;
+ if (!queue->xid_error)
+ call_commit_ordered(ha_info, thd2, thd2->group_commit_all);
}
- status_var_increment(thd->status_var.ha_prepare_count);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
}
- DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
- if (error || (is_real_trans && xid &&
- (error= !(cookie= tc_log->log_xid(thd, xid)))))
- {
- ha_rollback_trans(thd, all);
- error= 1;
- goto end;
- }
- DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
+ pthread_mutex_unlock(&LOCK_group_commit);
+
+ /* Wake up everyone except ourself. */
+ while ((queue= queue->next_commit_ordered) != NULL)
+ group_commit_wakeup_other(queue);
+ }
+ else
+ {
+ /* If not leader, just wait until leader wakes us up. */
+ group_commit_wait_for_wakeup(thd);
+ }
+
+ /*
+ Now that we're back in our own thread context, do any delayed error
+ reporting.
+ */
+ if (thd->xid_error)
+ {
+ tc_log->xid_delayed_error(thd);
+ goto err;
+ }
+ cookie= thd->xid_cookie;
+ /* The cookie must be non-zero in the non-error case. */
+ DBUG_ASSERT(cookie);
+ }
+ else
+ {
+ if (xid)
+ cookie= tc_log->log_xid(thd, xid);
+
+ if (!need_enqueue)
+ {
+ error= commit_one_phase_2(thd, all, TRUE);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+
+ /*
+ We only get here to do correctly sequenced prepare_ordered and
+ commit_ordered() calls.
+
+ In this case, we need to wait for the previous in queue to finish
+ commit_ordered before us to get the correct sequence.
+ */
+ DBUG_ASSERT(need_prepare_ordered && need_commit_ordered);
+
+ if (is_group_commit_leader)
+ {
+ pthread_mutex_lock(&LOCK_group_commit);
+ group_commit_wait_queue_idle();
+ THD *queue= atomic_grab_reverse_queue();
+ /*
+ Mark the queue busy while we bounce it from one thread to the
+ next.
+ */
+ group_commit_mark_queue_busy();
+ pthread_mutex_unlock(&LOCK_group_commit);
+
+ /* The first in the queue is the leader. */
+ DBUG_ASSERT(queue == thd);
}
- error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
- DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
+ else
+ {
+ /* If not leader, just wait until previous thread wakes us up. */
+ group_commit_wait_for_wakeup(thd);
+ }
+
+ /* Only run commit_ordered() if log_xid was successful. */
if (cookie)
- tc_log->unlog(cookie, xid);
- DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
-end:
- if (rw_trans)
- start_waiting_global_read_lock(thd);
+ {
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ call_commit_ordered(ha_info, thd, all);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ }
+
+ THD *next= thd->next_commit_ordered;
+ if (next)
+ group_commit_wakeup_other(next);
+ else
+ group_commit_mark_queue_idle();
+
+ if (!cookie)
+ goto err;
}
- /* Free resources and perform other cleanup even for 'empty' transactions. */
- else if (is_real_trans)
- thd->transaction.cleanup();
+
+ DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
+
+ error= commit_one_phase_2(thd, all, FALSE) ? 2 : 0;
+
+ DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
+ DBUG_ASSERT(cookie);
+ tc_log->unlog(cookie, xid);
+
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+
+ /* Come here if error and we need to rollback. */
+err:
+ if (!error)
+ error= 1;
+ ha_rollback_trans(thd, all);
+
+end:
+ if (rw_trans)
+ start_waiting_global_read_lock(thd);
#endif /* USING_TRANSACTIONS */
DBUG_RETURN(error);
}
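The wait/wakeup pair used in the code above (group_commit_wait_for_wakeup()
and group_commit_wakeup_other()) is a per-thread baton: every waiter sleeps on
its own mutex/condvar/flag triple, and the leader signals each follower
exactly once when its part of the group commit is done. Reduced to a single
leader and a single follower (names and strings here are illustrative only):

#include <pthread.h>
#include <cstdio>

/* One follower with its own mutex/condvar/flag, as each THD has. */
static pthread_mutex_t follower_mutex= PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  follower_cond=  PTHREAD_COND_INITIALIZER;
static bool follower_ready= false;

static void wait_for_wakeup()
{
  pthread_mutex_lock(&follower_mutex);
  while (!follower_ready)           /* loop guards against spurious wakeups */
    pthread_cond_wait(&follower_cond, &follower_mutex);
  pthread_mutex_unlock(&follower_mutex);
}

static void wakeup_other()
{
  pthread_mutex_lock(&follower_mutex);
  follower_ready= true;
  pthread_cond_signal(&follower_cond);
  pthread_mutex_unlock(&follower_mutex);
}

static void *follower_thread(void *)
{
  wait_for_wakeup();
  std::puts("follower: my commit_ordered() was run, resuming");
  return NULL;
}

int main()
{
  pthread_t t;
  pthread_create(&t, NULL, follower_thread, NULL);
  std::puts("leader: group_log_xid() + commit_ordered() for the queue");
  wakeup_other();
  pthread_join(t, NULL);
  return 0;
}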
@@ -1207,6 +1547,17 @@ end:
*/
int ha_commit_one_phase(THD *thd, bool all)
{
+ /*
+ When we come here, we did not call handler commit_ordered() methods in
+ ha_commit_trans() 2-phase commit, so we pass TRUE to do it in
+ commit_one_phase_2().
+ */
+ return commit_one_phase_2(thd, all, TRUE);
+}
+
+static int
+commit_one_phase_2(THD *thd, bool all, bool do_commit_ordered)
+{
int error=0;
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
/*
@@ -1218,10 +1569,40 @@ int ha_commit_one_phase(THD *thd, bool a
*/
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
- DBUG_ENTER("ha_commit_one_phase");
+ DBUG_ENTER("commit_one_phase_2");
#ifdef USING_TRANSACTIONS
if (ha_info)
{
+ if (is_real_trans && do_commit_ordered)
+ {
+ /*
+ If we did not do it already, call any commit_ordered() method.
+
+ Even though we do not need to keep any ordering with other threads
+ (as there is no prepare or log_xid for this commit), we still need to
+ do this under the LOCK_commit_ordered mutex to not run in parallel
+ with other commit_ordered calls.
+ */
+
+ bool locked= FALSE;
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+ {
+ handlerton *ht= hi->ht();
+ if (ht->commit_ordered)
+ {
+ if (!locked)
+ {
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ locked= 1;
+ }
+ ht->commit_ordered(ht, thd, all);
+ }
+ }
+ if (locked)
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ }
+
for (; ha_info; ha_info= ha_info_next)
{
int err;
=== modified file 'sql/handler.h'
--- a/sql/handler.h 2010-01-14 16:51:00 +0000
+++ b/sql/handler.h 2010-05-26 08:16:18 +0000
@@ -656,9 +656,81 @@ struct handlerton
NOTE 'all' is also false in auto-commit mode where 'end of statement'
and 'real commit' mean the same event.
*/
- int (*commit)(handlerton *hton, THD *thd, bool all);
+ int (*commit)(handlerton *hton, THD *thd, bool all);
+ /*
+ The commit_ordered() method is called prior to the commit() method, after
+ the transaction manager has decided to commit (not rollback) the
+ transaction.
+
+ The calls to commit_ordered() in multiple parallel transactions is
+ The calls to commit_ordered() in multiple parallel transactions are
+ handler. This can be used to ensure the same commit order among multiple
+ handlers (eg. in table handler and binlog). So if transaction T1 calls
+ into commit_ordered() of handler A before T2, then T1 will also call
+ commit_ordered() of handler B before T2.
+
+ The intention is that commit_ordered() should do the minimal amount of
+ work that needs to happen in consistent commit order among handlers. To
+ preserve ordering, calls need to be serialised on a global mutex, so
+ doing any time-consuming or blocking operations in commit_ordered() will
+ limit scalability.
+
+ Handlers can rely on commit_ordered() calls being serialised (no two
+ calls can run in parallel, so no extra locking on the handler part is
+ required to ensure this).
+
+ Note that commit_ordered() can be called from a different thread than the
+ one handling the transaction! So it can not do anything that depends on
+ thread local storage, in particular it can not call my_error() and
+ friends (instead it can store the error code and delay the call to
+ my_error() to the commit() method).
+
+ Similarly, since commit_ordered() returns void, any return error code
+ must be saved and returned from the commit() method instead.
+
+ commit_ordered() is called only when actually committing a transaction
+ (autocommit or not), not when ending a statement in the middle of a
+ transaction.
+
+ The commit_ordered method is optional, and can be left unset if not
+ needed in a particular handler.
+ */
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
int (*rollback)(handlerton *hton, THD *thd, bool all);
int (*prepare)(handlerton *hton, THD *thd, bool all);
+ /*
+ The prepare_ordered method is optional. If set, it will be called after
+ successful prepare() in all handlers participating in 2-phase commit.
+
+ The calls to prepare_ordered() among multiple parallel transactions are
+ ordered consistently with calls to commit_ordered(). This means that
+ calls to prepare_ordered() effectively define the commit order, and that
+ each handler will see the same sequence of transactions calling into
+ prepare_ordered() and commit_ordered().
+
+ Thus, prepare_ordered() can be used to define commit order for handlers
+ that need to do this in the prepare step (like binlog). It can also be
+ used to release transactions locks early in an order consistent with the
+ used to release transaction locks early in an order consistent with the
+
+ Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+ ordering, so the intention is that they should execute fast, with only
+ the minimal amount of work needed to define commit order. Handlers can
+ rely on this serialisation, and do not need to do any extra locking to
+ avoid two prepare_ordered() calls running in parallel.
+
+ Unlike commit_ordered(), prepare_ordered() _is_ guaranteed to be called
+ in the context of the thread handling the rest of the transaction.
+
+ Note that for user-level XA SQL commands, no consistent ordering among
+ prepare_ordered() and commit_ordered() is guaranteed (as that would
+ require blocking all other commits for an indefinite time).
+
+ prepare_ordered() is called only when actually committing a transaction
+ (autocommit or not), not when ending a statement in the middle of a
+ transaction.
+ */
+ int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
=== modified file 'sql/log.cc'
--- a/sql/log.cc 2010-04-06 22:47:08 +0000
+++ b/sql/log.cc 2010-05-26 08:16:18 +0000
@@ -154,9 +154,12 @@ class binlog_trx_data {
public:
binlog_trx_data()
: at_least_one_stmt_committed(0), incident(FALSE), m_pending(0),
- before_stmt_pos(MY_OFF_T_UNDEF)
+ before_stmt_pos(MY_OFF_T_UNDEF), using_xa(0)
{
trans_log.end_of_file= max_binlog_cache_size;
+ (void) my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW,
+ "LOCK_group_commit", MYF(0));
+ (void) pthread_cond_init(&COND_group_commit, 0);
}
~binlog_trx_data()
@@ -208,11 +211,12 @@ public:
completely.
*/
void reset() {
- if (!empty())
+ if (trans_log.type != WRITE_CACHE || !empty())
truncate(0);
before_stmt_pos= MY_OFF_T_UNDEF;
incident= FALSE;
trans_log.end_of_file= max_binlog_cache_size;
+ using_xa= FALSE;
DBUG_ASSERT(empty());
}
@@ -257,6 +261,41 @@ public:
Binlog position before the start of the current statement.
*/
my_off_t before_stmt_pos;
+
+ /* 0 or error when writing to binlog; set during group commit. */
+ int error;
+ /* If error != 0, value of errno (for my_error() reporting). */
+ int commit_errno;
+ /* Link for queueing transactions up for group commit to binlog. */
+ binlog_trx_data *next;
+ /*
+ Flag set true when group commit for this transaction is finished; used
+ with pthread_cond_wait() to wait until commit is done.
+ This flag is protected by LOCK_group_commit.
+ */
+ bool done;
+ /*
+ Flag set if this transaction is the group commit leader that will handle
+ the actual writing to the binlog.
+ This flag is protected by LOCK_group_commit.
+ */
+ bool group_commit_leader;
+ /*
+ Flag set true if this transaction is committed with log_xid() as part of
+ XA, false if not.
+ */
+ bool using_xa;
+ /*
+ Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
+ written during group commit. The incident_event is only valid if
+ has_incident() is true.
+ */
+ Log_event *begin_event;
+ Log_event *end_event;
+ Log_event *incident_event;
+ /* Mutex and condition for wakeup after group commit. */
+ pthread_mutex_t LOCK_group_commit;
+ pthread_cond_t COND_group_commit;
};
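The done flag and the mutex/condition pair above implement the usual
condition-variable handshake, mirrored by trx_group_commit_participant()
and the leader's wakeup loop further down. In outline (sketch only):

    /* Participant side: */
    pthread_mutex_lock(&trx->LOCK_group_commit);
    while (!trx->done)
      pthread_cond_wait(&trx->COND_group_commit, &trx->LOCK_group_commit);
    pthread_mutex_unlock(&trx->LOCK_group_commit);

    /* Leader side, per queued transaction: */
    pthread_mutex_lock(&trx->LOCK_group_commit);
    trx->done= true;
    pthread_cond_signal(&trx->COND_group_commit);
    pthread_mutex_unlock(&trx->LOCK_group_commit);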
handlerton *binlog_hton;
@@ -1391,117 +1430,188 @@ static int binlog_close_connection(handl
return 0;
}
+/* Helper functions for binlog_flush_trx_cache(). */
+static int
+binlog_flush_trx_cache_prepare(THD *thd)
+{
+ if (thd->binlog_flush_pending_rows_event(TRUE))
+ return 1;
+ return 0;
+}
+
+static void
+binlog_flush_trx_cache_finish(THD *thd, binlog_trx_data *trx_data)
+{
+ IO_CACHE *trans_log= &trx_data->trans_log;
+
+ trx_data->reset();
+
+ statistic_increment(binlog_cache_use, &LOCK_status);
+ if (trans_log->disk_writes != 0)
+ {
+ statistic_increment(binlog_cache_disk_use, &LOCK_status);
+ trans_log->disk_writes= 0;
+ }
+}
+
/*
- End a transaction.
+ End a transaction, writing events to the binary log.
SYNOPSIS
- binlog_end_trans()
+ binlog_flush_trx_cache()
thd The thread whose transaction should be ended
trx_data Pointer to the transaction data to use
- end_ev The end event to use, or NULL
- all True if the entire transaction should be ended, false if
- only the statement transaction should be ended.
+ end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
DESCRIPTION
End the currently open transaction. The transaction can be either
- a real transaction (if 'all' is true) or a statement transaction
- (if 'all' is false).
+ a real transaction or a statement transaction.
- If 'end_ev' is NULL, the transaction is a rollback of only
- transactional tables, so the transaction cache will be truncated
- to either just before the last opened statement transaction (if
- 'all' is false), or reset completely (if 'all' is true).
+ This can be to commit a transaction, with a COMMIT query event or an XA
+ commit XID event. But it can also be to roll back a transaction with a
+ ROLLBACK query event, used for rolling back transactions which also
+ contain updates to non-transactional tables.
*/
static int
-binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
- Log_event *end_ev, bool all)
+binlog_flush_trx_cache(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev)
{
- DBUG_ENTER("binlog_end_trans");
- int error=0;
- IO_CACHE *trans_log= &trx_data->trans_log;
- DBUG_PRINT("enter", ("transaction: %s end_ev: 0x%lx",
- all ? "all" : "stmt", (long) end_ev));
+ DBUG_ENTER("binlog_flush_trx_cache");
DBUG_PRINT("info", ("thd->options={ %s%s}",
FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
FLAGSTR(thd->options, OPTION_BEGIN)));
+ if (binlog_flush_trx_cache_prepare(thd))
+ DBUG_RETURN(1);
+
/*
- NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
- only transactional tables. If the transaction contain changes to
- any non-transactiona tables, we need write the transaction and log
- a ROLLBACK last.
- */
- if (end_ev != NULL)
- {
- if (thd->binlog_flush_pending_rows_event(TRUE))
- DBUG_RETURN(1);
- /*
- Doing a commit or a rollback including non-transactional tables,
- i.e., ending a transaction where we might write the transaction
- cache to the binary log.
-
- We can always end the statement when ending a transaction since
- transactions are not allowed inside stored functions. If they
- were, we would have to ensure that we're not ending a statement
- inside a stored function.
- */
- error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
- trx_data->has_incident());
- trx_data->reset();
+ Doing a commit or a rollback including non-transactional tables,
+ i.e., ending a transaction where we might write the transaction
+ cache to the binary log.
+
+ We can always end the statement when ending a transaction since
+ transactions are not allowed inside stored functions. If they
+ were, we would have to ensure that we're not ending a statement
+ inside a stored function.
+ */
+ int error= mysql_bin_log.write_transaction_to_binlog(thd, trx_data, end_ev);
- /*
- We need to step the table map version after writing the
- transaction cache to disk.
- */
- mysql_bin_log.update_table_map_version();
- statistic_increment(binlog_cache_use, &LOCK_status);
- if (trans_log->disk_writes != 0)
- {
- statistic_increment(binlog_cache_disk_use, &LOCK_status);
- trans_log->disk_writes= 0;
- }
- }
- else
- {
- /*
- If rolling back an entire transaction or a single statement not
- inside a transaction, we reset the transaction cache.
+ binlog_flush_trx_cache_finish(thd, trx_data);
- If rolling back a statement in a transaction, we truncate the
- transaction cache to remove the statement.
- */
- thd->binlog_remove_pending_rows_event(TRUE);
- if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
- {
- if (trx_data->has_incident())
- error= mysql_bin_log.write_incident(thd, TRUE);
- trx_data->reset();
- }
- else // ...statement
- trx_data->truncate(trx_data->before_stmt_pos);
+ DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
+ DBUG_RETURN(error);
+}
- /*
- We need to step the table map version on a rollback to ensure
- that a new table map event is generated instead of the one that
- was written to the thrown-away transaction cache.
- */
- mysql_bin_log.update_table_map_version();
+/*
+ Discard a transaction, i.e. ROLLBACK with only transactional table updates.
+
+ SYNOPSIS
+ binlog_truncate_trx_cache()
+
+ thd The thread whose transaction should be ended
+ trx_data Pointer to the transaction data to use
+ all True if the entire transaction should be ended, false if
+ only the statement transaction should be ended.
+
+ DESCRIPTION
+
+ Rollback (and end) a transaction that only modifies transactional
+ tables. The transaction can be either a real transaction (if 'all' is
+ true) or a statement transaction (if 'all' is false).
+
+ The transaction cache will be truncated to either just before the last
+ opened statement transaction (if 'all' is false), or reset completely (if
+ 'all' is true).
+ */
+static int
+binlog_truncate_trx_cache(THD *thd, binlog_trx_data *trx_data, bool all)
+{
+ DBUG_ENTER("binlog_truncate_trx_cache");
+ int error= 0;
+ DBUG_PRINT("enter", ("transaction: %s", all ? "all" : "stmt"));
+ DBUG_PRINT("info", ("thd->options={ %s%s}",
+ FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
+ FLAGSTR(thd->options, OPTION_BEGIN)));
+
+ /*
+ ROLLBACK with nothing to replicate: i.e., rollback of only transactional
+ tables.
+ */
+
+ /*
+ If rolling back an entire transaction or a single statement not
+ inside a transaction, we reset the transaction cache.
+
+ If rolling back a statement in a transaction, we truncate the
+ transaction cache to remove the statement.
+ */
+ thd->binlog_remove_pending_rows_event(TRUE);
+ if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ {
+ if (trx_data->has_incident())
+ error= mysql_bin_log.write_incident(thd);
+ trx_data->reset();
}
+ else // ...statement
+ trx_data->truncate(trx_data->before_stmt_pos);
DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
DBUG_RETURN(error);
}
+static LEX_STRING const write_error_msg=
+ { C_STRING_WITH_LEN("error writing to the binary log") };
+
static int binlog_prepare(handlerton *hton, THD *thd, bool all)
{
/*
- do nothing.
- just pretend we can do 2pc, so that MySQL won't
- switch to 1pc.
- real work will be done in MYSQL_BIN_LOG::log_xid()
+ If this prepare is for a single statement in the middle of a transaction,
+ not the actual transaction commit, then we do nothing. The real work is
+ only done later, in the prepare for making persistent changes.
*/
+ if (!all && (thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ return 0;
+
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+ trx_data->using_xa= TRUE;
+
+ if (binlog_flush_trx_cache_prepare(thd))
+ return 1;
+
+ my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+ if (!xid)
+ {
+ /* Skip logging this transaction, marked by setting end_event to NULL. */
+ trx_data->end_event= NULL;
+ return 0;
+ }
+
+ /*
+ Allocate the extra events that will be logged to the binlog in binlog group
+ commit. Use placement new to allocate them on the THD memroot, as they need
+ to remain live until log_xid() returns.
+ */
+ size_t needed_size= sizeof(Query_log_event) + sizeof(Xid_log_event);
+ if (trx_data->has_incident())
+ needed_size+= sizeof(Incident_log_event);
+ uchar *mem= (uchar *)thd->alloc(needed_size);
+ if (!mem)
+ return 1;
+
+ trx_data->begin_event= new ((void *)mem)
+ Query_log_event(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ mem+= sizeof(Query_log_event);
+
+ trx_data->end_event= new ((void *)mem) Xid_log_event(thd, xid);
+
+ if (trx_data->has_incident())
+ trx_data->incident_event= new ((void *)(mem + sizeof(Xid_log_event)))
+ Incident_log_event(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+
return 0;
}
@@ -1525,11 +1635,11 @@ static int binlog_commit(handlerton *hto
binlog_trx_data *const trx_data=
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
- if (trx_data->empty())
+ if (trx_data->using_xa)
{
// we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid()
- trx_data->reset();
- DBUG_RETURN(0);
+ binlog_flush_trx_cache_finish(thd, trx_data);
+ DBUG_RETURN(error);
}
/*
@@ -1556,8 +1666,8 @@ static int binlog_commit(handlerton *hto
!stmt_has_updated_trans_table(thd) &&
thd->transaction.stmt.modified_non_trans_table))
{
- Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
- error= binlog_end_trans(thd, trx_data, &qev, all);
+ Query_log_event end_ev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
+ error= binlog_flush_trx_cache(thd, trx_data, &end_ev);
}
trx_data->at_least_one_stmt_committed = my_b_tell(&trx_data->trans_log) > 0;
@@ -1621,7 +1731,7 @@ static int binlog_rollback(handlerton *h
(thd->options & OPTION_KEEP_LOG)) &&
mysql_bin_log.check_write_error(thd))
trx_data->set_incident();
- error= binlog_end_trans(thd, trx_data, 0, all);
+ error= binlog_truncate_trx_cache(thd, trx_data, all);
}
else
{
@@ -1641,8 +1751,8 @@ static int binlog_rollback(handlerton *h
thd->current_stmt_binlog_row_based) ||
((thd->options & OPTION_KEEP_LOG)))
{
- Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
- error= binlog_end_trans(thd, trx_data, &qev, all);
+ Query_log_event end_ev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
+ error= binlog_flush_trx_cache(thd, trx_data, &end_ev);
}
/*
Otherwise, we simply truncate the cache as there is no change on
@@ -1650,7 +1760,7 @@ static int binlog_rollback(handlerton *h
*/
else if ((all && !thd->transaction.all.modified_non_trans_table) ||
(!all && !thd->transaction.stmt.modified_non_trans_table))
- error= binlog_end_trans(thd, trx_data, 0, all);
+ error= binlog_truncate_trx_cache(thd, trx_data, all);
}
if (!all)
trx_data->before_stmt_pos = MY_OFF_T_UNDEF; // part of the stmt rollback
@@ -2464,7 +2574,7 @@ const char *MYSQL_LOG::generate_name(con
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
- need_start_event(TRUE), m_table_map_version(0),
+ need_start_event(TRUE),
is_relay_log(0),
description_event_for_exec(0), description_event_for_queue(0)
{
@@ -2477,6 +2587,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG()
index_file_name[0] = 0;
bzero((char*) &index_file, sizeof(index_file));
bzero((char*) &purge_index_file, sizeof(purge_index_file));
+ use_group_log_xid= TRUE;
}
/* this is called only once */
@@ -2492,6 +2603,7 @@ void MYSQL_BIN_LOG::cleanup()
delete description_event_for_exec;
(void) pthread_mutex_destroy(&LOCK_log);
(void) pthread_mutex_destroy(&LOCK_index);
+ (void) pthread_mutex_destroy(&LOCK_queue);
(void) pthread_cond_destroy(&update_cond);
}
DBUG_VOID_RETURN;
@@ -2520,6 +2632,8 @@ void MYSQL_BIN_LOG::init_pthread_objects
*/
(void) my_pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW, "LOCK_index",
MYF_NO_DEADLOCK_DETECTION);
+ (void) my_pthread_mutex_init(&LOCK_queue, MY_MUTEX_INIT_FAST, "LOCK_queue",
+ MYF(0));
(void) pthread_cond_init(&update_cond, 0);
}
@@ -4113,7 +4227,6 @@ int THD::binlog_write_table_map(TABLE *t
DBUG_RETURN(error);
binlog_table_maps++;
- table->s->table_map_version= mysql_bin_log.table_map_version();
DBUG_RETURN(0);
}
@@ -4194,64 +4307,41 @@ MYSQL_BIN_LOG::flush_and_set_pending_row
if (Rows_log_event* pending= trx_data->pending())
{
- IO_CACHE *file= &log_file;
-
/*
Decide if we should write to the log file directly or to the
transaction log.
*/
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
- file= &trx_data->trans_log;
-
- /*
- If we are writing to the log file directly, we could avoid
- locking the log. This does not work since we need to step the
- m_table_map_version below, and that change has to be protected
- by the LOCK_log mutex.
- */
- pthread_mutex_lock(&LOCK_log);
-
- /*
- Write pending event to log file or transaction cache
- */
- if (pending->write(file))
{
- pthread_mutex_unlock(&LOCK_log);
- set_write_error(thd);
- DBUG_RETURN(1);
+ /* Write to transaction log/cache. */
+ if (pending->write(&trx_data->trans_log))
+ {
+ set_write_error(thd);
+ DBUG_RETURN(1);
+ }
}
-
- /*
- We step the table map version if we are writing an event
- representing the end of a statement. We do this regardless of
- wheather we write to the transaction cache or to directly to the
- file.
-
- In an ideal world, we could avoid stepping the table map version
- if we were writing to a transaction cache, since we could then
- reuse the table map that was written earlier in the transaction
- cache. This does not work since STMT_END_F implies closing all
- table mappings on the slave side.
-
- TODO: Find a solution so that table maps does not have to be
- written several times within a transaction.
- */
- if (pending->get_flags(Rows_log_event::STMT_END_F))
- ++m_table_map_version;
-
- delete pending;
-
- if (file == &log_file)
+ else
{
+ /* Write directly to log file. */
+ pthread_mutex_lock(&LOCK_log);
+ if (pending->write(&log_file))
+ {
+ pthread_mutex_unlock(&LOCK_log);
+ set_write_error(thd);
+ DBUG_RETURN(1);
+ }
+
error= flush_and_sync();
if (!error)
{
signal_update();
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+
+ pthread_mutex_unlock(&LOCK_log);
}
- pthread_mutex_unlock(&LOCK_log);
+ delete pending;
}
thd->binlog_set_pending_rows_event(event);
@@ -4450,9 +4540,6 @@ err:
set_write_error(thd);
}
- if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
- ++m_table_map_version;
-
pthread_mutex_unlock(&LOCK_log);
DBUG_RETURN(error);
}
@@ -4575,18 +4662,14 @@ uint MYSQL_BIN_LOG::next_file_id()
SYNOPSIS
write_cache()
cache Cache to write to the binary log
- lock_log True if the LOCK_log mutex should be aquired, false otherwise
- sync_log True if the log should be flushed and sync:ed
DESCRIPTION
Write the contents of the cache to the binary log. The cache will
be reset as a READ_CACHE to be able to read the contents from it.
*/
-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
+int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache)
{
- Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
-
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
return ER_ERROR_ON_WRITE;
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
@@ -4697,6 +4780,7 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE
}
/* Write data to the binary log file */
+ DBUG_EXECUTE_IF("fail_binlog_write_1", return ER_ERROR_ON_WRITE;);
if (my_b_write(&log_file, cache->read_pos, length))
return ER_ERROR_ON_WRITE;
cache->read_pos=cache->read_end; // Mark buffer used up
@@ -4704,9 +4788,6 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE
DBUG_ASSERT(carry == 0);
- if (sync_log)
- flush_and_sync();
-
return 0; // All OK
}
@@ -4739,26 +4820,22 @@ int query_error_code(THD *thd, bool not_
return error;
}
-bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+bool MYSQL_BIN_LOG::write_incident(THD *thd)
{
uint error= 0;
DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
- LEX_STRING const write_error_msg=
- { C_STRING_WITH_LEN("error writing to the binary log") };
Incident incident= INCIDENT_LOST_EVENTS;
Incident_log_event ev(thd, incident, write_error_msg);
- if (lock)
- pthread_mutex_lock(&LOCK_log);
+
+ pthread_mutex_lock(&LOCK_log);
error= ev.write(&log_file);
- if (lock)
+ if (!error && !(error= flush_and_sync()))
{
- if (!error && !(error= flush_and_sync()))
- {
- signal_update();
- rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
- }
- pthread_mutex_unlock(&LOCK_log);
+ signal_update();
+ rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+ pthread_mutex_unlock(&LOCK_log);
+
DBUG_RETURN(error);
}
@@ -4786,103 +4863,364 @@ bool MYSQL_BIN_LOG::write_incident(THD *
'cache' needs to be reinitialized after this functions returns.
*/
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
- bool incident)
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev)
{
- DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
+ DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+
+ /*
+ Create the necessary events here, where we have the correct THD (and
+ thread context).
+
+ Due to group commit the actual writing to binlog may happen in a different
+ thread.
+ */
+ Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ trx_data->begin_event= &qinfo;
+ trx_data->end_event= end_ev;
+ if (trx_data->has_incident())
+ {
+ Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+ trx_data->incident_event= &inc_ev;
+ DBUG_RETURN(write_transaction_to_binlog_events(trx_data));
+ }
+ else
+ {
+ trx_data->incident_event= NULL;
+ DBUG_RETURN(write_transaction_to_binlog_events(trx_data));
+ }
+}
+
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog_events(binlog_trx_data *trx_data)
+{
+ /*
+ To facilitate group commit for the binlog, we first queue up ourselves in
+ the group commit queue. Then the first thread to enter the queue waits for
+ the LOCK_log mutex, and commits for everyone in the queue once it gets the
+ lock. Any other threads in the queue just wait for the first one to finish
+ the commit and wake them up.
+ */
+
+ pthread_mutex_lock(&trx_data->LOCK_group_commit);
+ const binlog_trx_data *orig_queue= atomic_enqueue_trx(trx_data);
+
+ if (orig_queue != NULL)
+ {
+ trx_data->group_commit_leader= FALSE;
+ trx_data->done= FALSE;
+ trx_group_commit_participant(trx_data);
+ }
+ else
+ {
+ trx_data->group_commit_leader= TRUE;
+ pthread_mutex_unlock(&trx_data->LOCK_group_commit);
+ trx_group_commit_leader(NULL);
+ }
+
+ return trx_group_commit_finish(trx_data);
+}
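Stripped of the binlog details, the protocol above can be summarised in a
self-contained sketch (hypothetical names; a plain mutex-protected list
stands in for the lock-free queue, and log_lock plays the role of LOCK_log):

    #include <pthread.h>

    struct trx_node
    {
      trx_node *next;
      bool done;
      pthread_mutex_t lock;
      pthread_cond_t cond;
    };

    static trx_node *commit_queue= NULL;
    static pthread_mutex_t queue_lock= PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t log_lock= PTHREAD_MUTEX_INITIALIZER;

    void group_commit(trx_node *me)
    {
      /* Enqueue; whoever finds the queue empty becomes the leader. */
      pthread_mutex_lock(&queue_lock);
      bool leader= (commit_queue == NULL);
      me->done= false;
      me->next= commit_queue;
      commit_queue= me;
      pthread_mutex_unlock(&queue_lock);

      if (!leader)
      {
        /* Participant: sleep until the leader has committed us. */
        pthread_mutex_lock(&me->lock);
        while (!me->done)
          pthread_cond_wait(&me->cond, &me->lock);
        pthread_mutex_unlock(&me->lock);
        return;
      }

      /* Leader: take the log lock, then grab the whole queue, including
         followers that arrived while we waited for the lock. */
      pthread_mutex_lock(&log_lock);
      pthread_mutex_lock(&queue_lock);
      trx_node *batch= commit_queue;   /* in reverse enqueue order */
      commit_queue= NULL;
      pthread_mutex_unlock(&queue_lock);

      /* ... reverse `batch', write every transaction, one fsync() ... */

      pthread_mutex_unlock(&log_lock);

      /* Wake the participants. */
      trx_node *t= batch;
      while (t)
      {
        trx_node *next= t->next;  /* read before waking t; a woken node
                                     may be reused immediately */
        if (t != me)
        {
          pthread_mutex_lock(&t->lock);
          t->done= true;
          pthread_cond_signal(&t->cond);
          pthread_mutex_unlock(&t->lock);
        }
        t= next;
      }
    }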
+
+/*
+ Participate as secondary transaction in group commit.
+
+ Another thread is already waiting to obtain the LOCK_log, and should include
+ this thread in the group commit once the log is obtained. So here we put
+ ourselves in the queue and wait to be signalled that the group commit is done.
+
+ Note that this function must be called with the trx_data->LOCK_group_commit
+ locked; the mutex will be released before return.
+*/
+void
+MYSQL_BIN_LOG::trx_group_commit_participant(binlog_trx_data *trx_data)
+{
+ safe_mutex_assert_owner(&trx_data->LOCK_group_commit);
+
+ /* Wait until trx_data->done is set to true by the leader. */
+ while (!trx_data->done)
+ pthread_cond_wait(&trx_data->COND_group_commit,
+ &trx_data->LOCK_group_commit);
+ pthread_mutex_unlock(&trx_data->LOCK_group_commit);
+}
+
+bool
+MYSQL_BIN_LOG::trx_group_commit_finish(binlog_trx_data *trx_data)
+{
+ if (trx_data->error)
+ {
+ switch (trx_data->error)
+ {
+ case ER_ERROR_ON_WRITE:
+ my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, trx_data->commit_errno);
+ break;
+ case ER_ERROR_ON_READ:
+ my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
+ trx_data->trans_log.file_name, trx_data->commit_errno);
+ break;
+ default:
+ /*
+ There are not (and should not be) any errors thrown that are not covered
+ But just in case one is added later without updating the above switch
+ statement, include a catch-all.
+ */
+ my_printf_error(trx_data->error,
+ "Error writing transaction to binary log: %d",
+ MYF(ME_NOREFRESH), trx_data->error);
+ }
+
+ /*
+ Since we return error, this transaction XID will not be committed, so
+ we need to mark it as not needed for recovery (unlog() is not called
+ for a transaction if log_xid() fails).
+ */
+ if (trx_data->end_event->get_type_code() == XID_EVENT)
+ mark_xid_done();
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ Do binlog group commit as the lead thread.
+
+ This must be called when this thread/transaction is queued at the start of
+ the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
+ commit all the transactions in the queue (more may have entered while waiting
+ for LOCK_log). After commit is done, all other threads in the queue will be
+ signalled.
+
+ */
+void
+MYSQL_BIN_LOG::trx_group_commit_leader(THD *first_thd)
+{
+ uint xid_count= 0;
+ uint write_count= 0;
+
+ /* First, put anything from group_log_xid into the queue. */
+ binlog_trx_data *full_queue= NULL;
+ binlog_trx_data **next_ptr= &full_queue;
+ for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
+ {
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+ /* Skip log_xid for transactions without xid, marked by NULL end_event. */
+ if (!trx_data->end_event)
+ continue;
+
+ trx_data->error= 0;
+ *next_ptr= trx_data;
+ next_ptr= &(trx_data->next);
+ }
+
+ /*
+ Next, take the LOCK_log mutex, and once we get it, add any additional writes
+ that queued up while we were waiting.
+
+ Note that if some writer not going through log_xid() comes in and gets the
+ LOCK_log before us, they will not be able to include us in their group
+ commit (and they cannot ensure the same commit order between us and the
+ participating transactional storage engines anyway).
+
+ On the other hand, when we get the LOCK_log, we will be able to include
+ any non-transactional writes that queued up in our group commit. This
+ should hopefully not be too big of a problem, as group commit is most
+ important for the transactional case anyway when durability (fsync) is
+ enabled.
+ */
VOID(pthread_mutex_lock(&LOCK_log));
- /* NULL would represent nothing to replicate after ROLLBACK */
- DBUG_ASSERT(commit_event != NULL);
+ /*
+ As the queue is in reverse order of entering, reverse the queue as we add
+ it to the existing one. Note that there is no ordering defined between
+ transactional and non-transactional commits.
+ */
+ binlog_trx_data *current= atomic_grab_trx_queue();
+ binlog_trx_data *xtra_queue= NULL;
+ while (current)
+ {
+ current->error= 0;
+ binlog_trx_data *next= current->next;
+ current->next= xtra_queue;
+ xtra_queue= current;
+ current= next;
+ }
+ *next_ptr= xtra_queue;
+ /*
+ Now we have in full_queue the list of transactions to be committed in
+ order.
+ */
DBUG_ASSERT(is_open());
if (likely(is_open())) // Should always be true
{
/*
- We only bother to write to the binary log if there is anything
- to write.
- */
- if (my_b_tell(cache) > 0)
+ Commit every transaction in the queue.
+
+ Note that we are doing this in a different thread than the one running
+ the transaction! So we are limited in the operations we can do. In
+ particular, we cannot call my_error() on behalf of a transaction, as
+ that obtains the THD from thread local storage. Instead, we must set
+ current->error and let the thread do the error reporting itself once
+ we wake it up.
+ */
+ for (current= full_queue; current != NULL; current= current->next)
{
- /*
- Log "BEGIN" at the beginning of every transaction. Here, a
- transaction is either a BEGIN..COMMIT block or a single
- statement in autocommit mode.
- */
- Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ IO_CACHE *cache= ¤t->trans_log;
/*
- Now this Query_log_event has artificial log_pos 0. It must be
- adjusted to reflect the real position in the log. Not doing it
- would confuse the slave: it would prevent this one from
- knowing where he is in the master's binlog, which would result
- in wrong positions being shown to the user, MASTER_POS_WAIT
- undue waiting etc.
+ We only bother to write to the binary log if there is anything
+ to write.
*/
- if (qinfo.write(&log_file))
- goto err;
-
- DBUG_EXECUTE_IF("crash_before_writing_xid",
- {
- if ((write_error= write_cache(cache, false, true)))
- DBUG_PRINT("info", ("error writing binlog cache: %d",
- write_error));
- DBUG_PRINT("info", ("crashing before writing xid"));
- abort();
- });
-
- if ((write_error= write_cache(cache, false, false)))
- goto err;
+ if (my_b_tell(cache) > 0)
+ {
+ current->error= write_transaction(current);
+ if (current->error)
+ current->commit_errno= errno;
- if (commit_event && commit_event->write(&log_file))
- goto err;
+ write_count++;
+ }
- if (incident && write_incident(thd, FALSE))
- goto err;
+ if (current->end_event->get_type_code() == XID_EVENT)
+ xid_count++;
+ }
+ if (write_count > 0)
+ {
if (flush_and_sync())
- goto err;
- DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_ABORT(););
- if (cache->error) // Error on read
{
- sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
- write_error=1; // Don't give more errors
- goto err;
+ for (current= full_queue; current != NULL; current= current->next)
+ {
+ if (!current->error)
+ {
+ current->error= ER_ERROR_ON_WRITE;
+ current->commit_errno= errno;
+ }
+ }
+ }
+ else
+ {
+ signal_update();
}
- signal_update();
}
/*
- if commit_event is Xid_log_event, increase the number of
+ if any commit_events are Xid_log_event, increase the number of
prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
if there're prepared xids in it - see the comment in new_file() for
an explanation.
- If the commit_event is not Xid_log_event (then it's a Query_log_event)
- rotate binlog, if necessary.
+ If there are no Xid_log_events (then it's all Query_log_events), rotate the binlog,
+ if necessary.
*/
- if (commit_event && commit_event->get_type_code() == XID_EVENT)
+ if (xid_count > 0)
{
- pthread_mutex_lock(&LOCK_prep_xids);
- prepared_xids++;
- pthread_mutex_unlock(&LOCK_prep_xids);
+ mark_xids_active(xid_count);
}
else
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+
VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_RETURN(0);
+ /*
+ Signal those that are not part of group_log_xid, and are not group leaders
+ running the queue.
-err:
- if (!write_error)
+ Since a group leader runs the queue itself if a group_log_xid does not get
+ to do it first, such leader threads do not need to wait or be woken up.
+ */
+ for (current= xtra_queue; current != NULL; current= current->next)
{
- write_error= 1;
- sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ /*
+ Note that we need to take LOCK_group_commit even in the case of a leader!
+
+ Otherwise there is a race between setting and testing the
+ group_commit_leader flag.
+ */
+ pthread_mutex_lock(¤t->LOCK_group_commit);
+ if (!current->group_commit_leader)
+ {
+ current->done= true;
+ pthread_cond_signal(¤t->COND_group_commit);
+ }
+ pthread_mutex_unlock(¤t->LOCK_group_commit);
}
- VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_RETURN(1);
}
+int
+MYSQL_BIN_LOG::write_transaction(binlog_trx_data *trx_data)
+{
+ IO_CACHE *cache= &trx_data->trans_log;
+ /*
+ Log "BEGIN" at the beginning of every transaction. Here, a transaction is
+ either a BEGIN..COMMIT block or a single statement in autocommit mode. The
+ event was constructed in write_transaction_to_binlog(), in the thread
+ running the transaction.
+
+ Now this Query_log_event has artificial log_pos 0. It must be
+ adjusted to reflect the real position in the log. Not doing it
+ would confuse the slave: it would prevent this one from
+ knowing where he is in the master's binlog, which would result
+ in wrong positions being shown to the user, MASTER_POS_WAIT
+ undue waiting etc.
+ */
+ if (trx_data->begin_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ DBUG_EXECUTE_IF("crash_before_writing_xid",
+ {
+ if ((write_cache(cache)))
+ DBUG_PRINT("info", ("error writing binlog cache"));
+ else
+ flush_and_sync();
+
+ DBUG_PRINT("info", ("crashing before writing xid"));
+ abort();
+ });
+
+ if (write_cache(cache))
+ return ER_ERROR_ON_WRITE;
+
+ if (trx_data->end_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ if (trx_data->has_incident() && trx_data->incident_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ if (cache->error) // Error on read
+ return ER_ERROR_ON_READ;
+
+ return 0;
+}
+
+binlog_trx_data *
+MYSQL_BIN_LOG::atomic_enqueue_trx(binlog_trx_data *trx_data)
+{
+ my_atomic_rwlock_wrlock(&LOCK_queue);
+ trx_data->next= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&trx_data->next),
+ trx_data))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_queue);
+ return trx_data->next;
+}
+
+binlog_trx_data *
+MYSQL_BIN_LOG::atomic_grab_trx_queue()
+{
+ my_atomic_rwlock_wrlock(&LOCK_queue);
+ binlog_trx_data *queue= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&queue),
+ NULL))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_queue);
+ return queue;
+}
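The two helpers above form a lock-free LIFO list (a Treiber stack):
enqueue pushes at the head and returns the previous head (NULL meaning the
caller is first in, and thus the group commit leader), while grab swaps the
whole list out in one step. A standalone analogue using C++11 atomics,
purely for illustration (the code above uses the my_atomic layer instead):

    #include <atomic>

    struct Node { Node *next; };
    static std::atomic<Node *> list_head(NULL);

    /* Push n; return the previous head (NULL => caller is the leader). */
    static Node *enqueue(Node *n)
    {
      Node *old= list_head.load();
      do
        n->next= old;
      while (!list_head.compare_exchange_weak(old, n));
      return old;
    }

    /* Atomically detach the whole list; the caller then reverses it
       back into arrival (commit) order. */
    static Node *grab_all()
    {
      return list_head.exchange(NULL);
    }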
/**
Wait until we get a signal that the binary log has been updated.
@@ -5879,9 +6217,6 @@ void TC_LOG_BINLOG::close()
}
/**
- @todo
- group commit
-
@retval
0 error
@retval
@@ -5889,19 +6224,83 @@ void TC_LOG_BINLOG::close()
*/
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
{
- DBUG_ENTER("TC_LOG_BINLOG::log");
- Xid_log_event xle(thd, xid);
- binlog_trx_data *trx_data=
- (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ int error;
+ DBUG_ENTER("TC_LOG_BINLOG::log_xid");
+
+ thd->next_commit_ordered= 0;
+ group_log_xid(thd);
+ if (thd->xid_error)
+ error= xid_delayed_error(thd);
+ else
+ error= 0;
+
/*
- We always commit the entire transaction when writing an XID. Also
- note that the return value is inverted.
- */
- DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE));
+ Note that the return value is inverted: zero on failure, private non-zero
+ 'cookie' on success.
+ */
+ DBUG_RETURN(!error);
}
-void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+/*
+ Do a binlog log_xid() for a group of transactions, linked through
+ thd->next_commit_ordered.
+*/
+void
+TC_LOG_BINLOG::group_log_xid(THD *first_thd)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::group_log_xid");
+ trx_group_commit_leader(first_thd);
+ for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
+ {
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ thd->xid_error= trx_data->error;
+ thd->xid_cookie= !trx_data->error;
+ }
+ DBUG_VOID_RETURN;
+}
+
+int
+TC_LOG_BINLOG::xid_delayed_error(THD *thd)
{
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ return trx_group_commit_finish(trx_data);
+}
+
+/*
+ After an XID is logged, we need to hold on to the current binlog file until
+ it is fully committed in the storage engine. The reason is that crash
+ recovery only looks at the latest binlog, so we must make sure there are no
+ outstanding prepared (but not committed) transactions before rotating the
+ binlog.
+
+ To handle this, we keep a count of outstanding XIDs. This function is used
+ to increase this count when committing one or more transactions to the
+ binary log.
+*/
+void
+TC_LOG_BINLOG::mark_xids_active(uint xid_count)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
+ DBUG_PRINT("info", ("xid_count=%u", xid_count));
+ pthread_mutex_lock(&LOCK_prep_xids);
+ prepared_xids+= xid_count;
+ pthread_mutex_unlock(&LOCK_prep_xids);
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Once an XID is committed, it is safe to rotate the binary log, as it can no
+ longer be needed during crash recovery.
+
+ This function is called to mark an XID this way. It needs to decrease the
+ count of pending XIDs, and signal the log rotator thread when it reaches zero.
+*/
+void
+TC_LOG_BINLOG::mark_xid_done()
+{
+ DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
pthread_mutex_lock(&LOCK_prep_xids);
DBUG_ASSERT(prepared_xids > 0);
if (--prepared_xids == 0) {
@@ -5909,7 +6308,16 @@ void TC_LOG_BINLOG::unlog(ulong cookie,
pthread_cond_signal(&COND_prep_xids);
}
pthread_mutex_unlock(&LOCK_prep_xids);
- rotate_and_purge(0); // as ::write() did not rotate
+ DBUG_VOID_RETURN;
+}
+
+void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::unlog");
+ if (xid)
+ mark_xid_done();
+ rotate_and_purge(0); // as ::write_transaction_to_binlog() did not rotate
+ DBUG_VOID_RETURN;
}
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
=== modified file 'sql/log.h'
--- a/sql/log.h 2009-12-04 14:40:42 +0000
+++ b/sql/log.h 2010-05-26 08:16:18 +0000
@@ -28,13 +28,49 @@ class TC_LOG
{
public:
int using_heuristic_recover();
- TC_LOG() {}
+ /* True if we should use group_log_xid(), false to use log_xid(). */
+ bool use_group_log_xid;
+
+ TC_LOG() : use_group_log_xid(0) {}
virtual ~TC_LOG() {}
virtual int open(const char *opt_name)=0;
virtual void close()=0;
virtual int log_xid(THD *thd, my_xid xid)=0;
virtual void unlog(ulong cookie, my_xid xid)=0;
+ /*
+ If use_group_log_xid is true, then this method is used instead of
+ log_xid() to do logging of a group of transactions all at once.
+
+ The transactions will be linked through THD::next_commit_ordered.
+
+ Additionally, when this method is used instead of log_xid(), the order in
+ which handler->prepare_ordered() and handler->commit_ordered() are called
+ is guaranteed to be the same as the order of calls and THD list elements
+ for group_log_xid().
+
+ This can be used to efficiently implement group commit that at the same
+ time preserves the order of commits among handlers and TC (e.g. to get the
+ same commit order in InnoDB and the binary log).
+
+ For TCs that do not need this, it can be preferable to use plain log_xid()
+ instead, as it allows threads to run log_xid() in parallel with each
+ other. In contrast, group_log_xid() runs under a global mutex, so it is
+ guaranteed that only one call into it will be active at a time.
+
+ Since this call handles multiple threads/THDs at once, my_error() (and
+ other code that relies on thread local storage) cannot be used in this
+ method. Instead, in case of error, thd->xid_error should be set to the
+ error code, and xid_delayed_error() will be called later in the correct
+ thread context to actually report the error.
+
+ In the success case, this method must set thd->xid_cookie for each thread
+ to the cookie that is normally returned from log_xid() (which must be
+ non-zero in the non-error case).
+ */
+ virtual void group_log_xid(THD *first_thd) { DBUG_ASSERT(FALSE); }
+ /* Error reporting for group_log_xid(). */
+ virtual int xid_delayed_error(THD *thd) { DBUG_ASSERT(FALSE); return 0; }
};
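To make the contract concrete, a TC opting into the group interface would,
in outline, look like the sketch below (class name hypothetical, remaining
virtuals elided):

    class TC_LOG_example: public TC_LOG
    {
    public:
      TC_LOG_example() { use_group_log_xid= TRUE; }
      virtual void group_log_xid(THD *first_thd)
      {
        /* Serialised under a global mutex; THDs come linked in commit
           order. No my_error() from here. */
        for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
        {
          /* ... log thd's XID ... */
          thd->xid_error= 0;   /* or an error code, reported later */
          thd->xid_cookie= 1;  /* non-zero cookie on success */
        }
      }
      virtual int xid_delayed_error(THD *thd)
      {
        /* Runs in thd's own context; report thd->xid_error here. */
        return 1;
      }
      /* plus open(), close(), log_xid() and unlog() as for any TC_LOG */
    };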
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
@@ -227,12 +263,19 @@ private:
time_t last_time;
};
+class binlog_trx_data;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
{
private:
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
pthread_mutex_t LOCK_index;
pthread_mutex_t LOCK_prep_xids;
+ /*
+ Mutex to protect the queue of transactions waiting to participate in group
+ commit. (Only used on platforms without native atomic operations).
+ */
+ pthread_mutex_t LOCK_queue;
+
pthread_cond_t COND_prep_xids;
pthread_cond_t update_cond;
ulonglong bytes_written;
@@ -271,8 +314,8 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
-
- ulonglong m_table_map_version;
+ /* Queue of transactions queued up to participate in group commit. */
+ binlog_trx_data *group_commit_queue;
int write_to_file(IO_CACHE *cache);
/*
@@ -282,6 +325,14 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
*/
void new_file_without_locking();
void new_file_impl(bool need_lock);
+ int write_transaction(binlog_trx_data *trx_data);
+ bool write_transaction_to_binlog_events(binlog_trx_data *trx_data);
+ void trx_group_commit_participant(binlog_trx_data *trx_data);
+ void trx_group_commit_leader(THD *first_thd);
+ binlog_trx_data *atomic_enqueue_trx(binlog_trx_data *trx_data);
+ binlog_trx_data *atomic_grab_trx_queue();
+ void mark_xid_done();
+ void mark_xids_active(uint xid_count);
public:
MYSQL_LOG::generate_name;
@@ -311,17 +362,11 @@ public:
int open(const char *opt_name);
void close();
int log_xid(THD *thd, my_xid xid);
+ int xid_delayed_error(THD *thd);
+ void group_log_xid(THD *first_thd);
void unlog(ulong cookie, my_xid xid);
int recover(IO_CACHE *log, Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
- bool is_table_mapped(TABLE *table) const
- {
- return table->s->table_map_version == table_map_version();
- }
-
- ulonglong table_map_version() const { return m_table_map_version; }
- void update_table_map_version() { ++m_table_map_version; }
-
int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event);
int remove_pending_rows_event(THD *thd);
@@ -362,10 +407,12 @@ public:
void new_file();
bool write(Log_event* event_info); // binary log write
- bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
- bool write_incident(THD *thd, bool lock);
+ bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev);
+ bool trx_group_commit_finish(binlog_trx_data *trx_data);
+ bool write_incident(THD *thd);
- int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+ int write_cache(IO_CACHE *cache);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
=== modified file 'sql/log_event.h'
--- a/sql/log_event.h 2010-03-04 08:03:07 +0000
+++ b/sql/log_event.h 2010-05-26 08:16:18 +0000
@@ -463,10 +463,9 @@ struct sql_ex_info
#define LOG_EVENT_SUPPRESS_USE_F 0x8
/*
- The table map version internal to the log should be increased after
- the event has been written to the binary log.
+ This used to be LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F, but is now unused.
*/
-#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10
+#define LOG_EVENT_UNUSED1_F 0x10
/**
@def LOG_EVENT_ARTIFICIAL_F
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2010-01-15 15:27:55 +0000
+++ b/sql/sql_class.cc 2010-05-26 08:16:18 +0000
@@ -673,6 +673,8 @@ THD::THD()
active_vio = 0;
#endif
pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
+ pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&COND_commit_ordered, 0);
/* Variables with default values */
proc_info="login";
@@ -3773,7 +3775,6 @@ int THD::binlog_flush_pending_rows_event
if (stmt_end)
{
pending->set_flags(Rows_log_event::STMT_END_F);
- pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
binlog_table_maps= 0;
}
@@ -3901,7 +3902,6 @@ int THD::binlog_query(THD::enum_binlog_q
{
Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use,
errcode);
- qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
/*
Binlog table maps will be irrelevant after a Query_log_event
(they are just removed on the slave side) so after the query
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-03-30 12:36:49 +0000
+++ b/sql/sql_class.h 2010-05-26 08:16:18 +0000
@@ -1438,6 +1438,21 @@ public:
/* container for handler's private per-connection data */
Ha_data ha_data[MAX_HA];
+ /* Mutex and condition for waking up threads after group commit. */
+ pthread_mutex_t LOCK_commit_ordered;
+ pthread_cond_t COND_commit_ordered;
+ bool group_commit_ready;
+ /* Pointer for linking THDs into queue waiting for group commit. */
+ THD *next_commit_ordered;
+ /*
+ The "all" parameter of commit(), to communicate it to the thread that
+ calls commit_ordered().
+ */
+ bool group_commit_all;
+ /* Set by TC_LOG::group_log_xid(), to return per-thd error and cookie. */
+ int xid_error;
+ int xid_cookie;
+
#ifndef MYSQL_CLIENT
int binlog_setup_trx_data();
=== modified file 'sql/sql_load.cc'
--- a/sql/sql_load.cc 2010-03-04 08:03:07 +0000
+++ b/sql/sql_load.cc 2010-05-26 08:16:18 +0000
@@ -516,7 +516,6 @@ int mysql_load(THD *thd,sql_exchange *ex
else
{
Delete_file_log_event d(thd, db, transactional_table);
- d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
(void) mysql_bin_log.write(&d);
}
}
@@ -698,7 +697,6 @@ static bool write_execute_load_query_log
(duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
(ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
transactional_table, FALSE, errcode);
- e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
return mysql_bin_log.write(&e);
}
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-03-10 10:32:14 +0000
+++ b/sql/table.cc 2010-05-26 08:16:18 +0000
@@ -297,13 +297,6 @@ TABLE_SHARE *alloc_table_share(TABLE_LIS
share->version= refresh_version;
/*
- This constant is used to mark that no table map version has been
- assigned. No arithmetic is done on the value: it will be
- overwritten with a value taken from MYSQL_BIN_LOG.
- */
- share->table_map_version= ~(ulonglong)0;
-
- /*
Since alloc_table_share() can be called without any locking (for
example, ha_create_table... functions), we do not assign a table
map id here. Instead we assign a value that is not used
@@ -367,10 +360,9 @@ void init_tmp_table_share(THD *thd, TABL
share->frm_version= FRM_VER_TRUE_VARCHAR;
/*
- Temporary tables are not replicated, but we set up these fields
+ Temporary tables are not replicated, but we set up this field
anyway to be able to catch errors.
*/
- share->table_map_version= ~(ulonglong)0;
share->cached_row_logging_check= -1;
/*
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-02-10 19:06:24 +0000
+++ b/sql/table.h 2010-05-26 08:16:18 +0000
@@ -433,7 +433,6 @@ typedef struct st_table_share
bool waiting_on_cond; /* Protection against free */
bool deleting; /* going to delete this table */
ulong table_map_id; /* for row-based replication */
- ulonglong table_map_version;
/*
Cache for row-based replication table share checks that does not
=== modified file 'storage/xtradb/handler/ha_innodb.cc'
--- a/storage/xtradb/handler/ha_innodb.cc 2010-01-15 21:12:30 +0000
+++ b/storage/xtradb/handler/ha_innodb.cc 2010-05-26 08:16:18 +0000
@@ -138,8 +138,6 @@ bool check_global_access(THD *thd, ulong
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
@@ -239,6 +237,7 @@ static const char* innobase_change_buffe
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -1356,7 +1355,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1416,8 +1414,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -2024,6 +2020,7 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@@ -2492,7 +2489,6 @@ skip_overwrite:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST);
@@ -2547,7 +2543,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_mutex_destroy(&analyze_mutex);
@@ -2681,6 +2676,101 @@ innobase_start_trx_and_assign_read_view(
}
/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ if (trx->active_trans == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We cannot throw an error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+ /* Since we will reserve the kernel mutex, we have to release
+ the search system latch first to obey the latching order. */
+
+ if (trx->has_search_latch) {
+ trx_search_latch_release_if_reserved(trx);
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in the same sequence as writing to the binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ /* The following calls to read the MySQL binary log
+ file name and the position return consistent results:
+ 1) We use commit_ordered() to get same commit order
+ in InnoDB as in binary log.
+ 2) A MySQL log file rotation cannot happen because
+ MySQL protects against this by having a counter of
+ transactions in prepared state and it only allows
+ a rotation when the counter drops to zero. See
+ LOCK_prep_xids and COND_prep_xids in log.cc. */
+ trx->mysql_log_file_name = mysql_bin_log_file_name();
+ trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
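In outline, the division of labour between the two calls (a summary of the
code above and of innobase_commit() below, not additional patch code):

    /* innobase_commit_ordered(): serialised, fast --
         trx->flush_log_later = TRUE;
         innobase_commit_low(trx);            // ordered, in-memory commit
         trx->flush_log_later = FALSE;
       innobase_commit(): unserialised, slow --
         trx_commit_complete_for_mysql(trx);  // redo log write + fsync()
    */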
+
+/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@return 0 */
@@ -2702,13 +2792,6 @@ innobase_commit(
trx = check_trx_exists(thd);
- /* Since we will reserve the kernel mutex, we have to release
- the search system latch first to obey the latching order. */
-
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
/* The flag trx->active_trans is set to 1 in
1. ::external_lock(),
@@ -2736,62 +2819,8 @@ innobase_commit(
/* We were instructed to commit the whole transaction, or
this is an SQL statement end and autocommit is on */
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
- }
-
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
-
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered();
+ now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
trx->active_trans = 0;
@@ -4621,6 +4650,7 @@ no_commit:
no need to re-acquire locks on it. */
/* Altering to InnoDB format */
+ innobase_commit_ordered(ht, user_thd, 1);
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
prebuilt->trx->active_trans = 1;
@@ -4637,6 +4667,7 @@ no_commit:
/* Commit the transaction. This will release the table
locks, so they have to be acquired again. */
+ innobase_commit_ordered(ht, user_thd, 1);
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
prebuilt->trx->active_trans = 1;
@@ -8339,6 +8370,7 @@ ha_innobase::external_lock(
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
if (trx->active_trans != 0) {
+ innobase_commit_ordered(ht, thd, TRUE);
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -9448,36 +9480,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
- if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
- /* choose group commit rather than binlog order */
- return(error);
- }
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
@@ -10669,11 +10671,6 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkp
"Enable/Disable flushing along modified age. (none, reflex, [estimate])",
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
-static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
- PLUGIN_VAR_RQCMDARG,
- "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
- NULL, NULL, 0, 0, 1, 0);
-
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
PLUGIN_VAR_RQCMDARG,
"Enable/Disable converting automatically *.ibd files when import tablespace.",
@@ -10763,7 +10760,6 @@ static struct st_mysql_sys_var* innobase
MYSQL_SYSVAR(flush_neighbor_pages),
MYSQL_SYSVAR(read_ahead),
MYSQL_SYSVAR(adaptive_checkpoint),
- MYSQL_SYSVAR(enable_unsafe_group_commit),
MYSQL_SYSVAR(expand_import),
MYSQL_SYSVAR(extra_rsegments),
MYSQL_SYSVAR(dict_size_limit),

[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2849)
by knielsen@knielsen-hq.org 26 May '10
#At lp:maria
2849 knielsen(a)knielsen-hq.org 2010-05-26
Preliminary commit of group commit proof-of-concept.
modified:
sql/handler.cc
sql/handler.h
sql/log.cc
sql/log.h
sql/log_event.h
sql/sql_class.cc
sql/sql_class.h
sql/sql_load.cc
sql/table.cc
sql/table.h
storage/xtradb/handler/ha_innodb.cc
=== modified file 'sql/handler.cc'
--- a/sql/handler.cc 2010-04-06 22:47:08 +0000
+++ b/sql/handler.cc 2010-05-26 08:13:32 +0000
@@ -76,6 +76,7 @@ TYPELIB tx_isolation_typelib= {array_ele
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
+static int commit_one_phase_2(THD *thd, bool all, bool do_commit_ordered);
static plugin_ref ha_default_plugin(THD *thd)
@@ -544,6 +545,26 @@ err:
DBUG_RETURN(1);
}
+/*
+ This is a queue of THDs waiting to be group committed with
+ tc_log->group_log_xid().
+*/
+static THD *group_commit_queue;
+/*
+ This mutex protects the group_commit_queue on platforms without native
+ atomic operations.
+ */
+static pthread_mutex_t LOCK_group_commit_queue;
+/* This mutex is used to serialize calls to handler prepare_ordered methods. */
+static pthread_mutex_t LOCK_prepare_ordered;
+/* This mutex is used to serialize calls to handler commit_ordered methods. */
+static pthread_mutex_t LOCK_commit_ordered;
+/* This mutex is used to serialize calls to group_log_xid(). */
+static pthread_mutex_t LOCK_group_commit;
+static pthread_cond_t COND_group_commit;
+
+static bool mutexes_inited= FALSE;
+
int ha_init()
{
int error= 0;
@@ -557,6 +578,19 @@ int ha_init()
*/
opt_using_transactions= total_ha>(ulong)opt_bin_log;
savepoint_alloc_size+= sizeof(SAVEPOINT);
+
+ group_commit_queue= NULL;
+ my_pthread_mutex_init(&LOCK_group_commit_queue, MY_MUTEX_INIT_FAST,
+ "LOCK_group_commit_queue", MYF(0));
+ my_pthread_mutex_init(&LOCK_prepare_ordered, MY_MUTEX_INIT_SLOW,
+ "LOCK_prepare_ordered", MYF(0));
+ my_pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_SLOW,
+ "LOCK_commit_ordered", MYF(0));
+ my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW,
+ "LOCK_group_commit", MYF(0));
+ pthread_cond_init(&COND_group_commit, 0);
+ mutexes_inited= TRUE;
+
DBUG_RETURN(error);
}
@@ -574,6 +608,15 @@ int ha_end()
if (ha_finish_errors())
error= 1;
+ if (mutexes_inited)
+ {
+ pthread_mutex_destroy(&LOCK_group_commit_queue);
+ pthread_mutex_destroy(&LOCK_prepare_ordered);
+ pthread_mutex_destroy(&LOCK_commit_ordered);
+ pthread_mutex_destroy(&LOCK_group_commit);
+ mutexes_inited= FALSE;
+ }
+
DBUG_RETURN(error);
}
@@ -1053,6 +1096,108 @@ ha_check_and_coalesce_trx_read_only(THD
return rw_ha_count;
}
+/*
+ Atomically enqueue a THD at the head of the queue of threads waiting to
+ group commit, and return the previous head of the queue.
+*/
+static THD *
+enqueue_atomic(THD *thd)
+{
+ my_atomic_rwlock_wrlock(&LOCK_group_commit_queue);
+ thd->next_commit_ordered= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&thd->next_commit_ordered),
+ thd))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_group_commit_queue);
+ return thd->next_commit_ordered;
+}
+
+static THD *
+atomic_grab_reverse_queue()
+{
+ my_atomic_rwlock_wrlock(&LOCK_group_commit_queue);
+ THD *queue= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&queue),
+ NULL))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_group_commit_queue);
+
+ /*
+ Since we enqueue at the head, the queue is actually in reverse order.
+ So reverse it back into correct commit order before returning.
+ */
+ THD *prev= NULL;
+ while (queue)
+ {
+ THD *next= queue->next_commit_ordered;
+ queue->next_commit_ordered= prev;
+ prev= queue;
+ queue= next;
+ }
+
+ return prev;
+}
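
For readers following along: enqueue_atomic()/atomic_grab_reverse_queue() above are a classic lock-free intrusive stack, drained with one atomic swap and then reversed to recover FIFO commit order. A minimal standalone sketch of the same pattern, written against C++11 std::atomic rather than the my_atomic wrappers (all names here are illustrative, not part of the patch):

#include <atomic>
#include <cstdio>

struct Txn
{
  int id;
  Txn *next;                        // intrusive link, used only while queued
};

static std::atomic<Txn *> queue_head(nullptr);

/* Push at the head; a NULL return means we are first (the group leader). */
static Txn *enqueue(Txn *t)
{
  Txn *old= queue_head.load(std::memory_order_relaxed);
  do
    t->next= old;
  while (!queue_head.compare_exchange_weak(old, t,
                                           std::memory_order_release,
                                           std::memory_order_relaxed));
  return old;
}

/* Atomically take the whole queue, then reverse it into FIFO order. */
static Txn *grab_reverse()
{
  Txn *q= queue_head.exchange(nullptr, std::memory_order_acquire);
  Txn *prev= nullptr;
  while (q)
  {
    Txn *next= q->next;
    q->next= prev;
    prev= q;
    q= next;
  }
  return prev;
}

int main()
{
  Txn a= {1, nullptr}, b= {2, nullptr}, c= {3, nullptr};
  enqueue(&a); enqueue(&b); enqueue(&c);
  for (Txn *t= grab_reverse(); t; t= t->next)
    std::printf("commit txn %d\n", t->id);  // prints 1, 2, 3: enqueue order
  return 0;
}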
+
+static void
+call_commit_ordered(Ha_trx_info *ha_info, THD *thd, bool all)
+{
+ for (; ha_info; ha_info= ha_info->next())
+ {
+ handlerton *ht= ha_info->ht();
+ if (!ht->commit_ordered)
+ continue;
+ ht->commit_ordered(ht, thd, all);
+ }
+}
+
+static void
+group_commit_wait_for_wakeup(THD *thd)
+{
+ pthread_mutex_lock(&thd->LOCK_commit_ordered);
+ while (!thd->group_commit_ready)
+ pthread_cond_wait(&thd->COND_commit_ordered,
+ &thd->LOCK_commit_ordered);
+ pthread_mutex_unlock(&thd->LOCK_commit_ordered);
+}
+
+static void
+group_commit_wakeup_other(THD *other_thd)
+{
+ pthread_mutex_lock(&other_thd->LOCK_commit_ordered);
+ other_thd->group_commit_ready= TRUE;
+ pthread_cond_signal(&other_thd->COND_commit_ordered);
+ pthread_mutex_unlock(&other_thd->LOCK_commit_ordered);
+}
+
+static bool group_commit_queue_busy= 0;
+
+static void
+group_commit_mark_queue_idle()
+{
+ pthread_mutex_lock(&LOCK_group_commit);
+ group_commit_queue_busy= FALSE;
+ pthread_cond_signal(&COND_group_commit);
+ pthread_mutex_unlock(&LOCK_group_commit);
+}
+
+static void
+group_commit_mark_queue_busy()
+{
+ safe_mutex_assert_owner(&LOCK_group_commit);
+ group_commit_queue_busy= TRUE;
+}
+
+static void
+group_commit_wait_queue_idle()
+{
+ /* Wait for any existing queue run to finish. */
+ safe_mutex_assert_owner(&LOCK_group_commit);
+ while (group_commit_queue_busy)
+ pthread_cond_wait(&COND_group_commit, &LOCK_group_commit);
+}
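
The helpers above are a standard flag-plus-condition-variable handshake: the boolean flag is what protects against both spurious wakeups and the signal firing before the wait starts. Stripped to its essentials (pthreads, hypothetical names; a sketch, not the patch code):

#include <pthread.h>

struct Waiter
{
  pthread_mutex_t mtx;
  pthread_cond_t cond;
  bool ready;
};

static void waiter_init(Waiter *w)
{
  pthread_mutex_init(&w->mtx, NULL);
  pthread_cond_init(&w->cond, NULL);
  w->ready= false;
}

/* Follower side: block until the leader has processed our entry. */
static void wait_for_wakeup(Waiter *w)
{
  pthread_mutex_lock(&w->mtx);
  while (!w->ready)                 /* loop guards against spurious wakeups */
    pthread_cond_wait(&w->cond, &w->mtx);
  w->ready= false;                  /* re-arm for the next round */
  pthread_mutex_unlock(&w->mtx);
}

/* Leader side: mark the follower done and wake it exactly once. */
static void wakeup(Waiter *w)
{
  pthread_mutex_lock(&w->mtx);
  w->ready= true;                   /* set under the mutex: no lost wakeup */
  pthread_cond_signal(&w->cond);
  pthread_mutex_unlock(&w->mtx);
}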
+
/**
@retval
@@ -1070,7 +1215,7 @@ ha_check_and_coalesce_trx_read_only(THD
*/
int ha_commit_trans(THD *thd, bool all)
{
- int error= 0, cookie= 0;
+ int error= 0;
/*
'all' means that this is either an explicit commit issued by
user, or an implicit commit issued by a DDL.
@@ -1085,7 +1230,10 @@ int ha_commit_trans(THD *thd, bool all)
*/
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list;
- my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+ bool need_prepare_ordered, need_commit_ordered;
+ bool need_enqueue;
+ bool is_group_commit_leader;
+ my_xid xid;
DBUG_ENTER("ha_commit_trans");
/*
@@ -1118,85 +1266,277 @@ int ha_commit_trans(THD *thd, bool all)
DBUG_RETURN(2);
}
#ifdef USING_TRANSACTIONS
- if (ha_info)
+ if (!ha_info)
{
- uint rw_ha_count;
- bool rw_trans;
+ /* Free resources and perform other cleanup even for 'empty' transactions. */
+ if (is_real_trans)
+ thd->transaction.cleanup();
+ DBUG_RETURN(0);
+ }
- DBUG_EXECUTE_IF("crash_commit_before", abort(););
+ DBUG_EXECUTE_IF("crash_commit_before", abort(););
- /* Close all cursors that can not survive COMMIT */
- if (is_real_trans) /* not a statement commit */
- thd->stmt_map.close_transient_cursors();
+ /* Close all cursors that can not survive COMMIT */
+ if (is_real_trans) /* not a statement commit */
+ thd->stmt_map.close_transient_cursors();
- rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
- /* rw_trans is TRUE when we in a transaction changing data */
- rw_trans= is_real_trans && (rw_ha_count > 0);
+ uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
+ /* rw_trans is TRUE when we are in a transaction changing data */
+ bool rw_trans= is_real_trans && (rw_ha_count > 0);
- if (rw_trans &&
- wait_if_global_read_lock(thd, 0, 0))
- {
- ha_rollback_trans(thd, all);
- DBUG_RETURN(1);
- }
+ if (rw_trans &&
+ wait_if_global_read_lock(thd, 0, 0))
+ {
+ ha_rollback_trans(thd, all);
+ DBUG_RETURN(1);
+ }
+
+ if (rw_trans &&
+ opt_readonly &&
+ !(thd->security_ctx->master_access & SUPER_ACL) &&
+ !thd->slave_thread)
+ {
+ my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
+ goto err;
+ }
- if (rw_trans &&
- opt_readonly &&
- !(thd->security_ctx->master_access & SUPER_ACL) &&
- !thd->slave_thread)
+ if (trans->no_2pc || (rw_ha_count <= 1))
+ {
+ error= ha_commit_one_phase(thd, all);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+
+ need_prepare_ordered= FALSE;
+ need_commit_ordered= FALSE;
+ xid= thd->transaction.xid_state.xid.get_my_xid();
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+ {
+ int err;
+ handlerton *ht= hi->ht();
+ /*
+ Do not call two-phase commit if this particular
+ transaction is read-only. This allows for simpler
+ implementation in engines that are always read-only.
+ */
+ if (! hi->is_trx_read_write())
+ continue;
+ /*
+ Sic: we know that prepare() is not NULL since otherwise
+ trans->no_2pc would have been set.
+ */
+ if ((err= ht->prepare(ht, thd, all)))
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+ status_var_increment(thd->status_var.ha_prepare_count);
+
+ if (err)
+ goto err;
+
+ if (ht->prepare_ordered)
+ need_prepare_ordered= TRUE;
+ if (ht->commit_ordered)
+ need_commit_ordered= TRUE;
+ }
+ DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
+
+ if (!is_real_trans)
+ {
+ error= commit_one_phase_2(thd, all, FALSE);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+
+ /*
+ We can optimise away some of the thread synchronisation that may not be
+ needed.
+
+ If need_prepare_ordered, then we need to take LOCK_prepare_ordered.
+
+ If (xid && use_group_log_xid), then we need to enqueue (and this must
+ be done under LOCK_prepare_ordered if we take that lock).
+
+ Similarly, if (need_prepare_ordered && need_commit_ordered), then we
+ need to enqueue under the LOCK_prepare_ordered.
+
+ If (xid && use_group_log_xid), then we need to take LOCK_group_commit.
+
+ If need_commit_ordered, then we need to take LOCK_commit_ordered.
+
+ Cases not covered above can be skipped to optimise things a bit.
+ */
+ need_enqueue= (xid && tc_log->use_group_log_xid) ||
+ (need_prepare_ordered && need_commit_ordered);
+
+ thd->group_commit_ready= FALSE;
+ thd->group_commit_all= all;
+ if (need_prepare_ordered)
+ {
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
{
- my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
- ha_rollback_trans(thd, all);
- error= 1;
- goto end;
+ int err;
+ handlerton *ht= hi->ht();
+ if (! hi->is_trx_read_write())
+ continue;
+ if (ht->prepare_ordered && (err= ht->prepare_ordered(ht, thd, all)))
+ {
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+ goto err;
+ }
}
+ }
+ if (need_enqueue)
+ {
+ THD *previous_queue= enqueue_atomic(thd);
+ is_group_commit_leader= (previous_queue == NULL);
+ }
+ if (need_prepare_ordered)
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
- if (!trans->no_2pc && (rw_ha_count > 1))
+ int cookie;
+ if (tc_log->use_group_log_xid)
+ {
+ if (is_group_commit_leader)
{
- for (; ha_info && !error; ha_info= ha_info->next())
+ pthread_mutex_lock(&LOCK_group_commit);
+ group_commit_wait_queue_idle();
+
+ THD *queue= atomic_grab_reverse_queue();
+ /* The first in the queue is the leader. */
+ DBUG_ASSERT(queue == thd);
+
+ /*
+ This will set individual error codes in each thd->xid_error, as
+ well as set thd->xid_cookie for later unlog() call.
+ */
+ tc_log->group_log_xid(queue);
+
+ /*
+ Call commit_ordered methods for all transactions in the queue
+ (that did not get an error in group_log_xid()).
+
+ We do this under an additional global LOCK_commit_ordered; this is
+ so that transactions that do not need 2-phase commit do not have
+ to wait for the potentially long duration of LOCK_group_commit.
+ */
+ if (need_commit_ordered)
{
- int err;
- handlerton *ht= ha_info->ht();
- /*
- Do not call two-phase commit if this particular
- transaction is read-only. This allows for simpler
- implementation in engines that are always read-only.
- */
- if (! ha_info->is_trx_read_write())
- continue;
- /*
- Sic: we know that prepare() is not NULL since otherwise
- trans->no_2pc would have been set.
- */
- if ((err= ht->prepare(ht, thd, all)))
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ for (THD *thd2= queue; thd2 != NULL; thd2= thd2->next_commit_ordered)
{
- my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
- error= 1;
+ if (!thd2->xid_error)
+ call_commit_ordered(ha_info, thd2, thd2->group_commit_all);
}
- status_var_increment(thd->status_var.ha_prepare_count);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
}
- DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_ABORT(););
- if (error || (is_real_trans && xid &&
- (error= !(cookie= tc_log->log_xid(thd, xid)))))
- {
- ha_rollback_trans(thd, all);
- error= 1;
- goto end;
- }
- DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
+ pthread_mutex_unlock(&LOCK_group_commit);
+
+ /* Wake up everyone except ourself. */
+ while ((queue= queue->next_commit_ordered) != NULL)
+ group_commit_wakeup_other(queue);
+ }
+ else
+ {
+ /* If not leader, just wait until leader wakes us up. */
+ group_commit_wait_for_wakeup(thd);
+ }
+
+ /*
+ Now that we're back in our own thread context, do any delayed error
+ reporting.
+ */
+ if (thd->xid_error)
+ {
+ tc_log->xid_delayed_error(thd);
+ goto err;
+ }
+ cookie= thd->xid_cookie;
+ /* The cookie must be non-zero in the non-error case. */
+ DBUG_ASSERT(cookie);
+ }
+ else
+ {
+ if (xid)
+ cookie= tc_log->log_xid(thd, xid);
+
+ if (!need_enqueue)
+ {
+ error= commit_one_phase_2(thd, all, TRUE);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+ }
+
+ /*
+ We only get here to do correctly sequenced prepare_ordered and
+ commit_ordered() calls.
+
+ In this case, we need to wait for the previous transaction in the queue
+ to finish commit_ordered() before us, to get the correct sequence.
+ */
+ DBUG_ASSERT(need_prepare_ordered && need_commit_ordered);
+
+ if (is_group_commit_leader)
+ {
+ pthread_mutex_lock(&LOCK_group_commit);
+ group_commit_wait_queue_idle();
+ THD *queue= atomic_grab_reverse_queue();
+ /*
+ Mark the queue busy while we bounce it from one thread to the
+ next.
+ */
+ group_commit_mark_queue_busy();
+ pthread_mutex_unlock(&LOCK_group_commit);
+
+ /* The first in the queue is the leader. */
+ DBUG_ASSERT(queue == thd);
}
- error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
- DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
+ else
+ {
+ /* If not leader, just wait until previous thread wakes us up. */
+ group_commit_wait_for_wakeup(thd);
+ }
+
+ /* Only run commit_ordered() if log_xid was successful. */
if (cookie)
- tc_log->unlog(cookie, xid);
- DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
-end:
- if (rw_trans)
- start_waiting_global_read_lock(thd);
+ {
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ call_commit_ordered(ha_info, thd, all);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ }
+
+ THD *next= thd->next_commit_ordered;
+ if (next)
+ group_commit_wakeup_other(next);
+ else
+ group_commit_mark_queue_idle();
+
+ if (!cookie)
+ goto err;
}
- /* Free resources and perform other cleanup even for 'empty' transactions. */
- else if (is_real_trans)
- thd->transaction.cleanup();
+
+ DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_ABORT(););
+
+ error= commit_one_phase_2(thd, all, FALSE) ? 2 : 0;
+
+ DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_ABORT(););
+ DBUG_ASSERT(cookie);
+ tc_log->unlog(cookie, xid);
+
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_ABORT(););
+ goto end;
+
+ /* Come here on error, when we need to roll back. */
+err:
+ if (!error)
+ error= 1;
+ ha_rollback_trans(thd, all);
+
+end:
+ if (rw_trans)
+ start_waiting_global_read_lock(thd);
#endif /* USING_TRANSACTIONS */
DBUG_RETURN(error);
}
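
Condensed, the commit path above works like this: the first transaction to enter the queue becomes the group commit leader and logs and commit_ordered()s the entire group; the others sleep until the leader wakes them. A compilable single-threaded model of that flow (plain pointers instead of the lock-free queue, a flag instead of the real wakeup; purely illustrative):

#include <cstdio>
#include <cstddef>

struct Txn { int id; Txn *next; bool done; };

static Txn *queue_head= NULL;

/* Push at head; NULL return means this transaction is the group leader. */
static Txn *enqueue(Txn *t)
{
  Txn *old= queue_head;
  t->next= old;
  queue_head= t;
  return old;
}

/* Drain the queue and reverse it into commit (FIFO) order. */
static Txn *grab_reverse_queue()
{
  Txn *q= queue_head, *prev= NULL;
  queue_head= NULL;
  while (q)
  {
    Txn *next= q->next;
    q->next= prev;
    prev= q;
    q= next;
  }
  return prev;
}

static void leader_run()
{
  Txn *queue= grab_reverse_queue();       /* starts with the leader itself */
  for (Txn *t= queue; t; t= t->next)
    std::printf("log + commit_ordered txn %d\n", t->id); /* one fsync total */
  for (Txn *t= queue; t; t= t->next)
    t->done= true;                        /* real code: wake each follower */
}

int main()
{
  Txn t1= {1, NULL, false}, t2= {2, NULL, false}, t3= {3, NULL, false};
  bool leader= (enqueue(&t1) == NULL);    /* t1 arrives first: leader */
  enqueue(&t2);
  enqueue(&t3);
  if (leader)
    leader_run();                         /* processes t1, t2, t3 in order */
  std::printf("done: %d %d %d\n", t1.done, t2.done, t3.done);
  return 0;
}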
@@ -1207,6 +1547,17 @@ end:
*/
int ha_commit_one_phase(THD *thd, bool all)
{
+ /*
+ When we come here, we did not call handler commit_ordered() methods in
+ ha_commit_trans() 2-phase commit, so we pass TRUE to do it in
+ commit_one_phase_2().
+ */
+ return commit_one_phase_2(thd, all, TRUE);
+}
+
+static int
+commit_one_phase_2(THD *thd, bool all, bool do_commit_ordered)
+{
int error=0;
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
/*
@@ -1218,10 +1569,40 @@ int ha_commit_one_phase(THD *thd, bool a
*/
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
- DBUG_ENTER("ha_commit_one_phase");
+ DBUG_ENTER("commit_one_phase_2");
#ifdef USING_TRANSACTIONS
if (ha_info)
{
+ if (is_real_trans && do_commit_ordered)
+ {
+ /*
+ If we did not do it already, call any commit_ordered() method.
+
+ Even though we do not need to keep any ordering with other threads
+ (as there is no prepare or log_xid for this commit), we still need to
+ do this under the LOCK_commit_ordered mutex to not run in parallel
+ with other commit_ordered calls.
+ */
+
+ bool locked= FALSE;
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+ {
+ handlerton *ht= hi->ht();
+ if (ht->commit_ordered)
+ {
+ if (!locked)
+ {
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ locked= 1;
+ }
+ ht->commit_ordered(ht, thd, all);
+ }
+ }
+ if (locked)
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ }
+
for (; ha_info; ha_info= ha_info_next)
{
int err;
=== modified file 'sql/handler.h'
--- a/sql/handler.h 2010-01-14 16:51:00 +0000
+++ b/sql/handler.h 2010-05-26 08:13:32 +0000
@@ -656,9 +656,81 @@ struct handlerton
NOTE 'all' is also false in auto-commit mode where 'end of statement'
and 'real commit' mean the same event.
*/
- int (*commit)(handlerton *hton, THD *thd, bool all);
+ int (*commit)(handlerton *hton, THD *thd, bool all);
+ /*
+ The commit_ordered() method is called prior to the commit() method, after
+ the transaction manager has decided to commit (not rollback) the
+ transaction.
+
+ The calls to commit_ordered() in multiple parallel transactions are
+ guaranteed to happen in the same order in every participating
+ handler. This can be used to ensure the same commit order among multiple
+ handlers (eg. in table handler and binlog). So if transaction T1 calls
+ into commit_ordered() of handler A before T2, then T1 will also call
+ commit_ordered() of handler B before T2.
+
+ The intention is that commit_ordered() should do the minimal amount of
+ work that needs to happen in consistent commit order among handlers. To
+ preserve ordering, calls need to be serialised on a global mutex, so
+ doing any time-consuming or blocking operations in commit_ordered() will
+ limit scalability.
+
+ Handlers can rely on commit_ordered() calls being serialised (no two
+ calls can run in parallel, so no extra locking on the handler part is
+ required to ensure this).
+
+ Note that commit_ordered() can be called from a different thread than the
+ one handling the transaction! So it can not do anything that depends on
+ thread local storage, in particular it can not call my_error() and
+ friends (instead it can store the error code and delay the call to
+ my_error() to the commit() method).
+
+ Similarly, since commit_ordered() returns void, any return error code
+ must be saved and returned from the commit() method instead.
+
+ commit_ordered() is called only when actually committing a transaction
+ (autocommit or not), not when ending a statement in the middle of a
+ transaction.
+
+ The commit_ordered method is optional, and can be left unset if not
+ needed in a particular handler.
+ */
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
int (*rollback)(handlerton *hton, THD *thd, bool all);
int (*prepare)(handlerton *hton, THD *thd, bool all);
+ /*
+ The prepare_ordered method is optional. If set, it will be called after
+ successful prepare() in all handlers participating in 2-phase commit.
+
+ The calls to prepare_ordered() among multiple parallel transactions are
+ ordered consistently with calls to commit_ordered(). This means that
+ calls to prepare_ordered() effectively define the commit order, and that
+ each handler will see the same sequence of transactions calling into
+ prepare_ordered() and commit_ordered().
+
+ Thus, prepare_ordered() can be used to define commit order for handlers
+ that need to do this in the prepare step (like binlog). It can also be
+ used to release transactions locks early in an order consistent with the
+ order transactions will be eventually committed.
+
+ Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+ ordering, so the intention is that they should execute fast, with only
+ the minimal amount of work needed to define commit order. Handlers can
+ rely on this serialisation, and do not need to do any extra locking to
+ avoid two prepare_ordered() calls running in parallel.
+
+ Unlike commit_ordered(), prepare_ordered() _is_ guaranteed to be called
+ in the context of the thread handling the rest of the transaction.
+
+ Note that for user-level XA SQL commands, no consistent ordering among
+ prepare_ordered() and commit_ordered() is guaranteed (as that would
+ require blocking all other commits for an indefinite time).
+
+ prepare_ordered() is called only when actually committing a transaction
+ (autocommit or not), not when ending a statement in the middle of a
+ transaction.
+ */
+ int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
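
To make the division of labour concrete: an engine implementing the new hook would do only the cheap, order-defining step in commit_ordered() and keep the slow durable work in commit(). A simplified sketch of such an engine-side split (hypothetical engine, standalone types standing in for handlerton/THD):

#include <atomic>

static std::atomic<unsigned long long> commit_seq(0);

struct ExampleTxn
{
  unsigned long long seq;     /* position in the global commit order */
};

/* Runs in binlog commit order under a global mutex: keep it tiny. */
void example_commit_ordered(ExampleTxn *txn)
{
  txn->seq= ++commit_seq;     /* fixes this transaction's place in the order */
  /* No my_error() here: we may be running in another session's thread. */
}

/* Runs later, possibly in parallel with other commits: the expensive part. */
int example_commit(ExampleTxn *txn)
{
  /* ... write a commit record carrying txn->seq, fsync, release locks ... */
  return 0;
}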
=== modified file 'sql/log.cc'
--- a/sql/log.cc 2010-04-06 22:47:08 +0000
+++ b/sql/log.cc 2010-05-26 08:13:32 +0000
@@ -154,9 +154,12 @@ class binlog_trx_data {
public:
binlog_trx_data()
: at_least_one_stmt_committed(0), incident(FALSE), m_pending(0),
- before_stmt_pos(MY_OFF_T_UNDEF)
+ before_stmt_pos(MY_OFF_T_UNDEF), using_xa(0)
{
trans_log.end_of_file= max_binlog_cache_size;
+ (void) my_pthread_mutex_init(&LOCK_group_commit, MY_MUTEX_INIT_SLOW,
+ "LOCK_group_commit", MYF(0));
+ (void) pthread_cond_init(&COND_group_commit, 0);
}
~binlog_trx_data()
@@ -208,11 +211,12 @@ public:
completely.
*/
void reset() {
- if (!empty())
+ if (trans_log.type != WRITE_CACHE || !empty())
truncate(0);
before_stmt_pos= MY_OFF_T_UNDEF;
incident= FALSE;
trans_log.end_of_file= max_binlog_cache_size;
+ using_xa= FALSE;
DBUG_ASSERT(empty());
}
@@ -257,6 +261,41 @@ public:
Binlog position before the start of the current statement.
*/
my_off_t before_stmt_pos;
+
+ /* 0 or error when writing to binlog; set during group commit. */
+ int error;
+ /* If error != 0, value of errno (for my_error() reporting). */
+ int commit_errno;
+ /* Link for queueing transactions up for group commit to binlog. */
+ binlog_trx_data *next;
+ /*
+ Flag set true when group commit for this transaction is finished; used
+ with pthread_cond_wait() to wait until commit is done.
+ This flag is protected by LOCK_group_commit.
+ */
+ bool done;
+ /*
+ Flag set if this transaction is the group commit leader that will handle
+ the actual writing to the binlog.
+ This flag is protected by LOCK_group_commit.
+ */
+ bool group_commit_leader;
+ /*
+ Flag set true if this transaction is committed with log_xid() as part of
+ XA, false if not.
+ */
+ bool using_xa;
+ /*
+ Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
+ written during group commit. The incident_event is only valid if
+ has_incident() is true.
+ */
+ Log_event *begin_event;
+ Log_event *end_event;
+ Log_event *incident_event;
+ /* Mutex and condition for wakeup after group commit. */
+ pthread_mutex_t LOCK_group_commit;
+ pthread_cond_t COND_group_commit;
};
handlerton *binlog_hton;
@@ -1391,117 +1430,188 @@ static int binlog_close_connection(handl
return 0;
}
+/* Helper functions for binlog_flush_trx_cache(). */
+static int
+binlog_flush_trx_cache_prepare(THD *thd)
+{
+ if (thd->binlog_flush_pending_rows_event(TRUE))
+ return 1;
+ return 0;
+}
+
+static void
+binlog_flush_trx_cache_finish(THD *thd, binlog_trx_data *trx_data)
+{
+ IO_CACHE *trans_log= &trx_data->trans_log;
+
+ trx_data->reset();
+
+ statistic_increment(binlog_cache_use, &LOCK_status);
+ if (trans_log->disk_writes != 0)
+ {
+ statistic_increment(binlog_cache_disk_use, &LOCK_status);
+ trans_log->disk_writes= 0;
+ }
+}
+
/*
- End a transaction.
+ End a transaction, writing events to the binary log.
SYNOPSIS
- binlog_end_trans()
+ binlog_flush_trx_cache()
thd The thread whose transaction should be ended
trx_data Pointer to the transaction data to use
- end_ev The end event to use, or NULL
- all True if the entire transaction should be ended, false if
- only the statement transaction should be ended.
+ end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
DESCRIPTION
End the currently open transaction. The transaction can be either
- a real transaction (if 'all' is true) or a statement transaction
- (if 'all' is false).
+ a real transaction or a statement transaction.
- If 'end_ev' is NULL, the transaction is a rollback of only
- transactional tables, so the transaction cache will be truncated
- to either just before the last opened statement transaction (if
- 'all' is false), or reset completely (if 'all' is true).
+ This can be to commit a transaction, with a COMMIT query event or an XA
+ commit XID event. But it can also be to rollback a transaction with a
+ ROLLBACK query event, used for rolling back transactions which also
+ contain updates to non-transactional tables.
*/
static int
-binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
- Log_event *end_ev, bool all)
+binlog_flush_trx_cache(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev)
{
- DBUG_ENTER("binlog_end_trans");
- int error=0;
- IO_CACHE *trans_log= &trx_data->trans_log;
- DBUG_PRINT("enter", ("transaction: %s end_ev: 0x%lx",
- all ? "all" : "stmt", (long) end_ev));
+ DBUG_ENTER("binlog_flush_trx_cache");
DBUG_PRINT("info", ("thd->options={ %s%s}",
FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
FLAGSTR(thd->options, OPTION_BEGIN)));
+ if (binlog_flush_trx_cache_prepare(thd))
+ DBUG_RETURN(1);
+
/*
- NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
- only transactional tables. If the transaction contain changes to
- any non-transactiona tables, we need write the transaction and log
- a ROLLBACK last.
- */
- if (end_ev != NULL)
- {
- if (thd->binlog_flush_pending_rows_event(TRUE))
- DBUG_RETURN(1);
- /*
- Doing a commit or a rollback including non-transactional tables,
- i.e., ending a transaction where we might write the transaction
- cache to the binary log.
-
- We can always end the statement when ending a transaction since
- transactions are not allowed inside stored functions. If they
- were, we would have to ensure that we're not ending a statement
- inside a stored function.
- */
- error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
- trx_data->has_incident());
- trx_data->reset();
+ Doing a commit or a rollback including non-transactional tables,
+ i.e., ending a transaction where we might write the transaction
+ cache to the binary log.
+
+ We can always end the statement when ending a transaction since
+ transactions are not allowed inside stored functions. If they
+ were, we would have to ensure that we're not ending a statement
+ inside a stored function.
+ */
+ int error= mysql_bin_log.write_transaction_to_binlog(thd, trx_data, end_ev);
- /*
- We need to step the table map version after writing the
- transaction cache to disk.
- */
- mysql_bin_log.update_table_map_version();
- statistic_increment(binlog_cache_use, &LOCK_status);
- if (trans_log->disk_writes != 0)
- {
- statistic_increment(binlog_cache_disk_use, &LOCK_status);
- trans_log->disk_writes= 0;
- }
- }
- else
- {
- /*
- If rolling back an entire transaction or a single statement not
- inside a transaction, we reset the transaction cache.
+ binlog_flush_trx_cache_finish(thd, trx_data);
- If rolling back a statement in a transaction, we truncate the
- transaction cache to remove the statement.
- */
- thd->binlog_remove_pending_rows_event(TRUE);
- if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
- {
- if (trx_data->has_incident())
- error= mysql_bin_log.write_incident(thd, TRUE);
- trx_data->reset();
- }
- else // ...statement
- trx_data->truncate(trx_data->before_stmt_pos);
+ DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
+ DBUG_RETURN(error);
+}
- /*
- We need to step the table map version on a rollback to ensure
- that a new table map event is generated instead of the one that
- was written to the thrown-away transaction cache.
- */
- mysql_bin_log.update_table_map_version();
+/*
+ Discard a transaction, ie. ROLLBACK with only transactional table updates.
+
+ SYNOPSIS
+ binlog_truncate_trx_cache()
+
+ thd The thread whose transaction should be ended
+ trx_data Pointer to the transaction data to use
+ all True if the entire transaction should be ended, false if
+ only the statement transaction should be ended.
+
+ DESCRIPTION
+
+ Rollback (and end) a transaction that only modifies transactional
+ tables. The transaction can be either a real transaction (if 'all' is
+ true) or a statement transaction (if 'all' is false).
+
+ The transaction cache will be truncated to either just before the last
+ opened statement transaction (if 'all' is false), or reset completely (if
+ 'all' is true).
+ */
+static int
+binlog_truncate_trx_cache(THD *thd, binlog_trx_data *trx_data, bool all)
+{
+ DBUG_ENTER("binlog_truncate_trx_cache");
+ int error= 0;
+ DBUG_PRINT("enter", ("transaction: %s", all ? "all" : "stmt"));
+ DBUG_PRINT("info", ("thd->options={ %s%s}",
+ FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
+ FLAGSTR(thd->options, OPTION_BEGIN)));
+
+ /*
+ ROLLBACK with nothing to replicate: i.e., rollback of only transactional
+ tables.
+ */
+
+ /*
+ If rolling back an entire transaction or a single statement not
+ inside a transaction, we reset the transaction cache.
+
+ If rolling back a statement in a transaction, we truncate the
+ transaction cache to remove the statement.
+ */
+ thd->binlog_remove_pending_rows_event(TRUE);
+ if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ {
+ if (trx_data->has_incident())
+ error= mysql_bin_log.write_incident(thd);
+ trx_data->reset();
}
+ else // ...statement
+ trx_data->truncate(trx_data->before_stmt_pos);
DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
DBUG_RETURN(error);
}
+static LEX_STRING const write_error_msg=
+ { C_STRING_WITH_LEN("error writing to the binary log") };
+
static int binlog_prepare(handlerton *hton, THD *thd, bool all)
{
/*
- do nothing.
- just pretend we can do 2pc, so that MySQL won't
- switch to 1pc.
- real work will be done in MYSQL_BIN_LOG::log_xid()
+ If this prepare is for a single statement in the middle of a transaction,
+ not the actual transaction commit, then we do nothing. The real work is
+ only done later, in the prepare for the transaction commit proper.
*/
+ if (!all && (thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ return 0;
+
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+ trx_data->using_xa= TRUE;
+
+ if (binlog_flush_trx_cache_prepare(thd))
+ return 1;
+
+ my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+ if (!xid)
+ {
+ /* Skip logging this transaction, marked by setting end_event to NULL. */
+ trx_data->end_event= NULL;
+ return 0;
+ }
+
+ /*
+ Allocate the extra events that will be logged to the binlog in binlog group
+ commit. Use placement new to allocate them on the THD memroot, as they need
+ to remain live until log_xid() returns.
+ */
+ size_t needed_size= sizeof(Query_log_event) + sizeof(Xid_log_event);
+ if (trx_data->has_incident())
+ needed_size+= sizeof(Incident_log_event);
+ uchar *mem= (uchar *)thd->alloc(needed_size);
+ if (!mem)
+ return 1;
+
+ trx_data->begin_event= new ((void *)mem)
+ Query_log_event(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ mem+= sizeof(Query_log_event);
+
+ trx_data->end_event= new ((void *)mem) Xid_log_event(thd, xid);
+
+ if (trx_data->has_incident())
+ trx_data->incident_event= new ((void *)(mem + sizeof(Xid_log_event)))
+ Incident_log_event(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+
return 0;
}
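
The allocation trick above -- one memroot block carved up with placement new so the events outlive this function but die with the THD -- looks like this in isolation (malloc/free standing in for the memroot, toy event types; real code must also keep each sub-object properly aligned):

#include <cstdlib>
#include <cstdio>
#include <new>

struct BeginEv { unsigned long long pos; BeginEv() : pos(0) {} };
struct XidEv   { unsigned long long xid; XidEv(unsigned long long x) : xid(x) {} };

int main()
{
  /* One allocation covers both events; both types are 8-byte aligned here. */
  size_t needed= sizeof(BeginEv) + sizeof(XidEv);
  unsigned char *mem= (unsigned char *) std::malloc(needed);
  if (!mem)
    return 1;

  BeginEv *begin= new ((void *) mem) BeginEv();
  XidEv *end= new ((void *) (mem + sizeof(BeginEv))) XidEv(42);

  /* The events stay valid until the block is freed -- with a THD memroot
     that free happens automatically, with no per-event delete. */
  std::printf("xid=%llu pos=%llu\n", end->xid, begin->pos);

  std::free(mem);  /* trivial destructors, so no explicit ~T() calls needed */
  return 0;
}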
@@ -1525,11 +1635,11 @@ static int binlog_commit(handlerton *hto
binlog_trx_data *const trx_data=
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
- if (trx_data->empty())
+ if (trx_data->using_xa)
{
// we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid()
- trx_data->reset();
- DBUG_RETURN(0);
+ binlog_flush_trx_cache_finish(thd, trx_data);
+ DBUG_RETURN(error);
}
/*
@@ -1556,8 +1666,8 @@ static int binlog_commit(handlerton *hto
!stmt_has_updated_trans_table(thd) &&
thd->transaction.stmt.modified_non_trans_table))
{
- Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
- error= binlog_end_trans(thd, trx_data, &qev, all);
+ Query_log_event end_ev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
+ error= binlog_flush_trx_cache(thd, trx_data, &end_ev);
}
trx_data->at_least_one_stmt_committed = my_b_tell(&trx_data->trans_log) > 0;
@@ -1621,7 +1731,7 @@ static int binlog_rollback(handlerton *h
(thd->options & OPTION_KEEP_LOG)) &&
mysql_bin_log.check_write_error(thd))
trx_data->set_incident();
- error= binlog_end_trans(thd, trx_data, 0, all);
+ error= binlog_truncate_trx_cache(thd, trx_data, all);
}
else
{
@@ -1641,8 +1751,8 @@ static int binlog_rollback(handlerton *h
thd->current_stmt_binlog_row_based) ||
((thd->options & OPTION_KEEP_LOG)))
{
- Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
- error= binlog_end_trans(thd, trx_data, &qev, all);
+ Query_log_event end_ev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
+ error= binlog_flush_trx_cache(thd, trx_data, &end_ev);
}
/*
Otherwise, we simply truncate the cache as there is no change on
@@ -1650,7 +1760,7 @@ static int binlog_rollback(handlerton *h
*/
else if ((all && !thd->transaction.all.modified_non_trans_table) ||
(!all && !thd->transaction.stmt.modified_non_trans_table))
- error= binlog_end_trans(thd, trx_data, 0, all);
+ error= binlog_truncate_trx_cache(thd, trx_data, all);
}
if (!all)
trx_data->before_stmt_pos = MY_OFF_T_UNDEF; // part of the stmt rollback
@@ -2464,7 +2574,7 @@ const char *MYSQL_LOG::generate_name(con
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
- need_start_event(TRUE), m_table_map_version(0),
+ need_start_event(TRUE),
is_relay_log(0),
description_event_for_exec(0), description_event_for_queue(0)
{
@@ -2477,6 +2587,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG()
index_file_name[0] = 0;
bzero((char*) &index_file, sizeof(index_file));
bzero((char*) &purge_index_file, sizeof(purge_index_file));
+ use_group_log_xid= TRUE;
}
/* this is called only once */
@@ -2492,6 +2603,7 @@ void MYSQL_BIN_LOG::cleanup()
delete description_event_for_exec;
(void) pthread_mutex_destroy(&LOCK_log);
(void) pthread_mutex_destroy(&LOCK_index);
+ (void) pthread_mutex_destroy(&LOCK_queue);
(void) pthread_cond_destroy(&update_cond);
}
DBUG_VOID_RETURN;
@@ -2520,6 +2632,8 @@ void MYSQL_BIN_LOG::init_pthread_objects
*/
(void) my_pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW, "LOCK_index",
MYF_NO_DEADLOCK_DETECTION);
+ (void) my_pthread_mutex_init(&LOCK_queue, MY_MUTEX_INIT_FAST, "LOCK_queue",
+ MYF(0));
(void) pthread_cond_init(&update_cond, 0);
}
@@ -4113,7 +4227,6 @@ int THD::binlog_write_table_map(TABLE *t
DBUG_RETURN(error);
binlog_table_maps++;
- table->s->table_map_version= mysql_bin_log.table_map_version();
DBUG_RETURN(0);
}
@@ -4194,64 +4307,41 @@ MYSQL_BIN_LOG::flush_and_set_pending_row
if (Rows_log_event* pending= trx_data->pending())
{
- IO_CACHE *file= &log_file;
-
/*
Decide if we should write to the log file directly or to the
transaction log.
*/
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
- file= &trx_data->trans_log;
-
- /*
- If we are writing to the log file directly, we could avoid
- locking the log. This does not work since we need to step the
- m_table_map_version below, and that change has to be protected
- by the LOCK_log mutex.
- */
- pthread_mutex_lock(&LOCK_log);
-
- /*
- Write pending event to log file or transaction cache
- */
- if (pending->write(file))
{
- pthread_mutex_unlock(&LOCK_log);
- set_write_error(thd);
- DBUG_RETURN(1);
+ /* Write to transaction log/cache. */
+ if (pending->write(&trx_data->trans_log))
+ {
+ set_write_error(thd);
+ DBUG_RETURN(1);
+ }
}
-
- /*
- We step the table map version if we are writing an event
- representing the end of a statement. We do this regardless of
- wheather we write to the transaction cache or to directly to the
- file.
-
- In an ideal world, we could avoid stepping the table map version
- if we were writing to a transaction cache, since we could then
- reuse the table map that was written earlier in the transaction
- cache. This does not work since STMT_END_F implies closing all
- table mappings on the slave side.
-
- TODO: Find a solution so that table maps does not have to be
- written several times within a transaction.
- */
- if (pending->get_flags(Rows_log_event::STMT_END_F))
- ++m_table_map_version;
-
- delete pending;
-
- if (file == &log_file)
+ else
{
+ /* Write directly to log file. */
+ pthread_mutex_lock(&LOCK_log);
+ if (pending->write(&log_file))
+ {
+ pthread_mutex_unlock(&LOCK_log);
+ set_write_error(thd);
+ DBUG_RETURN(1);
+ }
+
error= flush_and_sync();
if (!error)
{
signal_update();
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+
+ pthread_mutex_unlock(&LOCK_log);
}
- pthread_mutex_unlock(&LOCK_log);
+ delete pending;
}
thd->binlog_set_pending_rows_event(event);
@@ -4450,9 +4540,6 @@ err:
set_write_error(thd);
}
- if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
- ++m_table_map_version;
-
pthread_mutex_unlock(&LOCK_log);
DBUG_RETURN(error);
}
@@ -4575,18 +4662,14 @@ uint MYSQL_BIN_LOG::next_file_id()
SYNOPSIS
write_cache()
cache Cache to write to the binary log
- lock_log True if the LOCK_log mutex should be aquired, false otherwise
- sync_log True if the log should be flushed and sync:ed
DESCRIPTION
Write the contents of the cache to the binary log. The cache will
be reset as a READ_CACHE to be able to read the contents from it.
*/
-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
+int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache)
{
- Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
-
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
return ER_ERROR_ON_WRITE;
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
@@ -4697,6 +4780,7 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE
}
/* Write data to the binary log file */
+ DBUG_EXECUTE_IF("fail_binlog_write_1", return ER_ERROR_ON_WRITE;);
if (my_b_write(&log_file, cache->read_pos, length))
return ER_ERROR_ON_WRITE;
cache->read_pos=cache->read_end; // Mark buffer used up
@@ -4704,9 +4788,6 @@ int MYSQL_BIN_LOG::write_cache(IO_CACHE
DBUG_ASSERT(carry == 0);
- if (sync_log)
- flush_and_sync();
-
return 0; // All OK
}
@@ -4739,26 +4820,22 @@ int query_error_code(THD *thd, bool not_
return error;
}
-bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+bool MYSQL_BIN_LOG::write_incident(THD *thd)
{
uint error= 0;
DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
- LEX_STRING const write_error_msg=
- { C_STRING_WITH_LEN("error writing to the binary log") };
Incident incident= INCIDENT_LOST_EVENTS;
Incident_log_event ev(thd, incident, write_error_msg);
- if (lock)
- pthread_mutex_lock(&LOCK_log);
+
+ pthread_mutex_lock(&LOCK_log);
error= ev.write(&log_file);
- if (lock)
+ if (!error && !(error= flush_and_sync()))
{
- if (!error && !(error= flush_and_sync()))
- {
- signal_update();
- rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
- }
- pthread_mutex_unlock(&LOCK_log);
+ signal_update();
+ rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+ pthread_mutex_unlock(&LOCK_log);
+
DBUG_RETURN(error);
}
@@ -4786,103 +4863,364 @@ bool MYSQL_BIN_LOG::write_incident(THD *
'cache' needs to be reinitialized after this function returns.
*/
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
- bool incident)
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev)
{
- DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
+ DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+
+ /*
+ Create the necessary events here, where we have the correct THD (and
+ thread context).
+
+ Due to group commit the actual writing to binlog may happen in a different
+ thread.
+ */
+ Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ trx_data->begin_event= &qinfo;
+ trx_data->end_event= end_ev;
+ if (trx_data->has_incident())
+ {
+ Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+ trx_data->incident_event= &inc_ev;
+ DBUG_RETURN(write_transaction_to_binlog_events(trx_data));
+ }
+ else
+ {
+ trx_data->incident_event= NULL;
+ DBUG_RETURN(write_transaction_to_binlog_events(trx_data));
+ }
+}
+
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog_events(binlog_trx_data *trx_data)
+{
+ /*
+ To facilitate group commit for the binlog, we first queue up ourselves in
+ the group commit queue. Then the first thread to enter the queue waits for
+ the LOCK_log mutex, and commits for everyone in the queue once it gets the
+ lock. Any other threads in the queue just wait for the first one to finish
+ the commit and wake them up.
+ */
+
+ pthread_mutex_lock(&trx_data->LOCK_group_commit);
+ const binlog_trx_data *orig_queue= atomic_enqueue_trx(trx_data);
+
+ if (orig_queue != NULL)
+ {
+ trx_data->group_commit_leader= FALSE;
+ trx_data->done= FALSE;
+ trx_group_commit_participant(trx_data);
+ }
+ else
+ {
+ trx_data->group_commit_leader= TRUE;
+ pthread_mutex_unlock(&trx_data->LOCK_group_commit);
+ trx_group_commit_leader(NULL);
+ }
+
+ return trx_group_commit_finish(trx_data);
+}
+
+/*
+ Participate as secondary transaction in group commit.
+
+ Another thread is already waiting to obtain the LOCK_log, and should include
+ this thread in the group commit once the log is obtained. So here we put
+ ourselves in the queue and wait to be signalled that the group commit is done.
+
+ Note that this function must be called with the trx_data->LOCK_group_commit
+ locked; the mutex will be released before return.
+*/
+void
+MYSQL_BIN_LOG::trx_group_commit_participant(binlog_trx_data *trx_data)
+{
+ safe_mutex_assert_owner(&trx_data->LOCK_group_commit);
+
+ /* Wait until trx_data.done == true and woken up by the leader. */
+ while (!trx_data->done)
+ pthread_cond_wait(&trx_data->COND_group_commit,
+ &trx_data->LOCK_group_commit);
+ pthread_mutex_unlock(&trx_data->LOCK_group_commit);
+}
+
+bool
+MYSQL_BIN_LOG::trx_group_commit_finish(binlog_trx_data *trx_data)
+{
+ if (trx_data->error)
+ {
+ switch (trx_data->error)
+ {
+ case ER_ERROR_ON_WRITE:
+ my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, trx_data->commit_errno);
+ break;
+ case ER_ERROR_ON_READ:
+ my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
+ trx_data->trans_log.file_name, trx_data->commit_errno);
+ break;
+ default:
+ /*
+ No errors other than those handled above should occur. But just in
+ case one is added later without updating the above switch statement,
+ include a catch-all.
+ */
+ my_printf_error(trx_data->error,
+ "Error writing transaction to binary log: %d",
+ MYF(ME_NOREFRESH), trx_data->error);
+ }
+
+ /*
+ Since we return error, this transaction XID will not be committed, so
+ we need to mark it as not needed for recovery (unlog() is not called
+ for a transaction if log_xid() fails).
+ */
+ if (trx_data->end_event->get_type_code() == XID_EVENT)
+ mark_xid_done();
+
+ return 1;
+ }
+
+ return 0;
+}
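
This "record now, report later" split is the pattern to note: the leader thread may only store error state on behalf of another connection, and the owning connection reports it once it resumes. In miniature (hypothetical names; my_error() shown only as a comment):

struct PendingResult
{
  int error;         /* 0, or an error code recorded by the leader thread */
  int commit_errno;  /* captured errno for the eventual report */
};

/* Leader thread: may not touch another session's thread-local state,
   so it only records what went wrong. */
static void record_error(PendingResult *r, int err, int os_errno)
{
  r->error= err;
  r->commit_errno= os_errno;
}

/* Owning thread, after being woken: thread-local error reporting
   (my_error() in the real code) is safe again here. */
static int finish(PendingResult *r)
{
  if (r->error)
  {
    /* my_error(r->error, MYF(ME_NOREFRESH), ..., r->commit_errno); */
    return 1;
  }
  return 0;
}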
+
+/*
+ Do binlog group commit as the lead thread.
+
+ This must be called when this thread/transaction is queued at the start of
+ the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
+ commit all the transactions in the queue (more may have entered while waiting
+ for LOCK_log). After commit is done, all other threads in the queue will be
+ signalled.
+
+ */
+void
+MYSQL_BIN_LOG::trx_group_commit_leader(THD *first_thd)
+{
+ uint xid_count= 0;
+ uint write_count= 0;
+
+ /* First, put anything from group_log_xid into the queue. */
+ binlog_trx_data *full_queue= NULL;
+ binlog_trx_data **next_ptr= &full_queue;
+ for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
+ {
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+ /* Skip log_xid for transactions without xid, marked by NULL end_event. */
+ if (!trx_data->end_event)
+ continue;
+
+ trx_data->error= 0;
+ *next_ptr= trx_data;
+ next_ptr= &(trx_data->next);
+ }
+
+ /*
+ Next, take the LOCK_log mutex, and once we get it, add any additional writes
+ that queued up while we were waiting.
+
+ Note that if some writer not going through log_xid() comes in and gets the
+ LOCK_log before us, they will not be able to include us in their group
+ commit (and they are not able to handle ensuring same commit order between
+ us and participating transactional storage engines anyway).
+
+ On the other hand, when we get the LOCK_log, we will be able to include
+ any non-transactional writes that queued up in our group commit. This
+ should hopefully not be too big of a problem, as group commit is most
+ important for the transactional case anyway when durability (fsync) is
+ enabled.
+ */
VOID(pthread_mutex_lock(&LOCK_log));
- /* NULL would represent nothing to replicate after ROLLBACK */
- DBUG_ASSERT(commit_event != NULL);
+ /*
+ As the queue is in reverse order of entering, reverse the queue as we add
+ it to the existing one. Note that there is no ordering defined between
+ transactional and non-transactional commits.
+ */
+ binlog_trx_data *current= atomic_grab_trx_queue();
+ binlog_trx_data *xtra_queue= NULL;
+ while (current)
+ {
+ current->error= 0;
+ binlog_trx_data *next= current->next;
+ current->next= xtra_queue;
+ xtra_queue= current;
+ current= next;
+ }
+ *next_ptr= xtra_queue;
+ /*
+ Now we have in full_queue the list of transactions to be committed in
+ order.
+ */
DBUG_ASSERT(is_open());
if (likely(is_open())) // Should always be true
{
/*
- We only bother to write to the binary log if there is anything
- to write.
- */
- if (my_b_tell(cache) > 0)
+ Commit every transaction in the queue.
+
+ Note that we are doing this in a different thread than the one running
+ the transaction! So we are limited in the operations we can do. In
+ particular, we cannot call my_error() on behalf of a transaction, as
+ that obtains the THD from thread local storage. Instead, we must set
+ current->error and let the thread do the error reporting itself once
+ we wake it up.
+ */
+ for (current= full_queue; current != NULL; current= current->next)
{
- /*
- Log "BEGIN" at the beginning of every transaction. Here, a
- transaction is either a BEGIN..COMMIT block or a single
- statement in autocommit mode.
- */
- Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ IO_CACHE *cache= ¤t->trans_log;
/*
- Now this Query_log_event has artificial log_pos 0. It must be
- adjusted to reflect the real position in the log. Not doing it
- would confuse the slave: it would prevent this one from
- knowing where he is in the master's binlog, which would result
- in wrong positions being shown to the user, MASTER_POS_WAIT
- undue waiting etc.
+ We only bother to write to the binary log if there is anything
+ to write.
*/
- if (qinfo.write(&log_file))
- goto err;
-
- DBUG_EXECUTE_IF("crash_before_writing_xid",
- {
- if ((write_error= write_cache(cache, false, true)))
- DBUG_PRINT("info", ("error writing binlog cache: %d",
- write_error));
- DBUG_PRINT("info", ("crashing before writing xid"));
- abort();
- });
-
- if ((write_error= write_cache(cache, false, false)))
- goto err;
+ if (my_b_tell(cache) > 0)
+ {
+ current->error= write_transaction(current);
+ if (current->error)
+ current->commit_errno= errno;
- if (commit_event && commit_event->write(&log_file))
- goto err;
+ write_count++;
+ }
- if (incident && write_incident(thd, FALSE))
- goto err;
+ if (current->end_event->get_type_code() == XID_EVENT)
+ xid_count++;
+ }
+ if (write_count > 0)
+ {
if (flush_and_sync())
- goto err;
- DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_ABORT(););
- if (cache->error) // Error on read
{
- sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
- write_error=1; // Don't give more errors
- goto err;
+ for (current= full_queue; current != NULL; current= current->next)
+ {
+ if (!current->error)
+ {
+ current->error= ER_ERROR_ON_WRITE;
+ current->commit_errno= errno;
+ }
+ }
+ }
+ else
+ {
+ signal_update();
}
- signal_update();
}
/*
- if commit_event is Xid_log_event, increase the number of
+ if any commit_events are Xid_log_event, increase the number of
prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
if there're prepared xids in it - see the comment in new_file() for
an explanation.
- If the commit_event is not Xid_log_event (then it's a Query_log_event)
- rotate binlog, if necessary.
+ If no Xid_log_events (then it's all Query_log_event) rotate binlog,
+ if necessary.
*/
- if (commit_event && commit_event->get_type_code() == XID_EVENT)
+ if (xid_count > 0)
{
- pthread_mutex_lock(&LOCK_prep_xids);
- prepared_xids++;
- pthread_mutex_unlock(&LOCK_prep_xids);
+ mark_xids_active(xid_count);
}
else
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+
VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_RETURN(0);
+ /*
+ Signal those that are not part of group_log_xid, and are not group leaders
+ running the queue.
-err:
- if (!write_error)
+ Since a group leader runs the queue itself if a group_log_xid does not get
+ to do it first, such leader threads do not need to wait or be woken up.
+ */
+ for (current= xtra_queue; current != NULL; current= current->next)
{
- write_error= 1;
- sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ /*
+ Note that we need to take LOCK_group_commit even in the case of a leader!
+
+ Otherwise there is a race between setting and testing the
+ group_commit_leader flag.
+ */
+ pthread_mutex_lock(¤t->LOCK_group_commit);
+ if (!current->group_commit_leader)
+ {
+ current->done= true;
+ pthread_cond_signal(¤t->COND_group_commit);
+ }
+ pthread_mutex_unlock(¤t->LOCK_group_commit);
}
- VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_RETURN(1);
}
+int
+MYSQL_BIN_LOG::write_transaction(binlog_trx_data *trx_data)
+{
+ IO_CACHE *cache= &trx_data->trans_log;
+ /*
+ Log "BEGIN" at the beginning of every transaction. Here, a transaction is
+ either a BEGIN..COMMIT block or a single statement in autocommit mode. The
+ event was constructed in write_transaction_to_binlog(), in the thread
+ running the transaction.
+
+ Now this Query_log_event has artificial log_pos 0. It must be
+ adjusted to reflect the real position in the log. Not doing it
+ would confuse the slave: it would prevent it from
+ knowing where it is in the master's binlog, which would result
+ in wrong positions being shown to the user, MASTER_POS_WAIT
+ undue waiting etc.
+ */
+ if (trx_data->begin_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ DBUG_EXECUTE_IF("crash_before_writing_xid",
+ {
+ if ((write_cache(cache)))
+ DBUG_PRINT("info", ("error writing binlog cache"));
+ else
+ flush_and_sync();
+
+ DBUG_PRINT("info", ("crashing before writing xid"));
+ abort();
+ });
+
+ if (write_cache(cache))
+ return ER_ERROR_ON_WRITE;
+
+ if (trx_data->end_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ if (trx_data->has_incident() && trx_data->incident_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ if (cache->error) // Error on read
+ return ER_ERROR_ON_READ;
+
+ return 0;
+}
+
+binlog_trx_data *
+MYSQL_BIN_LOG::atomic_enqueue_trx(binlog_trx_data *trx_data)
+{
+ my_atomic_rwlock_wrlock(&LOCK_queue);
+ trx_data->next= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&trx_data->next),
+ trx_data))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_queue);
+ return trx_data->next;
+}
+
+binlog_trx_data *
+MYSQL_BIN_LOG::atomic_grab_trx_queue()
+{
+ my_atomic_rwlock_wrlock(&LOCK_queue);
+ binlog_trx_data *queue= group_commit_queue;
+ while (!my_atomic_casptr((void **)(&group_commit_queue),
+ (void **)(&queue),
+ NULL))
+ ;
+ my_atomic_rwlock_wrunlock(&LOCK_queue);
+ return queue;
+}
/**
Wait until we get a signal that the binary log has been updated.
@@ -5879,9 +6217,6 @@ void TC_LOG_BINLOG::close()
}
/**
- @todo
- group commit
-
@retval
0 error
@retval
@@ -5889,19 +6224,83 @@ void TC_LOG_BINLOG::close()
*/
int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
{
- DBUG_ENTER("TC_LOG_BINLOG::log");
- Xid_log_event xle(thd, xid);
- binlog_trx_data *trx_data=
- (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ int error;
+ DBUG_ENTER("TC_LOG_BINLOG::log_xid");
+
+ thd->next_commit_ordered= 0;
+ group_log_xid(thd);
+ if (thd->xid_error)
+ error= xid_delayed_error(thd);
+ else
+ error= 0;
+
/*
- We always commit the entire transaction when writing an XID. Also
- note that the return value is inverted.
- */
- DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE));
+ Note that the return value is inverted: zero on failure, private non-zero
+ 'cookie' on success.
+ */
+ DBUG_RETURN(!error);
}
-void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+/*
+ Do a binlog log_xid() for a group of transactions, linked through
+ thd->next_commit_ordered.
+*/
+void
+TC_LOG_BINLOG::group_log_xid(THD *first_thd)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::group_log_xid");
+ trx_group_commit_leader(first_thd);
+ for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
+ {
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ thd->xid_error= trx_data->error;
+ thd->xid_cookie= !trx_data->error;
+ }
+ DBUG_VOID_RETURN;
+}
+
+int
+TC_LOG_BINLOG::xid_delayed_error(THD *thd)
{
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ return trx_group_commit_finish(trx_data);
+}
+
+/*
+ After an XID is logged, we need to hold on to the current binlog file until
+ it is fully committed in the storage engine. The reason is that crash
+ recovery only looks at the latest binlog, so we must make sure there are no
+ outstanding prepared (but not committed) transactions before rotating the
+ binlog.
+
+ To handle this, we keep a count of outstanding XIDs. This function is used
+ to increase this count when committing one or more transactions to the
+ binary log.
+*/
+void
+TC_LOG_BINLOG::mark_xids_active(uint xid_count)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
+ DBUG_PRINT("info", ("xid_count=%u", xid_count));
+ pthread_mutex_lock(&LOCK_prep_xids);
+ prepared_xids+= xid_count;
+ pthread_mutex_unlock(&LOCK_prep_xids);
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Once an XID is committed, it is safe to rotate the binary log, as it can no
+ longer be needed during crash recovery.
+
+ This function is called to mark an XID this way. It needs to decrease the
+ count of pending XIDs, and signal the log rotator thread when it reaches zero.
+*/
+void
+TC_LOG_BINLOG::mark_xid_done()
+{
+ DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
pthread_mutex_lock(&LOCK_prep_xids);
DBUG_ASSERT(prepared_xids > 0);
if (--prepared_xids == 0) {
@@ -5909,7 +6308,16 @@ void TC_LOG_BINLOG::unlog(ulong cookie,
pthread_cond_signal(&COND_prep_xids);
}
pthread_mutex_unlock(&LOCK_prep_xids);
- rotate_and_purge(0); // as ::write() did not rotate
+ DBUG_VOID_RETURN;
+}
+
+void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::unlog");
+ if (xid)
+ mark_xid_done();
+ rotate_and_purge(0); // as ::write_transaction_to_binlog() did not rotate
+ DBUG_VOID_RETURN;
}
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
=== modified file 'sql/log.h'
--- a/sql/log.h 2009-12-04 14:40:42 +0000
+++ b/sql/log.h 2010-05-26 08:13:32 +0000
@@ -28,13 +28,49 @@ class TC_LOG
{
public:
int using_heuristic_recover();
- TC_LOG() {}
+ /* True if we should use group_log_xid(), false to use log_xid(). */
+ bool use_group_log_xid;
+
+ TC_LOG() : use_group_log_xid(0) {}
virtual ~TC_LOG() {}
virtual int open(const char *opt_name)=0;
virtual void close()=0;
virtual int log_xid(THD *thd, my_xid xid)=0;
virtual void unlog(ulong cookie, my_xid xid)=0;
+ /*
+ If use_group_log_xid is true, then this method is used instead of
+ log_xid() to do logging of a group of transactions all at once.
+
+ The transactions will be linked through THD::next_commit_ordered.
+
+ Additionally, when this method is used instead of log_xid(), the order in
+ which handler->prepare_ordered() and handler->commit_ordered() are called
+ is guaranteed to be the same as the order of calls and THD list elements
+ for group_log_xid().
+
+ This can be used to efficiently implement group commit that at the same
+ time preserves the order of commits among handlers and TC (eg. to get same
+ commit order in InnoDB and binary log).
+
+ For TCs that do not need this, it can be preferable to use plain log_xid()
+ instead, as it allows threads to run log_xid() in parallel with each
+ other. In contrast, group_log_xid() runs under a global mutex, so it is
+ guaranteed that only one call into it will be active at a time.
+
+ Since this call handles multiple threads/THDs at once, my_error() (and
+ other code that relies on thread local storage) cannot be used in this
+ method. Instead, in case of error, thd->xid_error should be set to the
+ error code, and xid_delayed_error() will be called later in the correct
+ thread context to actually report the error.
+
+ In the success case, this method must set thd->xid_cookie for each thread
+ to the cookie that is normally returned from log_xid() (which must be
+ non-zero in the non-error case).
+ */
+ virtual void group_log_xid(THD *first_thd) { DBUG_ASSERT(FALSE); }
+ /* Error reporting for group_log_xid(). */
+ virtual int xid_delayed_error(THD *thd) { DBUG_ASSERT(FALSE); return 0; }
};
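
For a TC author, the contract above reads roughly as follows -- a sketch of a subclass opting in to the batched interface, with simplified stand-ins for TC_LOG/THD (illustrative only, not patch code):

struct Thd
{
  Thd *next_commit_ordered;   /* link through the group */
  int xid_error;              /* set per-thd by group_log_xid() on failure */
  int xid_cookie;             /* non-zero cookie on success */
};

struct ExampleTC
{
  bool use_group_log_xid;
  ExampleTC() : use_group_log_xid(true) {}

  /* One call logs (and fsyncs) a whole linked group of transactions. */
  void group_log_xid(Thd *first)
  {
    int err= 0; /* = write_and_sync_all(first): single fsync for the group */
    for (Thd *t= first; t; t= t->next_commit_ordered)
    {
      t->xid_error= err;
      t->xid_cookie= err ? 0 : 1;  /* cookie must be non-zero on success */
    }
  }

  /* Runs later, in each transaction's own thread context, where
     thread-local error reporting is safe. */
  int xid_delayed_error(Thd *thd)
  {
    /* my_error(thd->xid_error, ...); */
    return 1;
  }
};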
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
@@ -227,12 +263,19 @@ private:
time_t last_time;
};
+class binlog_trx_data;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
{
private:
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
pthread_mutex_t LOCK_index;
pthread_mutex_t LOCK_prep_xids;
+ /*
+ Mutex to protect the queue of transactions waiting to participate in group
+ commit. (Only used on platforms without native atomic operations).
+ */
+ pthread_mutex_t LOCK_queue;
+
pthread_cond_t COND_prep_xids;
pthread_cond_t update_cond;
ulonglong bytes_written;
@@ -271,8 +314,8 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
-
- ulonglong m_table_map_version;
+ /* Queue of transactions queued up to participate in group commit. */
+ binlog_trx_data *group_commit_queue;
int write_to_file(IO_CACHE *cache);
/*
@@ -282,6 +325,14 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
*/
void new_file_without_locking();
void new_file_impl(bool need_lock);
+ int write_transaction(binlog_trx_data *trx_data);
+ bool write_transaction_to_binlog_events(binlog_trx_data *trx_data);
+ void trx_group_commit_participant(binlog_trx_data *trx_data);
+ void trx_group_commit_leader(THD *first_thd);
+ binlog_trx_data *atomic_enqueue_trx(binlog_trx_data *trx_data);
+ binlog_trx_data *atomic_grab_trx_queue();
+ void mark_xid_done();
+ void mark_xids_active(uint xid_count);
public:
MYSQL_LOG::generate_name;
@@ -311,17 +362,11 @@ public:
int open(const char *opt_name);
void close();
int log_xid(THD *thd, my_xid xid);
+ int xid_delayed_error(THD *thd);
+ void group_log_xid(THD *first_thd);
void unlog(ulong cookie, my_xid xid);
int recover(IO_CACHE *log, Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
- bool is_table_mapped(TABLE *table) const
- {
- return table->s->table_map_version == table_map_version();
- }
-
- ulonglong table_map_version() const { return m_table_map_version; }
- void update_table_map_version() { ++m_table_map_version; }
-
int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event);
int remove_pending_rows_event(THD *thd);
@@ -362,10 +407,12 @@ public:
void new_file();
bool write(Log_event* event_info); // binary log write
- bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
- bool write_incident(THD *thd, bool lock);
+ bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev);
+ bool trx_group_commit_finish(binlog_trx_data *trx_data);
+ bool write_incident(THD *thd);
- int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+ int write_cache(IO_CACHE *cache);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
=== modified file 'sql/log_event.h'
--- a/sql/log_event.h 2010-03-04 08:03:07 +0000
+++ b/sql/log_event.h 2010-05-26 08:13:32 +0000
@@ -463,10 +463,9 @@ struct sql_ex_info
#define LOG_EVENT_SUPPRESS_USE_F 0x8
/*
- The table map version internal to the log should be increased after
- the event has been written to the binary log.
+ This used to be LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F, but is now unused.
*/
-#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x10
+#define LOG_EVENT_UNUSED1_F 0x10
/**
@def LOG_EVENT_ARTIFICIAL_F
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2010-01-15 15:27:55 +0000
+++ b/sql/sql_class.cc 2010-05-26 08:13:32 +0000
@@ -673,6 +673,8 @@ THD::THD()
active_vio = 0;
#endif
pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
+ pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&COND_commit_ordered, 0);
/* Variables with default values */
proc_info="login";
@@ -3773,7 +3775,6 @@ int THD::binlog_flush_pending_rows_event
if (stmt_end)
{
pending->set_flags(Rows_log_event::STMT_END_F);
- pending->flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
binlog_table_maps= 0;
}
@@ -3901,7 +3902,6 @@ int THD::binlog_query(THD::enum_binlog_q
{
Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use,
errcode);
- qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
/*
Binlog table maps will be irrelevant after a Query_log_event
(they are just removed on the slave side) so after the query
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-03-30 12:36:49 +0000
+++ b/sql/sql_class.h 2010-05-26 08:13:32 +0000
@@ -1438,6 +1438,21 @@ public:
/* container for handler's private per-connection data */
Ha_data ha_data[MAX_HA];
+ /* Mutex and condition for waking up threads after group commit. */
+ pthread_mutex_t LOCK_commit_ordered;
+ pthread_cond_t COND_commit_ordered;
+ bool group_commit_ready;
+ /* Pointer for linking THDs into queue waiting for group commit. */
+ THD *next_commit_ordered;
+ /*
+ The "all" parameter of commit(), to communicate it to the thread that
+ calls commit_ordered().
+ */
+ bool group_commit_all;
+ /* Set by TC_LOG::group_log_xid(), to return per-thd error and cookie. */
+ int xid_error;
+ int xid_cookie;
+
#ifndef MYSQL_CLIENT
int binlog_setup_trx_data();
=== modified file 'sql/sql_load.cc'
--- a/sql/sql_load.cc 2010-03-04 08:03:07 +0000
+++ b/sql/sql_load.cc 2010-05-26 08:13:32 +0000
@@ -516,7 +516,6 @@ int mysql_load(THD *thd,sql_exchange *ex
else
{
Delete_file_log_event d(thd, db, transactional_table);
- d.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
(void) mysql_bin_log.write(&d);
}
}
@@ -698,7 +697,6 @@ static bool write_execute_load_query_log
(duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE :
(ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR),
transactional_table, FALSE, errcode);
- e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F;
return mysql_bin_log.write(&e);
}
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-03-10 10:32:14 +0000
+++ b/sql/table.cc 2010-05-26 08:13:32 +0000
@@ -297,13 +297,6 @@ TABLE_SHARE *alloc_table_share(TABLE_LIS
share->version= refresh_version;
/*
- This constant is used to mark that no table map version has been
- assigned. No arithmetic is done on the value: it will be
- overwritten with a value taken from MYSQL_BIN_LOG.
- */
- share->table_map_version= ~(ulonglong)0;
-
- /*
Since alloc_table_share() can be called without any locking (for
example, ha_create_table... functions), we do not assign a table
map id here. Instead we assign a value that is not used
@@ -367,10 +360,9 @@ void init_tmp_table_share(THD *thd, TABL
share->frm_version= FRM_VER_TRUE_VARCHAR;
/*
- Temporary tables are not replicated, but we set up these fields
+ Temporary tables are not replicated, but we set up this field
anyway to be able to catch errors.
*/
- share->table_map_version= ~(ulonglong)0;
share->cached_row_logging_check= -1;
/*
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-02-10 19:06:24 +0000
+++ b/sql/table.h 2010-05-26 08:13:32 +0000
@@ -433,7 +433,6 @@ typedef struct st_table_share
bool waiting_on_cond; /* Protection against free */
bool deleting; /* going to delete this table */
ulong table_map_id; /* for row-based replication */
- ulonglong table_map_version;
/*
Cache for row-based replication table share checks that does not
=== modified file 'storage/xtradb/handler/ha_innodb.cc'
--- a/storage/xtradb/handler/ha_innodb.cc 2010-01-15 21:12:30 +0000
+++ b/storage/xtradb/handler/ha_innodb.cc 2010-05-26 08:13:32 +0000
@@ -138,8 +138,6 @@ bool check_global_access(THD *thd, ulong
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
@@ -239,6 +237,7 @@ static const char* innobase_change_buffe
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -1356,7 +1355,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1416,8 +1414,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -2024,6 +2020,7 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@@ -2492,7 +2489,6 @@ skip_overwrite:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&analyze_mutex, MY_MUTEX_INIT_FAST);
@@ -2547,7 +2543,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_mutex_destroy(&analyze_mutex);
@@ -2681,6 +2676,101 @@ innobase_start_trx_and_assign_read_view(
}
/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ if (trx->active_trans == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We do not report the error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+ /* Since we will reserve the kernel mutex, we have to release
+ the search system latch first to obey the latching order. */
+
+ if (trx->has_search_latch) {
+ trx_search_latch_release_if_reserved(trx);
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in the same sequence as writing to the binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ /* The following calls to read the MySQL binary log
+ file name and the position return consistent results:
+ 1) We use commit_ordered() to get same commit order
+ in InnoDB as in binary log.
+ 2) A MySQL log file rotation cannot happen because
+ MySQL protects against this by having a counter of
+ transactions in prepared state and it only allows
+ a rotation when the counter drops to zero. See
+ LOCK_prep_xids and COND_prep_xids in log.cc. */
+ trx->mysql_log_file_name = mysql_bin_log_file_name();
+ trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@return 0 */
@@ -2702,13 +2792,6 @@ innobase_commit(
trx = check_trx_exists(thd);
- /* Since we will reserve the kernel mutex, we have to release
- the search system latch first to obey the latching order. */
-
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
/* The flag trx->active_trans is set to 1 in
1. ::external_lock(),
@@ -2736,62 +2819,8 @@ innobase_commit(
/* We were instructed to commit the whole transaction, or
this is an SQL statement end and autocommit is on */
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
- }
-
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
-
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered();
+ now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
trx->active_trans = 0;
@@ -4621,6 +4650,7 @@ no_commit:
no need to re-acquire locks on it. */
/* Altering to InnoDB format */
+ innobase_commit_ordered(ht, user_thd, 1);
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
prebuilt->trx->active_trans = 1;
@@ -4637,6 +4667,7 @@ no_commit:
/* Commit the transaction. This will release the table
locks, so they have to be acquired again. */
+ innobase_commit_ordered(ht, user_thd, 1);
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
prebuilt->trx->active_trans = 1;
@@ -8339,6 +8370,7 @@ ha_innobase::external_lock(
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
if (trx->active_trans != 0) {
+ innobase_commit_ordered(ht, thd, TRUE);
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -9448,36 +9480,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
- if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
- /* choose group commit rather than binlog order */
- return(error);
- }
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
@@ -10669,11 +10671,6 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkp
"Enable/Disable flushing along modified age. (none, reflex, [estimate])",
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
-static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
- PLUGIN_VAR_RQCMDARG,
- "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
- NULL, NULL, 0, 0, 1, 0);
-
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
PLUGIN_VAR_RQCMDARG,
"Enable/Disable converting automatically *.ibd files when import tablespace.",
@@ -10763,7 +10760,6 @@ static struct st_mysql_sys_var* innobase
MYSQL_SYSVAR(flush_neighbor_pages),
MYSQL_SYSVAR(read_ahead),
MYSQL_SYSVAR(adaptive_checkpoint),
- MYSQL_SYSVAR(enable_unsafe_group_commit),
MYSQL_SYSVAR(expand_import),
MYSQL_SYSVAR(extra_rsegments),
MYSQL_SYSVAR(dict_size_limit),

[Maria-developers] Rev 2792: fixed problem with subselect_debug.test in file:///home/bell/maria/bzr/work-maria-5.3-scache/
by sanja@askmonty.org 25 May '10
At file:///home/bell/maria/bzr/work-maria-5.3-scache/
------------------------------------------------------------
revno: 2792
revision-id: sanja(a)askmonty.org-20100525182914-z3zeviggq9026x1n
parent: sanja(a)askmonty.org-20100525125457-5rwbiihh0vtghdrj
committer: sanja(a)askmonty.org
branch nick: work-maria-5.3-scache
timestamp: Tue 2010-05-25 21:29:14 +0300
message:
fixed problem with subselect_debug.test
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-05-25 12:54:57 +0000
+++ b/sql/item_subselect.cc 2010-05-25 18:29:14 +0000
@@ -1201,7 +1201,8 @@
if (exec())
{
reset();
- DBUG_RETURN(NULL);
+ str->set((ulonglong)0,&my_charset_bin);
+ DBUG_RETURN(str);
}
if (scache)
@@ -1244,7 +1245,8 @@
if (exec())
{
reset();
- DBUG_RETURN(0);
+ int2my_decimal(E_DEC_FATAL_ERROR, 0, 0, decimal_value);
+ DBUG_RETURN(decimal_value);
}
if (scache)

25 May '10
Hi!
It used to be that, in case of errors, Item::val_str() and Item::val_decimal()
could return NULL (0), but I found that some parts of the code do not allow it,
for example Item_sum_sum::add().
Should we revise the val_* methods so that they always return something (for
example, subquery EXISTS was changed this way, and we even have
subselect_debug.test about it, but single-row subquery was not), or should we
change the places that call val_str() and val_decimal() so that they allow
NULL (0)?
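
For illustration, the first option is what the Rev 2792 diff above does
(fabricate a zero value and return it). The second option would mean auditing
every call site so that it tolerates NULL, roughly like this (a sketch only;
the exact surrounding code varies per call site):

  String buf;
  String *res= item->val_str(&buf);
  if (!res)
  {
    /* Error (or NULL value) was already flagged on the item; propagate. */
    null_value= 1;
    return true;
  }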

[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2858: Fix a couple of problems in the pack script, and disable a check feature that doesn't work right now
by noreply@launchpad.net 25 May '10
------------------------------------------------------------
revno: 2858
committer: Bo Thorsen <bo(a)askmonty.org>
branch nick: trunk-work
timestamp: Tue 2010-05-25 16:56:35 +0200
message:
Fix a couple of problems in the pack script, and disable a check feature that doesn't work right now
modified:
win/make_mariadb_win_dist
--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1
Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription

[Maria-developers] Updated (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 49
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 13:19)=-=-
Low Level Design modified.
--- /tmp/wklog.116.old.14255 2010-05-25 13:19:00.000000000 +0000
+++ /tmp/wklog.116.new.14255 2010-05-25 13:19:00.000000000 +0000
@@ -1 +1,363 @@
+1. Changes for ha_commit_trans()
+
+The gut of the code for commit is in the function ha_commit_trans() (and in
+commit_one_phase() which is called from it). This must be extended to use the
+new prepare_ordered(), group_log_xid(), and commit_ordered() calls.
+
+1.1 Atomic queue of committing transactions
+
+To keep the right commit order among participants, we put transactions into a
+queue. The operations on the queue are non-locking:
+
+ - Insert THD at the head of the queue, and return old queue.
+
+ THD *enqueue_atomic(THD *thd)
+
+ - Fetch (and delete) the whole queue.
+
+ THD *atomic_grab_reverse_queue()
+
+These are simple to implement with atomic compare-and-set. Note that there is
+no ABA problem [2], as we do not delete individual elements from the queue;
+we grab the whole queue and replace it with NULL.
+
+A transaction enters the queue when it does prepare_ordered(). This way, the
+scheduling order for prepare_ordered() calls is what determines the sequence
+in the queue and effectively the commit order.
+
+The queue is grabbed by the code doing group_log_xid() and commit_ordered()
+calls. The queue is passed directly to group_log_xid(), and afterwards
+iterated to do individual commit_ordered() calls.
+
+Using a lock-free queue allows prepare_ordered() (for one transaction) to run
+in parallel with commit_ordered (in another transaction), increasing potential
+parallelism.
+
+The queue is simply a linked list of THD objects, linked through a
+THD::next_commit_ordered field. Since we add at the head of the queue, the
+list is actually in reverse order, so must be reversed when we grab and delete
+it.
+
+The reason that enqueue_atomic() returns the old queue is so that we can check
+if an insert goes to the head of the queue. The thread at the head of the
+queue will do the sequential part of group commit for everyone.
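+
+As a self-contained illustration (a sketch using C++ atomics here rather
+than the server's my_atomic wrappers), the two operations could look like:
+
+  #include <atomic>
+
+  struct THD_stub { THD_stub *next_commit_ordered; };
+  static std::atomic<THD_stub *> group_commit_queue{nullptr};
+
+  /* Insert at head; return the old head, so the caller can detect
+     whether it became the group commit leader (old head == NULL). */
+  THD_stub *enqueue_atomic(THD_stub *thd)
+  {
+    THD_stub *old_head= group_commit_queue.load();
+    do
+      thd->next_commit_ordered= old_head;
+    while (!group_commit_queue.compare_exchange_weak(old_head, thd));
+    return old_head;
+  }
+
+  /* Grab the whole queue and reverse it into commit order. */
+  THD_stub *atomic_grab_reverse_queue()
+  {
+    THD_stub *q= group_commit_queue.exchange(nullptr);
+    THD_stub *reversed= nullptr;
+    while (q)
+    {
+      THD_stub *next= q->next_commit_ordered;
+      q->next_commit_ordered= reversed;
+      reversed= q;
+      q= next;
+    }
+    return reversed;
+  }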
+
+
+1.2 Locks
+
+1.2.1 Global LOCK_prepare_ordered
+
+This lock is taken to serialise calls to prepare_ordered(). Note that
+effectively, the commit order is decided by the order in which threads obtain
+this lock.
+
+
+1.2.2 Global LOCK_group_commit and COND_group_commit
+
+This lock is used to protect the serial part of group commit. It is taken
+around the code where we grab the queue, call group_log_xid() on the queue,
+and call commit_ordered() on each element of the queue, to make sure they
+happen serialised and in consistent order. It also protects the variable
+group_commit_queue_busy, which is used when not using group_log_xid() to delay
+running over a new queue until the first queue is completely done.
+
+
+1.2.3 Global LOCK_commit_ordered
+
+This lock is taken around calls to commit_ordered(), to ensure they happen
+serialised.
+
+
+1.2.4 Per-thread thd->LOCK_commit_ordered and thd->COND_commit_ordered
+
+This lock protects the thd->group_commit_ready variable, as well as the
+condition variable used to wake up threads after log_xid() and
+commit_ordered() finishes.
+
+
+1.2.5 Global LOCK_group_commit_queue
+
+This is only used on platforms with no native compare-and-set operations, to
+make the queue operations atomic.
+
+
+1.3 Commit algorithm.
+
+This is the basic algorithm, simplified by
+
+ - omitting some error handling
+
+ - omitting looping over all handlers when invoking handler methods
+
+ - omitting some possible optimisations when not all calls needed (see next
+ section).
+
+ - Omitting the case where no group_log_xid() is used, see below.
+
+---- BEGIN ALGORITHM ----
+ ht->prepare()
+
+ // Call prepare_ordered() and enqueue in correct commit order
+ lock(LOCK_prepare_ordered)
+ ht->prepare_ordered()
+ old_queue= enqueue_atomic(thd)
+ thd->group_commit_ready= FALSE
+ is_group_commit_leader= (old_queue == NULL)
+ unlock(LOCK_prepare_ordered)
+
+ if (is_group_commit_leader)
+
+ // The first in queue handles group commit for everyone
+
+ lock(LOCK_group_commit)
+ // Wait while queue is busy, see below for when this occurs
+ while (group_commit_queue_busy)
+ cond_wait(COND_group_commit)
+
+ // Grab and reverse the queue to get correct order of transactions
+ queue= atomic_grab_reverse_queue()
+
+ // This call will set individual error codes in thd->xid_error
+ // It also sets the cookie for unlog() in thd->xid_cookie
+ group_log_xid(queue)
+
+ lock(LOCK_commit_ordered)
+ for (other IN queue)
+ if (!other->xid_error)
+ ht->commit_ordered()
+ unlock(LOCK_commit_ordered)
+
+ unlock(LOCK_group_commit)
+
+ // Now we are done, so wake up all the others.
+ for (other IN TAIL(queue))
+ lock(other->LOCK_commit_ordered)
+ other->group_commit_ready= TRUE
+ cond_signal(other->COND_commit_ordered)
+ unlock(other->LOCK_commit_ordered)
+ else
+ // If not the leader, just wait until leader did the work for us.
+ lock(thd->LOCK_commit_ordered)
+ while (!thd->group_commit_ready)
+ cond_wait(thd->LOCK_commit_ordered, thd->COND_commit_ordered)
+ unlock(thd->LOCK_commit_ordered)
+
+ // Finally do any error reporting now that we're back in own thread.
+ if (thd->xid_error)
+ xid_delayed_error(thd)
+ else
+ ht->commit(thd)
+ unlog(thd->xid_cookie, thd->xid)
+---- END ALGORITHM ----
+
+If the transaction coordinator does not support group_log_xid(), we have to do
+things differently. In this case after the serialisation point at
+prepare_ordered(), we have to parallelise again when running log_xid()
+(otherwise we would lose group commit). But then when log_xid() is done, we
+have to serialise again to check for any error and call commit_ordered() in
+correct sequence for any transaction where log_xid() did not return error.
+
+The central part of the algorithm in this case (when using log_xid()) is:
+
+---- BEGIN ALGORITHM ----
+ cookie= log_xid(thd)
+ error= (cookie == 0)
+
+ if (is_group_commit_leader)
+
+ // The first to enqueue grabs the queue and runs first.
+ // But we must wait until a previous queue run is fully done.
+
+ lock(LOCK_group_commit)
+ while (group_commit_queue_busy)
+ cond_wait(COND_group_commit)
+ queue= atomic_grab_reverse_queue()
+ // The queue will be busy until last thread in it is done.
+ group_commit_queue_busy= TRUE
+ unlock(LOCK_group_commit)
+ else
+ // Not first in queue -> wait for previous one to wake us up.
+ lock(thd->LOCK_commit_ordered)
+ while (!thd->group_commit_ready)
+ cond_wait(thd->LOCK_commit_ordered, thd->COND_commit_ordered)
+ unlock(thd->LOCK_commit_ordered)
+
+ if (!error) // Only if log_xid() was successful
+ lock(LOCK_commit_ordered)
+ ht->commit_ordered()
+ unlock(LOCK_commit_ordered)
+
+ // Wake up the next thread, and release queue in last.
+ next= thd->next_commit_ordered
+
+ if (next)
+ lock(next->LOCK_commit_ordered)
+ next->group_commit_ready= TRUE
+ cond_signal(next->COND_commit_ordered)
+ unlock(next->LOCK_commit_ordered)
+ else
+ lock(LOCK_group_commit)
+ group_commit_queue_busy= FALSE
+ unlock(LOCK_group_commit)
+---- END ALGORITHM ----
+
+There are a number of locks taken in the algorithm, but in the group_log_xid()
+case most of them should be uncontended most of the time. The
+LOCK_group_commit of course will be contended, as new threads queue up waiting
+for the previous group commit (and binlog fsync()) to finish so they can do
+the next group commit. This is the whole point of implementing group commit.
+
+The LOCK_prepare_ordered and LOCK_commit_ordered mutexes should not be much
+contended as long as handlers follow the intention of having the corresponding
+handler calls execute quickly.
+
+The per-thread LOCK_commit_ordered mutexes should not be contended; they are
+only used to wake up a sleeping thread.
+
+
+1.4 Optimisations when not using all three new calls
+
+
+The prepare_ordered(), group_log_xid(), and commit_ordered() methods are
+optional, and if not implemented by a particular handler/transaction
+coordinator, we can optimise the algorithm to take advantage of not having to
+keep ordering for the missing parts.
+
+If there is no prepare_ordered(), then we need not take the
+LOCK_prepare_ordered mutex.
+
+If there is no commit_ordered(), then we need not take the LOCK_commit_ordered
+mutex.
+
+If there is no group_log_xid(), then we only need the queue to ensure same
+ordering of transactions for commit_ordered() as for prepare_ordered(). Thus,
+if either of these (or both) are also not present, we do not need to use the
+queue at all.
+
+
+2. Binlog code changes (log.cc)
+
+
+The bulk of the work needed for the binary log is to extend the code to allow
+group commit to the log. Unlike InnoDB/XtraDB, there is no existing support
+inside the binlog code for group commit.
+
+The existing code runs most of the write + fsync to the binary log under the
+global LOCK_log mutex, preventing any group commit.
+
+To enable group commit, this code must be split into two parts:
+
+ - one part that runs per transaction, re-writing the embedded event positions
+ for the correct offset, and writing this into the in-memory log cache.
+
+ - another part that writes a set of transactions to the disk, and runs
+ fsync().
+
+Then in group_log_xid(), we can run the first part in a loop over all the
+transactions in the passed-in queue, and run the second part only once.
+
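+In outline, the shape of this (a sketch with simplified helper names, not
+the final patch code):
+
+  void MYSQL_BIN_LOG::group_log_xid(THD *first_thd)
+  {
+    pthread_mutex_lock(&LOCK_log);
+    for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
+      write_transaction_to_log(thd); /* per trx: fix event positions and
+                                        copy the trx cache into the log */
+    flush_and_sync();                /* once: write() + fsync() for all */
+    pthread_mutex_unlock(&LOCK_log);
+  }
+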
+The binlog code also has other code paths that write into the binlog,
+eg. non-transactional statements. These have to be adapted also to work with
+the new code.
+
+In order to get some group commit facility for these also, we change that part
+of the code in a similar way to ha_commit_trans. We keep another,
+binlog-internal queue of such non-transactional binlog writes, and such writes
+queue up here before sleeping on the LOCK_log mutex. Once a thread obtains the
+LOCK_log, it loops over the queue for the fast part, and does the slow part
+once, then finally wakes up the others in the queue.
+
+In the transactional case in group_log_xid(), before we run the passed-in
+queue, we add any members found in the binlog-internal queue. This allows
+these non-transactional writes to share the group commit.
+
+However, in the case where it is a non-transactional write that gets the
+LOCK_log, the transactional transactions from the ha_commit_trans() queue will
+not be able to take part (they will have to wait for their turn to do another
+fsync). It seems difficult to cleanly let the binlog code grab the queue from
+out of the ha_commit_trans() algorithm. I think the group commit is mostly
+useful in transactional workloads anyway (non-transactional engines will lose
+data anyway in case of crash, so why fsync() after each transaction?)
+
+
+3. XtraDB changes (ha_innodb.cc)
+
+The changes needed in XtraDB are comparatively simple, as XtraDB already
+implements group commit, it just needs to be enabled with the new
+commit_ordered() call.
+
+The existing commit() method already is logically in two parts. The first part
+runs under the prepare_commit_mutex() and must be run in same order as binlog
+commit. This part needs to be moved to commit_ordered(). The second part runs
+after releasing prepare_commit_mutex and does transaction log write+fsync; it
+can remain.
+
+Then the prepare_commit_mutex is removed (and the enable_unsafe_group_commit
+XtraDB option to disable it).
+
+There are two asserts that check that the thread running the first part of
+XtraDB commit is the same as the thread running the other operations for the
+transaction. These have to be removed (as commit_ordered() can run in a
+different thread). Also, error reporting with sql_print_error() has to be
+delayed until commit() time.
+
+
+4. Proof-of-concept implementation
+
+There is a proof-of-concept implementation of this architecture, in the form
+of a quilt patch series [3].
+
+A quick benchmark was done, with sync_binlog=1 and
+innodb_flush_log_at_trx_commit=1. 64 parallel threads doing single-row
+transactions against one table.
+
+Without the patch, we get only 25 queries per second.
+
+With the patch, we get 650 queries per second.
+
+
+5. Open issues/tasks
+
+5.1 XA / other prepare() and commit() call sites.
+
+Check that user-level XA is handled correctly and working. And covered
+sufficiently with tests. Also check that any other calls of ha->prepare() and
+ha->commit() outside of ha_commit_trans() are handled correctly.
+
+5.2 Testing
+
+This worklog needs additions to the test suite, including error inserts to
+check error handling, and synchronisation points to check thread parallelism
+correctness.
+
+
+6. Alternative implementations
+
+ - The binlog code maintains its own extra atomic transaction queue to handle
+ non-transactional commits in a good way together with transactional (with
+ respect to group commit). Alternatively, we could ignore this issue and
+ just give up on group commit for non-transactional statements, for some
+ code simplifications.
+
+ - The binlog code has two ways to prepare end_event and similar, one that
+ uses stack-allocation, and another for when stack allocation is not
+ possible that uses thd->mem_root. Probably the overhead of thd->mem_root is
+ so small that it would make sense to use the same code for both cases.
+
+ - Instead of adding extra fields to THD, we could allocate a separate
+ structure on the thd->mem_root() with the required extra fields (including
+ the THD pointer). Would seem to require initialising mutexes at every
+ commit though.
+
+ - It would probably be a good idea to implement TC_LOG_MMAP::group_log_xid()
+ (should not be hard).
+
+
+-----------------------------------------------------------------------
+
+References:
+
+[2] https://secure.wikimedia.org/wikipedia/en/wiki/ABA_problem
+
+[3] https://knielsen-hq.org/maria/patches.mwl116/
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High-Level Specification modified.
--- /tmp/wklog.116.old.14249 2010-05-25 13:18:34.000000000 +0000
+++ /tmp/wklog.116.new.14249 2010-05-25 13:18:34.000000000 +0000
@@ -1 +1,157 @@
+The basic idea in group commit is that multiple threads, each handling one
+transaction, prepare for commit and then queue up together waiting to do an
+fsync() on the transaction log. Then once the log is available, a single
+thread does the fsync() + other necessary book-keeping for all of the threads
+at once. After this, the single thread signals the other threads that it's
+done and they can finish up and return success (or failure) from the commit
+operation.
+
+So group commit has a parallel part and a sequential part, and we need a
+facility for engines/binlog to participate in both the parallel and the
+sequential part.
+
+To do this, we add two new handlerton methods:
+
+ int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
+
+The idea is that the existing prepare() and commit() methods run in the
+parallel part of group commit, and the new prepare_ordered() and
+commit_ordered() run in the sequential part.
+
+The prepare_ordered() method is called after prepare(). The order of
+tranctions that call into prepare_ordered() is guaranteed to be the same among
+all storage engines and binlog, and it is serialised so no two calls can be
+running inside the same engine at the same time.
+
+The commit_ordered() method is called before commit(), and similarly is
+guaranteed to have same transaction order in all participants, and to be
+serialised within one engine.
+
+As the prepare_ordered() and commit_ordered() calls are serialised, the idea
+is that handlers should do the minimum amount of work needed in these calls,
+relaying most of the work (eg. fsync() ...) to prepare() and commit().
+
+As a concrete example, for InnoDB the commit_ordered() method will do the
+first part of commit that fixed the commit order in the transaction log
+buffer, and the commit() method will write the log to disk and fsync()
+it. This split already exists inside the InnoDB code, running before
+respectively after releasing the prepare_commit_mutex.
+
+In addition, the XA transaction coordinator (TC_LOG) is special, since it is
+the one responsible for deciding whether to commit or rollback the
+transaction. For this we need an extra method, since this decision can be done
+only after we know that all prepare() and prepare_ordered() calls succeed, and
+must be done to know whether to call commit_ordered()/commit(), or do rollback.
+
+The existing method for this is TC_LOG::log_xid(). To make group commit
+simpler to implement in a transaction coordinator and more efficient,
+we introduce a new method:
+
+ void group_log_xid(THD *first_thd);
+
+This method runs in the sequential part of group commit. It receives a list of
+transactions to perform log_xid() on, in the correct commit order. (Note that
+TC_LOG can do parallel parts of group commit in its own prepare() and commit()
+methods).
+
+This method can make it easier to implement the group commit in TC_LOG, as it
+gets directly the list of transactions in the right order. Without it, it
+might need to compute such order anyway in a prepare_ordered() method, and the
+server has to create this ordered list anyway to implement the order guarantee
+for prepare_ordered() and commit_ordered().
+
+This group_log_xid() method also is more efficient, as it avoids some
+inter-thread synchronisation. Since group_log_xid() is serialised, we can run
+it together with all the commit_ordered() method calls and need only a single
+sequential code section. With the log_xid() methods, we would need first a
+sequential part for the prepare_ordered() calls, then a parallel part with
+log_xid() calls (to not loose group commit ability for log_xid()), then again
+a sequential part for the commit_ordered() method calls.
+
+The extra synchronisation is needed, as each commit_ordered() call will have
+to wait for log_xid() in one thread (if log_xid() fails then commit_ordered()
+should not be called), and also wait for commit_ordered() to finish in all
+threads handling earlier commits. In effect we will need to bounce the
+execution from one thread to the other among all participants in the group
+commit.
+
+As a consequence of the group_log_xid() optimisation, handlers must be aware
+that the commit_ordered() call can happen in another thread than the one
+running commit() (so thread local storage is not available). This should not
+be a big issue as the THD is available for storing any needed information.
+
+Since group_log_xid() runs for multiple transactions in a single thread, it
+can not do error reporting (my_error()) as that relies on thread local
+storage. Instead it sets an error code in THD::xid_error, and if there is an
+error then later another method will be called (in the correct thread context) to
+actually report the error:
+
+ int xid_delayed_error(THD *thd)
+
+The three new methods prepare_ordered(), group_log_xid(), and commit_ordered()
+are optional (as is xid_delayed_error). A storage engine or transaction
+coordinator is free to not implement them if they are not needed. In this case
+there will be no order guarantee for the corresponding stage of group commit
+for that engine. For example, InnoDB needs no ordering of the prepare phase,
+so can omit implementing prepare_ordered(); TC_LOG_MMAP needs no ordering at
+all, so does not need to implement any of them.
+
+Note in particular that all existing engines (/binlog implementations if they
+exist) will work unmodified (and also without any change in group commit
+facilities or commit order guarantees).
+
+Using these new APIs, the work will be to
+
+ - In ha_commit_trans(), implement the correct semantics for the three new
+ calls.
+
+ - In XtraDB, use the new commit_ordered() call to remove the
+ prepare_commit_mutex (and resurrect group commit) without losing the
+ consistency with binlog commit order.
+
+ - In log.cc (binlog module), implement group_log_xid() to do group commit of
+ multiple transactions to the binlog with a single shared fsync() call.
+
+-----------------------------------------------------------------------
+Some possible alternative for this worklog:
+
+ - We could eliminate the group_log_xid() method for a simpler API, at the
+ cost of extra synchronisation between threads to do in-order
+ commit_ordered() method calls. This would also allow to call
+ commit_ordered() in the correct thread context.
+
+ - Alternatively, we could eliminate log_xid() and require that all
+ transaction coordinators implement group_log_xid() instead, again for some
+ moderate simplification.
+
+ - At the moment there is no plugin actually using prepare_ordered(), so it
+ could be removed from the design. But it fits in well, is efficient to
+ implement, and could be useful later (eg. for the requested feature of
+ releasing locks early in InnoDB).
+
+-----------------------------------------------------------------------
+Some possible follow-up projects after this is implemented:
+
+ - Add statistics about how efficient group commit is (#fsyncs/#commits in
+ each engine and binlog).
+
+ - Implement an XtraDB prepare_ordered() method that can release row locks
+ early (Mark Callaghan from Facebook advocates this, but need to determine
+ exactly how to do this safely).
+
+ - Implement a new crash recovery algorithm that uses the consistent commit
+ ordering to need fsync() only for the binlog. At crash recovery, any
+ missing transactions in an engine are replayed from the correct point in the
+ binlog (this point must be stored transactionally inside the engine, as
+ XtraDB already does today).
+
+ - Implement that START TRANSACTION WITH CONSISTENT SNAPSHOT 1) really gets a
+ consistent snapshot, with the same set of committed and not committed
+ transactions in all engines, 2) returns a corresponding consistent binlog
+ position. This should be easy by piggybacking on the synchronisation
+ implemented for ha_commit_trans().
+
+ - Use this in XtraBackup to get consistent binlog position without having to
+ block all updates with FLUSH TABLES WITH READ LOCK.
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High Level Description modified.
--- /tmp/wklog.116.old.14234 2010-05-25 13:18:07.000000000 +0000
+++ /tmp/wklog.116.new.14234 2010-05-25 13:18:07.000000000 +0000
@@ -21,3 +21,69 @@
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
+----
+
+Implementing group commit in MySQL faces some challenges from the handler
+plugin architecture:
+
+1. Because storage engine handlers have separate transaction log from the
+mysql binlog (and from each other), there are multiple fsync() calls per
+commit that need the group commit optimisation (2 per participating storage
+engine + 1 for binlog).
+
+2. The code handling commit is split in several places, in main server code
+and in storage engine code. With pluggable binlog it will be split even
+more. This requires a good abstract yet powerful API to be able to implement
+group commit simply and efficiently in plugins without the different parts
+having to rely on internals of the others.
+
+3. We want the order of commits to be the same in all engines participating in
+multiple transactions. This requirement is the reason that InnoDB currently
+breaks group commit with the infamous prepare_commit_mutex.
+
+While currently there is no server guarantee to get the same commit order in
+engines and binlog (except for the InnoDB prepare_commit_mutex hack), there are
+several reasons why this could be desirable:
+
+ - InnoDB hot backup needs to be able to extract a binlog position that is
+ consistent with the hot backup to be able to provision a new slave, and
+ this is impossible without imposing at least partial consistent ordering
+ between InnoDB and binlog.
+
+ - Other backup methods could have similar needs, eg. XtraBackup or
+ `mysqldump --single-transaction`, to have consistent commit order between
+ binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
+ or similar expensive blocking operation. (other backup methods, like LVM
+ snapshot, don't need consistent commit order, as they can restore
+ out-of-order commits during crash recovery using XA).
+
+ - If we have consistent commit order, we can think about optimising commit to
+ need only one fsync (for binlog); lost commits in storage engines can then
+ be recovered from the binlog at crash recovery by re-playing against the
+ engine from a particular point in the binlog.
+
+ - With consistent commit order, we can get better semantics for START
+ TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
+ could even get it to return also a matching binlog position). Currently,
+ this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
+ engines.
+
+ - In InnoDB, the performance in the presence of hotspots can be improved if
+ we can release row locks early in the commit phase, but this requires that
+ we release them in the same order as commits in the binlog to ensure
+ consistency between master and slaves.
+
+ - There were some discussions around Galera [1] synchronous replication and
+ global transaction ID that it needed consistent commit order among
+ participating engines.
+
+ - I believe there could be other applications for guaranteed consistent
+ commit order, and that the architecture described in this worklog can
+ implement such a guarantee with reasonable overhead.
+
+
+References:
+
+[1] Galera: http://www.codership.com/products/galera_replication
+
-=-=(Knielsen - Tue, 25 May 2010, 08:28)=-=-
More thoughts on and changes to the architecture. Got to something now that I am satisfied with and
that seems to be able to handle all issues.
Implement new prepare_ordered and commit_ordered handler methods and the logic in ha_commit_trans().
Implement TC_LOG::group_log_xid() method and logic in ha_commit_trans().
Implement XtraDB part, using commit_ordered() rather than prepare_commit_mutex.
Fix test suite failures.
Proof-of-concept patch series complete now.
Do initial benchmark, getting good results. With 64 threads, see 26x improvement in queries-per-sec.
Next step: write up the architecture description.
Worked 21 hours and estimate 0 hours remain (original estimate increased by 21 hours).
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparations for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all threads, executing the first part of
binlog writing (the write(2) syscall essentially). It then runs the second part (fsync(2)
essentially) only once, and then wakes up the remaining threads in the queue.
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc, 2) moving the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), 3) moving the fast part of InnoDB commit to innobase_commit_fast(), removing
the prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/different.
Think about possible next steps, such as releasing innodb row locks early (in
innobase_prepare_fast), and doing crash recovery by replaying transactions from the binlog (removing
the need for engine durability and 2 of 3 fsync() in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which storage engines and binary log are consistent with each other,
it is necessary to use XA with durable commits for both storage engines
(innodb_flush_log_at_trx_commit=1) and binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit, as there is no working group commit when the binary log is
enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
----
Implementing group commit in MySQL faces some challenges from the handler
plugin architecture:
1. Because storage engine handlers have separate transaction logs from the
mysql binlog (and from each other), there are multiple fsync() calls per
commit that need the group commit optimisation (2 per participating storage
engine + 1 for binlog).
2. The code handling commit is split in several places, in main server code
and in storage engine code. With pluggable binlog it will be split even
more. This requires a good abstract yet powerful API to be able to implement
group commit simply and efficiently in plugins without the different parts
having to rely on internals of the others.
3. We want the order of commits to be the same in all engines participating in
multiple transactions. This requirement is the reason that InnoDB currently
breaks group commit with the infamous prepare_commit_mutex.
While currently there is no server guarantee to get the same commit order in
engines and binlog (except for the InnoDB prepare_commit_mutex hack), there are
several reasons why this could be desirable:
- InnoDB hot backup needs to be able to extract a binlog position that is
consistent with the hot backup to be able to provision a new slave, and
this is impossible without imposing at least partial consistent ordering
between InnoDB and binlog.
- Other backup methods could have similar needs, eg. XtraBackup or
`mysqldump --single-transaction`, to have consistent commit order between
binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
or similar expensive blocking operation. (other backup methods, like LVM
snapshot, don't need consistent commit order, as they can restore
out-of-order commits during crash recovery using XA).
- If we have consistent commit order, we can think about optimising commit to
need only one fsync (for binlog); lost commits in storage engines can then
be recovered from the binlog at crash recovery by re-playing against the
engine from a particular point in the binlog.
- With consistent commit order, we can get better semantics for START
TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
could even get it to return also a matching binlog position). Currently,
this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
engines.
- In InnoDB, the performance in the presence of hotspots can be improved if
we can release row locks early in the commit phase, but this requires that we
release them in the same order as commits in the binlog to ensure consistency
between master and slaves.
- There were some discussions around Galera [1] synchronous replication and
global transaction ID that it needed consistent commit order among
participating engines.
- I believe there could be other applications for guaranteed consistent
commit order, and that the architecture described in this worklog can
implement such a guarantee with reasonable overhead.
References:
[1] Galera: http://www.codership.com/products/galera_replication
HIGH-LEVEL SPECIFICATION:
The basic idea in group commit is that multiple threads, each handling one
transaction, prepare for commit and then queue up together waiting to do an
fsync() on the transaction log. Then once the log is available, a single
thread does the fsync() + other necessary book-keeping for all of the threads
at once. After this, the single thread signals the other threads that it's
done and they can finish up and return success (or failure) from the commit
operation.
So group commit has a parallel part and a sequential part, and we need a
facility for engines/binlog to participate in both the parallel and the
sequential part.
To do this, we add two new handlerton methods:
int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
The idea is that the existing prepare() and commit() methods run in the
parallel part of group commit, and the new prepare_ordered() and
commit_ordered() run in the sequential part.
The prepare_ordered() method is called after prepare(). The order of
transactions that call into prepare_ordered() is guaranteed to be the same among
all storage engines and binlog, and it is serialised so no two calls can be
running inside the same engine at the same time.
The commit_ordered() method is called before commit(), and similarly is
guaranteed to have same transaction order in all participants, and to be
serialised within one engine.
As the prepare_ordered() and commit_ordered() calls are serialised, the idea
is that handlers should do the minimum amount of work needed in these calls,
relaying most of the work (eg. fsync() ...) to prepare() and commit().
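As a sketch of that division of labour (not actual engine code; the engine_*
helpers are hypothetical stand-ins for engine internals):
  /* Serialised part: runs in binlog commit order and must be fast. */
  static void example_commit_ordered(handlerton *hton, THD *thd, bool all)
  {
    engine_fix_commit_order(thd); /* fix place in engine log, no fsync() */
  }
  /* Parallel part: the expensive durability work, where group commit
     (a shared fsync()) remains possible. */
  static int example_commit(handlerton *hton, THD *thd, bool all)
  {
    return engine_flush_and_sync_log(thd);
  }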
As a concrete example, for InnoDB the commit_ordered() method will do the
first part of commit that fixes the commit order in the transaction log
buffer, and the commit() method will write the log to disk and fsync()
it. This split already exists inside the InnoDB code, running before
respectively after releasing the prepare_commit_mutex.
In addition, the XA transaction coordinator (TC_LOG) is special, since it is
the one responsible for deciding whether to commit or rollback the
transaction. For this we need an extra method, since this decision can be done
only after we know that all prepare() and prepare_ordered() calls succeed, and
must be done to know whether to call commit_ordered()/commit(), or do rollback.
The existing method for this is TC_LOG::log_xid(). To make group commit
simpler to implement in a transaction coordinator and more efficient,
we introduce a new method:
void group_log_xid(THD *first_thd);
This method runs in the sequential part of group commit. It receives a list of
transactions to perform log_xid() on, in the correct commit order. (Note that
TC_LOG can do parallel parts of group commit in its own prepare() and commit()
methods).
This method can make it easier to implement the group commit in TC_LOG, as it
gets directly the list of transactions in the right order. Without it, it
might need to compute such order anyway in a prepare_ordered() method, and the
server has to create this ordered list anyway to implement the order guarantee
for prepare_ordered() and commit_ordered().
This group_log_xid() method also is more efficient, as it avoids some
inter-thread synchronisation. Since group_log_xid() is serialised, we can run
it together with all the commit_ordered() method calls and need only a single
sequential code section. With the log_xid() methods, we would need first a
sequential part for the prepare_ordered() calls, then a parallel part with
log_xid() calls (to not lose group commit ability for log_xid()), then again
a sequential part for the commit_ordered() method calls.
The extra synchronisation is needed, as each commit_ordered() call will have
to wait for log_xid() in one thread (if log_xid() fails then commit_ordered()
should not be called), and also wait for commit_ordered() to finish in all
threads handling earlier commits. In effect we will need to bounce the
execution from one thread to the other among all participants in the group
commit.
As a consequence of the group_log_xid() optimisation, handlers must be aware
that the commit_ordered() call can happen in another thread than the one
running commit() (so thread local storage is not available). This should not
be a big issue as the THD is available for storing any needed information.
Since group_log_xid() runs for multiple transactions in a single thread, it
cannot do error reporting (my_error()) as that relies on thread local
storage. Instead it sets an error code in THD::xid_error, and if there is an
error then later another method will be called (in correct thread context) to
actually report the error:
int xid_delayed_error(THD *thd)
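A sketch of how this might look, assuming THD::xid_error holds the code
stored by group_log_xid() (my_error() is the server's standard
error-reporting function and requires the correct thread context):

  int xid_delayed_error(THD *thd)
  {
    /* We are back in the transaction's own thread here, so thread
       local storage (and thus my_error()) works again. */
    my_error(thd->xid_error, MYF(0));
    return 1;
  }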
The three new methods prepare_ordered(), group_log_xid(), and commit_ordered()
are optional (as is xid_delayed_error). A storage engine or transaction
coordinator is free to not implement them if they are not needed. In this case
there will be no order guarantee for the corresponding stage of group commit
for that engine. For example, InnoDB needs no ordering of the prepare phase,
so can omit implementing prepare_ordered(); TC_LOG_MMAP needs no ordering at
all, so does not need to implement any of them.
Note in particular that all existing engines (and binlog implementations, if
they exist) will work unmodified (and also without any change in group commit
facilities or commit order guarantees).
Using these new APIs, the work will be to
- In ha_commit_trans(), implement the correct semantics for the three new
calls.
- In XtraDB, use the new commit_ordered() call to remove the
prepare_commit_mutex (and resurrect group commit) without losing the
consistency with binlog commit order.
- In log.cc (binlog module), implement group_log_xid() to do group commit of
multiple transactions to the binlog with a single shared fsync() call.
-----------------------------------------------------------------------
Some possible alternatives for this worklog:
- We could eliminate the group_log_xid() method for a simpler API, at the
cost of extra synchronisation between threads to do in-order
commit_ordered() method calls. This would also allow commit_ordered() to be
called in the correct thread context.
- Alternatively, we could eliminate log_xid() and require that all
transaction coordinators implement group_log_xid() instead, again for some
moderate simplification.
- At the moment there is no plugin actually using prepare_ordered(), so it
could be removed from the design. But it fits in well, is efficient to
implement, and could be useful later (eg. for the requested feature of
releasing locks early in InnoDB).
-----------------------------------------------------------------------
Some possible follow-up projects after this is implemented:
- Add statistics about how efficient group commit is (#fsyncs/#commits in
each engine and binlog).
- Implement an XtraDB prepare_ordered() method that can release row locks
early (Mark Callaghan from Facebook advocates this, but we need to determine
exactly how to do this safely).
- Implement a new crash recovery algorithm that uses the consistent commit
ordering to need an fsync() only for the binlog. At crash recovery, any
missing transactions in an engine are replayed from the correct point in the
binlog (this point must be stored transactionally inside the engine, as
XtraDB already does today).
- Implement that START TRANSACTION WITH CONSISTENT SNAPSHOT 1) really gets a
consistent snapshot, with the same set of committed and not committed
transactions in all engines, 2) returns a corresponding consistent binlog
position. This should be easy by piggybacking on the synchronisation
implemented for ha_commit_trans().
- Use this in XtraBackup to get consistent binlog position without having to
block all updates with FLUSH TABLES WITH READ LOCK.
LOW-LEVEL DESIGN:
1. Changes for ha_commit_trans()
The guts of the code for commit are in the function ha_commit_trans() (and in
commit_one_phase() which is called from it). This must be extended to use the
new prepare_ordered(), group_log_xid(), and commit_ordered() calls.
1.1 Atomic queue of committing transactions
To keep the right commit order among participants, we put transactions into a
queue. The operations on the queue are non-locking:
- Insert THD at the head of the queue, and return old queue.
THD *enqueue_atomic(THD *thd)
- Fetch (and delete) the whole queue.
THD *atomic_grab_reverse_queue()
These are simple to implement with atomic compare-and-set. Note that there is
no ABA problem [2], as we do not delete individual elements from the queue, we
grab the whole queue and replace it with NULL.
A transaction enters the queue when it does prepare_ordered(). This way, the
scheduling order for prepare_ordered() calls is what determines the sequence
in the queue and effectively the commit order.
The queue is grabbed by the code doing group_log_xid() and commit_ordered()
calls. The queue is passed directly to group_log_xid(), and afterwards
iterated to do individual commit_ordered() calls.
Using a lock-free queue allows prepare_ordered() (for one transaction) to run
in parallel with commit_ordered() (for another transaction), increasing
potential parallelism.
The queue is simply a linked list of THD objects, linked through a
THD::next_commit_ordered field. Since we add at the head of the queue, the
list is actually in reverse order, so must be reversed when we grab and delete
it.
The reason that enqueue_atomic() returns the old queue is so that we can check
if an insert goes to the head of the queue. The thread at the head of the
queue will do the sequential part of group commit for everyone.
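These two operations are small enough to show in full; a self-contained
sketch using std::atomic compare-and-set in place of the server's own atomic
primitives (the THD stub keeps only the link field):

  #include <atomic>

  struct THD { THD *next_commit_ordered; };

  static std::atomic<THD*> group_commit_queue{nullptr};

  /* Insert thd at the head of the queue; return the old head so the
     caller can detect whether it became the group commit leader
     (old head == NULL). */
  THD *enqueue_atomic(THD *thd)
  {
    THD *old_head= group_commit_queue.load();
    do
      thd->next_commit_ordered= old_head;
    while (!group_commit_queue.compare_exchange_weak(old_head, thd));
    return old_head;
  }

  /* Grab the whole queue, replacing it with NULL, and reverse it so it
     ends up in commit order instead of insertion order. */
  THD *atomic_grab_reverse_queue()
  {
    THD *queue= group_commit_queue.exchange(nullptr);
    THD *reversed= nullptr;
    while (queue)
    {
      THD *next= queue->next_commit_ordered;
      queue->next_commit_ordered= reversed;
      reversed= queue;
      queue= next;
    }
    return reversed;
  }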
1.2 Locks
1.2.1 Global LOCK_prepare_ordered
This lock is taken to serialise calls to prepare_ordered(). Note that
effectively, the commit order is decided by the order in which threads obtain
this lock.
1.2.2 Global LOCK_group_commit and COND_group_commit
This lock is used to protect the serial part of group commit. It is taken
around the code where we grab the queue, call group_log_xid() on the queue,
and call commit_ordered() on each element of the queue, to make sure they
happen serialised and in consistent order. It also protects the variable
group_commit_queue_busy, which is used when not using group_log_xid() to delay
running over a new queue until the first queue is completely done.
1.2.3 Global LOCK_commit_ordered
This lock is taken around calls to commit_ordered(), to ensure they happen
serialised.
1.2.4 Per-thread thd->LOCK_commit_ordered and thd->COND_commit_ordered
This lock protects the thd->group_commit_ready variable, as well as the
condition variable used to wake up threads after log_xid() and
commit_ordered() finish.
1.2.5 Global LOCK_group_commit_queue
This is only used on platforms with no native compare-and-set operations, to
make the queue operations atomic.
1.3 Commit algorithm.
This is the basic algorithm, simplified by
- omitting some error handling
- omitting looping over all handlers when invoking handler methods
- omitting some possible optimisations when not all calls are needed (see
next section).
- Omitting the case where no group_log_xid() is used, see below.
---- BEGIN ALGORITHM ----
ht->prepare()
// Call prepare_ordered() and enqueue in correct commit order
lock(LOCK_prepare_ordered)
ht->prepare_ordered()
old_queue= enqueue_atomic(thd)
thd->group_commit_ready= FALSE
is_group_commit_leader= (old_queue == NULL)
unlock(LOCK_prepare_ordered)
if (is_group_commit_leader)
// The first in queue handles group commit for everyone
lock(LOCK_group_commit)
// Wait while queue is busy, see below for when this occurs
while (group_commit_queue_busy)
cond_wait(COND_group_commit)
// Grab and reverse the queue to get correct order of transactions
queue= atomic_grab_reverse_queue()
// This call will set individual error codes in thd->xid_error
// It also sets the cookie for unlog() in thd->xid_cookie
group_log_xid(queue)
lock(LOCK_commit_ordered)
for (other IN queue)
if (!other->xid_error)
ht->commit_ordered()
unlock(LOCK_commit_ordered)
unlock(LOCK_group_commit)
// Now we are done, so wake up all the others.
for (other IN TAIL(queue))
lock(other->LOCK_commit_ordered)
other->group_commit_ready= TRUE
cond_signal(other->COND_commit_ordered)
unlock(other->LOCK_commit_ordered)
else
// If not the leader, just wait until leader did the work for us.
lock(thd->LOCK_commit_ordered)
while (!thd->group_commit_ready)
cond_wait(thd->LOCK_commit_ordered, thd->COND_commit_ordered)
unlock(thd->LOCK_commit_ordered)
// Finally do any error reporting now that we're back in own thread.
if (thd->xid_error)
xid_delayed_error(thd)
else
ht->commit(thd)
unlog(thd->xid_cookie, thd->xid)
---- END ALGORITHM ----
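The wait/wakeup steps above map directly onto the per-thread mutex and
condition variable from section 1.2.4. A self-contained POSIX sketch of just
that part (the THD stub keeps only the fields involved):

  #include <pthread.h>

  struct THD
  {
    pthread_mutex_t LOCK_commit_ordered;
    pthread_cond_t  COND_commit_ordered;
    bool            group_commit_ready;
  };

  /* Leader side: mark one follower as done and wake it up. */
  void wake_follower(THD *other)
  {
    pthread_mutex_lock(&other->LOCK_commit_ordered);
    other->group_commit_ready= true;
    pthread_cond_signal(&other->COND_commit_ordered);
    pthread_mutex_unlock(&other->LOCK_commit_ordered);
  }

  /* Follower side: sleep until the leader has run our commit_ordered(). */
  void wait_for_leader(THD *thd)
  {
    pthread_mutex_lock(&thd->LOCK_commit_ordered);
    while (!thd->group_commit_ready)
      pthread_cond_wait(&thd->COND_commit_ordered, &thd->LOCK_commit_ordered);
    pthread_mutex_unlock(&thd->LOCK_commit_ordered);
  }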
If the transaction coordinator does not support group_log_xid(), we have to do
things differently. In this case after the serialisation point at
prepare_ordered(), we have to parallelise again when running log_xid()
(otherwise we would lose group commit). But then when log_xid() is done, we
have to serialise again to check for any error and call commit_ordered() in
the correct sequence for any transaction where log_xid() did not return an
error.
The central part of the algorithm in this case (when using log_xid()) is:
---- BEGIN ALGORITHM ----
cookie= log_xid(thd)
error= (cookie == 0)
if (is_group_commit_leader)
// The first to enqueue grabs the queue and runs first.
// But we must wait until a previous queue run is fully done.
lock(LOCK_group_commit)
while (group_commit_queue_busy)
cond_wait(COND_group_commit)
queue= atomic_grab_reverse_queue()
// The queue will be busy until last thread in it is done.
group_commit_queue_busy= TRUE
unlock(LOCK_group_commit)
else
// Not first in queue -> wait for previous one to wake us up.
lock(thd->LOCK_commit_ordered)
while (!thd->group_commit_ready)
cond_wait(thd->LOCK_commit_ordered, thd->COND_commit_ordered)
unlock(thd->LOCK_commit_ordered)
if (!error) // Only if log_xid() was successful
lock(LOCK_commit_ordered)
ht->commit_ordered()
unlock(LOCK_commit_ordered)
// Wake up the next thread, and release queue in last.
next= thd->next_commit_ordered
if (next)
lock(next->LOCK_commit_ordered)
next->group_commit_ready= TRUE
cond_signal(next->COND_commit_ordered)
unlock(next->LOCK_commit_ordered)
else
lock(LOCK_group_commit)
group_commit_queue_busy= FALSE
unlock(LOCK_group_commit)
---- END ALGORITHM ----
There are a number of locks taken in the algorithm, but in the group_log_xid()
case most of them should be uncontended most of the time. The
LOCK_group_commit of course will be contended, as new threads queue up waiting
for the previous group commit (and binlog fsync()) to finish so they can do
the next group commit. This is the whole point of implementing group commit.
The LOCK_prepare_ordered and LOCK_commit_ordered mutexes should not be much
contended as long as handlers follow the intention of having the corresponding
handler calls execute quickly.
The per-thread LOCK_commit_ordered mutexes should not be contended; they are
only used to wake up a sleeping thread.
1.4 Optimisations when not using all three new calls
The prepare_ordered(), group_log_xid(), and commit_ordered() methods are
optional, and if not implemented by a particular handler/transaction
coordinator, we can optimise the algorithm to take advantage of not having to
keep ordering for the missing parts.
If there is no prepare_ordered(), then we need not take the
LOCK_prepare_ordered mutex.
If there is no commit_ordered(), then we need not take the LOCK_commit_ordered
mutex.
If there is no group_log_xid(), then we only need the queue to ensure the
same ordering of transactions for commit_ordered() as for prepare_ordered().
Thus, if either of these (or both) are also not present, we do not need to
use the queue at all.
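For example, the prepare_ordered() step might be guarded like this (a
sketch; the use_prepare_ordered flag, computed by checking which
participants implement the call, and the surrounding function are
hypothetical):

  #include <pthread.h>

  static pthread_mutex_t LOCK_prepare_ordered= PTHREAD_MUTEX_INITIALIZER;

  void run_prepare_ordered_step(bool use_prepare_ordered)
  {
    if (!use_prepare_ordered)
      return;  /* no participant needs prepare ordering, skip the mutex */
    pthread_mutex_lock(&LOCK_prepare_ordered);
    /* ... call prepare_ordered() on the participating handlertons ... */
    pthread_mutex_unlock(&LOCK_prepare_ordered);
  }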
2. Binlog code changes (log.cc)
The bulk of the work needed for the binary log is to extend the code to allow
group commit to the log. Unlike InnoDB/XtraDB, there is no existing support
inside the binlog code for group commit.
The existing code runs most of the write + fsync to the binary log under the
global LOCK_log mutex, preventing any group commit.
To enable group commit, this code must be split into two parts:
- one part that runs per transaction, re-writing the embedded event positions
for the correct offset, and writing this into the in-memory log cache.
- another part that writes a set of transactions to the disk, and runs
fsync().
Then in group_log_xid(), we can run the first part in a loop over all the
transactions in the passed-in queue, and run the second part only once.
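In outline, the binlog's group_log_xid() then mirrors the TC_LOG sketch
given earlier: loop the per-transaction part, run the shared part once (the
two helper names are hypothetical):

  static void binlog_write_transaction(THD *thd);  /* per-transaction part */
  static void binlog_flush_and_sync();             /* shared write + fsync */

  void binlog_group_log_xid(THD *first_thd)
  {
    for (THD *thd= first_thd; thd; thd= thd->next_commit_ordered)
      binlog_write_transaction(thd);  /* fix offsets, copy trx log cache */
    binlog_flush_and_sync();          /* one write + fsync() for them all */
  }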
The binlog code also has other code paths that write into the binlog,
eg. non-transactional statements. These also have to be adapted to work with
the new code.
In order to get some group commit facility for these also, we change that part
of the code in a similar way to ha_commit_trans. We keep another,
binlog-internal queue of such non-transactional binlog writes, and such writes
queue up here before sleeping on the LOCK_log mutex. Once a thread obtains the
LOCK_log, it loops over the queue for the fast part, and does the slow part
once, then finally wakes up the others in the queue.
In the transactional case in group_log_xid(), before we run the passed-in
queue, we add any members found in the binlog-internal queue. This allows
these non-transactional writes to share the group commit.
However, in the case where it is a non-transactional write that gets the
LOCK_log, the transactions from the ha_commit_trans() queue will not be able
to take part (they will have to wait for their turn to do another fsync). It
seems difficult to cleanly let the binlog code grab the queue from out of the
ha_commit_trans() algorithm. I think group commit is mostly useful in
transactional workloads anyway (non-transactional engines will lose data
anyway in case of crash, so why fsync() after each transaction?).
3. XtraDB changes (ha_innodb.cc)
The changes needed in XtraDB are comparatively simple, as XtraDB already
implements group commit; it just needs to be enabled with the new
commit_ordered() call.
The existing commit() method is already logically in two parts. The first
part runs under the prepare_commit_mutex and must be run in the same order as
binlog commit. This part needs to be moved to commit_ordered(). The second
part runs after releasing the prepare_commit_mutex and does the transaction
log write+fsync; it can remain in commit().
The prepare_commit_mutex can then be removed (along with the
enable_unsafe_group_commit XtraDB option that disables it).
There are two asserts that check that the thread running the first part of
XtraDB commit is the same as the thread running the other operations for the
transaction. These have to be removed (as commit_ordered() can run in a
different thread). Also, error reporting with sql_print_error() has to be
delayed until commit() time.
4. Proof-of-concept implementation
There is a proof-of-concept implementation of this architecture, in the form
of a quilt patch series [3].
A quick benchmark was done, with sync_binlog=1 and
innodb_flush_log_at_trx_commit=1, using 64 parallel threads doing single-row
transactions against one table.
Without the patch, we get only 25 queries per second.
With the patch, we get 650 queries per second.
5. Open issues/tasks
5.1 XA / other prepare() and commit() call sites.
Check that user-level XA is handled correctly and works, and that it is
covered sufficiently with tests. Also check that any other calls of
ha->prepare() and ha->commit() outside of ha_commit_trans() are handled
correctly.
5.2 Testing
This worklog needs additions to the test suite, including error inserts to
check error handling, and synchronisation points to check thread parallelism
correctness.
6. Alternative implementations
- The binlog code maintains its own extra atomic transaction queue to handle
non-transactional commits in a good way together with transactional ones
(with respect to group commit). Alternatively, we could ignore this issue
and just give up on group commit for non-transactional statements, for some
code simplifications.
- The binlog code has two ways to prepare end_event and similar, one that
uses stack-allocation, and another for when stack allocation is not
possible that uses thd->mem_root. Probably the overhead of thd->mem_root is
so small that it would make sense to use the same code for both cases.
- Instead of adding extra fields to THD, we could allocate a separate
structure on the thd->mem_root with the required extra fields (including
the THD pointer). This would seem to require initialising mutexes at every
commit, though.
- It would probably be a good idea to implement TC_LOG_MMAP::group_log_xid()
(should not be hard).
-----------------------------------------------------------------------
References:
[2] https://secure.wikimedia.org/wikipedia/en/wiki/ABA_problem
[3] https://knielsen-hq.org/maria/patches.mwl116/
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Updated (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 25 May '10
by worklog-noreply@askmonty.org 25 May '10
25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 49
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 13:19)=-=-
Low Level Design modified.
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High-Level Specification modified.
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High Level Description modified.
-=-=(Knielsen - Tue, 25 May 2010, 08:28)=-=-
More thoughts on and changes to the architecture. Got to something now that I am satisfied with and
that seems to be able to handle all issues.
Implement new prepare_ordered and commit_ordered handler methods and the logic in ha_commit_trans().
Implement TC_LOG::group_log_xid() method and logic in ha_commit_trans().
Implement XtraDB part, using commit_ordered() rather than prepare_commit_mutex.
Fix test suite failures.
Proof-of-concept patch series complete now.
Do initial benchmark, getting good results. With 64 threads, see 26x improvement in queries-per-sec.
Next step: write up the architecture description.
Worked 21 hours and estimate 0 hours remain (original estimate increased by 21 hours).
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparation for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all threads, executing the first part of
binlog writing (the write(2) syscall essentially). It then runs the second part (fsync(2)
essentially) only once, and then wakes up the remaining threads in the queue.
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc 2) move the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), 3) move fast part of InnoDB commit to innobase_commit_fast(), removing the
prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/different.
Think about possible next steps, such as releasing innodb row locks early (in
innobase_prepare_fast), and doing crash recovery by replaying transactions from the binlog (removing
the need for engine durability and 2 of 3 fsync() in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which storage engines and binary log are consistent with each other,
it is necessary to use XA with durable commits for both storage engines
(innodb_flush_log_at_trx_commit=1) and binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit, as there is no working group commit when the binary log is
enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
----
Implementing group commit in MySQL faces some challenges from the handler
plugin architecture:
1. Because storage engine handlers have separate transaction logs from the
mysql binlog (and from each other), there are multiple fsync() calls per
commit that need the group commit optimisation (2 per participating storage
engine + 1 for binlog).
2. The code handling commit is split in several places, in main server code
and in storage engine code. With pluggable binlog it will be split even
more. This requires a good abstract yet powerful API to be able to implement
group commit simply and efficiently in plugins without the different parts
having to rely on internals of the others.
3. We want the order of commits to be the same in all engines participating in
multiple transactions. This requirement is the reason that InnoDB currently
breaks group commit with the infamous prepare_commit_mutex.
While currently there is no server guarantee to get the same commit order in
engines and binlog (except for the InnoDB prepare_commit_mutex hack), there
are several reasons why this could be desirable:
- InnoDB hot backup needs to be able to extract a binlog position that is
consistent with the hot backup to be able to provision a new slave, and
this is impossible without imposing at least partial consistent ordering
between InnoDB and binlog.
- Other backup methods could have similar needs, eg. XtraBackup or
`mysqldump --single-transaction`, to have consistent commit order between
binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
or similar expensive blocking operation. (other backup methods, like LVM
snapshot, don't need consistent commit order, as they can restore
out-of-order commits during crash recovery using XA).
- If we have consistent commit order, we can think about optimising commit to
need only one fsync (for binlog); lost commits in storage engines can then
be recovered from the binlog at crash recovery by re-playing against the
engine from a particular point in the binlog.
- With consistent commit order, we can get better semantics for START
TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
could even get it to return also a matching binlog position). Currently,
this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
engines.
- In InnoDB, the performance in the presence of hotspots can be improved if
we can release row locks early in the commit phase, but this requires that
we release them in the same order as commits in the binlog to ensure
consistency between master and slaves.
- There have been some discussions around Galera [1] synchronous replication
and global transaction IDs indicating that it needs consistent commit order
among participating engines.
- I believe there could be other applications for guaranteed consistent
commit order, and that the architecture described in this worklog can
implement such guarantee with reasonable overhead.
References:
[1] Galera: http://www.codership.com/products/galera_replication
LOW-LEVEL DESIGN:
1. Changes for ha_commit_trans()
The guts of the commit code are in the function ha_commit_trans() (and in
commit_one_phase() which is called from it). This must be extended to use the
new prepare_ordered(), group_log_xid(), and commit_ordered() calls.
1.1 Atomic queue of committing transactions
To keep the right commit order among participants, we put transactions into a
queue. The operations on the queue are non-locking:
- Insert THD at the head of the queue, and return old queue.
THD *enqueue_atomic(THD *thd)
- Fetch (and delete) the whole queue.
THD *atomic_grab_reverse_queue()
These are simple to implement with atomic compare-and-set. Note that there is
no ABA problem [2], as we do not delete individual elements from the queue, we
grab the whole queue and replace it with NULL.
A transaction enters the queue when it does prepare_ordered(). This way, the
scheduling order for prepare_ordered() calls is what determines the sequence
in the queue and effectively the commit order.
The queue is grabbed by the code doing group_log_xid() and commit_ordered()
calls. The queue is passed directly to group_log_xid(), and afterwards
iterated to do individual commit_ordered() calls.
Using a lock-free queue allows prepare_ordered() (for one transaction) to run
in parallel with commit_ordered() (in another transaction), increasing potential
parallelism.
The queue is simply a linked list of THD objects, linked through a
THD::next_commit_ordered field. Since we add at the head of the queue, the
list is actually in reverse order, so must be reversed when we grab and delete
it.
The reason that enqueue_atomic() returns the old queue is so that we can check
if an insert goes to the head of the queue. The thread at the head of the
queue will do the sequential part of group commit for everyone.
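For illustration, the two operations could be implemented roughly as below.
This is a sketch using GCC's __sync builtins directly (real server code would
use its own atomic wrappers), and it assumes the proposed
THD::next_commit_ordered link field:

  /* Head of the queue of committing transactions; NULL when empty. */
  static THD *group_commit_queue= NULL;

  /* Insert thd at the head of the queue and return the previous head, so
     the caller can tell if it became the group commit leader (NULL). */
  static THD *enqueue_atomic(THD *thd)
  {
    THD *old;
    do
    {
      old= group_commit_queue;
      thd->next_commit_ordered= old;
    } while (!__sync_bool_compare_and_swap(&group_commit_queue, old, thd));
    return old;
  }

  /* Atomically detach the whole queue, then reverse the list so it is in
     prepare_ordered() (i.e. commit) order instead of insertion order. */
  static THD *atomic_grab_reverse_queue(void)
  {
    THD *queue, *reversed= NULL;
    do
    {
      queue= group_commit_queue;
    } while (!__sync_bool_compare_and_swap(&group_commit_queue, queue, NULL));
    while (queue)
    {
      THD *next= queue->next_commit_ordered;
      queue->next_commit_ordered= reversed;
      reversed= queue;
      queue= next;
    }
    return reversed;
  }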
1.2 Locks
1.2.1 Global LOCK_prepare_ordered
This lock is taken to serialise calls to prepare_ordered(). Note that
effectively, the commit order is decided by the order in which threads obtain
this lock.
1.2.2 Global LOCK_group_commit and COND_group_commit
This lock is used to protect the serial part of group commit. It is taken
around the code where we grab the queue, call group_log_xid() on the queue,
and call commit_ordered() on each element of the queue, to make sure they
happen serialised and in consistent order. It also protects the variable
group_commit_queue_busy, which is used when not using group_log_xid() to delay
running over a new queue until the first queue is completely done.
1.2.3 Global LOCK_commit_ordered
This lock is taken around calls to commit_ordered(), to ensure they happen
serialised.
1.2.4 Per-thread thd->LOCK_commit_ordered and thd->COND_commit_ordered
This lock protects the thd->group_commit_ready variable, as well as the
condition variable used to wake up threads after log_xid() and
commit_ordered() finish.
1.2.5 Global LOCK_group_commit_queue
This is only used on platforms with no native compare-and-set operations, to
make the queue operations atomic.
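The wait/wake-up handshake on these per-thread objects, used by both
algorithms in the next section, would look roughly like this in plain pthread
terms (the two function names are illustrative, not actual server functions):

  /* Follower side: sleep until the leader marks this transaction done. */
  static void group_commit_wait_for_wakeup(THD *thd)
  {
    pthread_mutex_lock(&thd->LOCK_commit_ordered);
    while (!thd->group_commit_ready)
      pthread_cond_wait(&thd->COND_commit_ordered, &thd->LOCK_commit_ordered);
    pthread_mutex_unlock(&thd->LOCK_commit_ordered);
  }

  /* Leader side: mark one follower as done and wake it up. */
  static void group_commit_wakeup_other(THD *other)
  {
    pthread_mutex_lock(&other->LOCK_commit_ordered);
    other->group_commit_ready= TRUE;
    pthread_cond_signal(&other->COND_commit_ordered);
    pthread_mutex_unlock(&other->LOCK_commit_ordered);
  }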
1.3 Commit algorithm.
This is the basic algorithm, simplified by
- omitting some error handling
- omitting looping over all handlers when invoking handler methods
- omitting some possible optimisations when not all calls are needed (see
next section)
- omitting the case where group_log_xid() is not used (see below).
---- BEGIN ALGORITHM ----
ht->prepare()
// Call prepare_ordered() and enqueue in correct commit order
lock(LOCK_prepare_ordered)
ht->prepare_ordered()
old_queue= enqueue_atomic(thd)
thd->group_commit_ready= FALSE
is_group_commit_leader= (old_queue == NULL)
unlock(LOCK_prepare_ordered)
if (is_group_commit_leader)
// The first in queue handles group commit for everyone
lock(LOCK_group_commit)
// Wait while queue is busy, see below for when this occurs
while (group_commit_queue_busy)
cond_wait(COND_group_commit)
// Grab and reverse the queue to get correct order of transactions
queue= atomic_grab_reverse_queue()
// This call will set individual error codes in thd->xid_error
// It also sets the cookie for unlog() in thd->xid_cookie
group_log_xid(queue)
lock(LOCK_commit_ordered)
for (other IN queue)
if (!other->xid_error)
ht->commit_ordered()
unlock(LOCK_commit_ordered)
unlock(LOCK_group_commit)
// Now we are done, so wake up all the others.
for (other IN TAIL(queue))
lock(other->LOCK_commit_ordered)
other->group_commit_ready= TRUE
cond_signal(other->COND_commit_ordered)
unlock(other->LOCK_commit_ordered)
else
// If not the leader, just wait until leader did the work for us.
lock(thd->LOCK_commit_ordered)
while (!thd->group_commit_ready)
cond_wait(thd->LOCK_commit_ordered, thd->COND_commit_ordered)
unlock(thd->LOCK_commit_ordered)
// Finally do any error reporting now that we're back in own thread.
if (thd->xid_error)
xid_delayed_error(thd)
else
ht->commit(thd)
unlog(thd->xid_cookie, thd->xid)
---- END ALGORITHM ----
If the transaction coordinator does not support group_log_xid(), we have to do
things differently. In this case, after the serialisation point at
prepare_ordered(), we have to parallelise again when running log_xid()
(otherwise we would lose group commit). But then when log_xid() is done, we
have to serialise again to check for any error and call commit_ordered() in
the correct sequence for any transaction where log_xid() did not return an
error.
The central part of the algorithm in this case (when using log_xid()) is:
---- BEGIN ALGORITHM ----
cookie= log_xid(thd)
error= (cookie == 0)
if (is_group_commit_leader)
// The first to enqueue grabs the queue and runs first.
// But we must wait until a previous queue run is fully done.
lock(LOCK_group_commit)
while (group_commit_queue_busy)
cond_wait(COND_group_commit)
queue= atomic_grab_reverse_queue()
// The queue will be busy until last thread in it is done.
group_commit_queue_busy= TRUE
unlock(LOCK_group_commit)
else
// Not first in queue -> wait for previous one to wake us up.
lock(thd->LOCK_commit_ordered)
while (!thd->group_commit_ready)
cond_wait(thd->LOCK_commit_ordered, thd->COND_commit_ordered)
unlock(thd->LOCK_commit_ordered)
if (!error) // Only if log_xid() was successful
lock(LOCK_commit_ordered)
ht->commit_ordered()
unlock(LOCK_commit_ordered)
// Wake up the next thread, and release queue in last.
next= thd->next_commit_ordered
if (next)
lock(next->LOCK_commit_ordered)
next->group_commit_ready= TRUE
cond_signal(next->COND_commit_ordered)
unlock(next->LOCK_commit_ordered)
else
lock(LOCK_group_commit)
group_commit_queue_busy= FALSE
unlock(LOCK_group_commit)
---- END ALGORITHM ----
There are a number of locks taken in the algorithm, but in the group_log_xid()
case most of them should be uncontended most of the time. The
LOCK_group_commit of course will be contended, as new threads queue up waiting
for the previous group commit (and binlog fsync()) to finish so they can do
the next group commit. This is the whole point of implementing group commit.
The LOCK_prepare_ordered and LOCK_commit_ordered mutexes should not be much
contended as long as handlers follow the intention that the corresponding
handler calls execute quickly.
The per-thread LOCK_commit_ordered mutexes should not be contended; they are
only used to wake up a sleeping thread.
1.4 Optimisations when not using all three new calls
The prepare_ordered(), group_log_xid(), and commit_ordered() methods are
optional, and if not implemented by a particular handler/transaction
coordinator, we can optimise the algorithm to take advantage of not having to
keep ordering for the missing parts.
If there is no prepare_ordered(), then we need not take the
LOCK_prepare_ordered mutex.
If there is no commit_ordered(), then we need not take the LOCK_commit_ordered
mutex.
If there is no group_log_xid(), then we only need the queue to ensure the same
ordering of transactions for commit_ordered() as for prepare_ordered(). Thus,
if either of these (or both) are also not present, we do not need to use the
queue at all.
2. Binlog code changes (log.cc)
The bulk of the work needed for the binary log is to extend the code to allow
group commit to the log. Unlike InnoDB/XtraDB, there is no existing support
inside the binlog code for group commit.
The existing code runs most of the write + fsync to the binary log under the
global LOCK_log mutex, preventing any group commit.
To enable group commit, this code must be split into two parts:
- one part that runs per transaction, rewriting the embedded event positions
to the correct offsets, and writing this into the in-memory log cache.
- another part that writes a set of transactions to the disk, and runs
fsync().
Then in group_log_xid(), we can run the first part in a loop over all the
transactions in the passed-in queue, and run the second part only once.
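Schematically, the binlog's group_log_xid() could then look as below. This is
a sketch only: write_transaction_to_binlog() and flush_and_sync() are
hypothetical stand-ins for the two parts of the split, and the handling of
the binlog's existing LOCK_log mutex is simplified:

  static void binlog_group_log_xid(THD *queue)
  {
    pthread_mutex_lock(&LOCK_log);
    for (THD *thd= queue; thd; thd= thd->next_commit_ordered)
    {
      /* Per-transaction part: fix up the embedded event positions and copy
         this transaction's log cache into the binlog file buffer. */
      if (write_transaction_to_binlog(thd))
        thd->xid_error= 1;    /* reported later via xid_delayed_error() */
    }
    /* Once-per-group part: a single write-out and fsync() for everyone. */
    flush_and_sync();
    pthread_mutex_unlock(&LOCK_log);
  }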
The binlog code also has other code paths that write into the binlog,
eg. non-transactional statements. These also have to be adapted to work with
the new code.
In order to get some group commit facility for these also, we change that part
of the code in a similar way to ha_commit_trans. We keep another,
binlog-internal queue of such non-transactional binlog writes, and such writes
queue up here before sleeping on the LOCK_log mutex. Once a thread obtains the
LOCK_log, it loops over the queue for the fast part, and does the slow part
once, then finally wakes up the others in the queue.
In the transactional case in group_log_xid(), before we run the passed-in
queue, we add any members found in the binlog-internal queue. This allows
these non-transactional writes to share the group commit.
However, in the case where it is a non-transactional write that gets the
LOCK_log, the transactions from the ha_commit_trans() queue will not be able
to take part (they will have to wait for their turn to do another fsync). It
seems difficult to cleanly let the binlog code grab the queue from out of the
ha_commit_trans() algorithm. I think group commit is mostly useful in
transactional workloads anyway (non-transactional engines will lose data
anyway in case of a crash, so why fsync() after each transaction?)
3. XtraDB changes (ha_innodb.cc)
The changes needed in XtraDB are comparatively simple: XtraDB already
implements group commit, it just needs to be enabled again with the new
commit_ordered() call.
The existing commit() method is already logically in two parts. The first part
runs under the prepare_commit_mutex and must run in the same order as binlog
commit. This part needs to be moved to commit_ordered(). The second part runs
after releasing the prepare_commit_mutex and does the transaction log
write+fsync; it can remain in commit().
Then the prepare_commit_mutex can be removed (along with the
enable_unsafe_group_commit XtraDB option that was used to disable it).
There are two asserts that check that the thread running the first part of
XtraDB commit is the same as the thread running the other operations for the
transaction. These have to be removed (as commit_ordered() can run in a
different thread). Also, error reporting done with sql_print_error() has to
be delayed until commit() time.
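A sketch of the resulting split; the bodies below only describe where the
existing InnoDB code moves (innobase_commit() is the existing method, the
commit_ordered() implementation is the proposed new one):

  static void innobase_commit_ordered(handlerton *hton, THD *thd, bool all)
  {
    /* Formerly the part of innobase_commit() run while holding
       prepare_commit_mutex: write the commit record into the log buffer,
       fixing this transaction's place in the commit order. */
  }

  static int innobase_commit(handlerton *hton, THD *thd, bool all)
  {
    /* Remaining part: write the log buffer to disk and fsync() it. The
       order no longer matters here, so group commit works again. */
    return 0;
  }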
4. Proof-of-concept implementation
There is a proof-of-concept implementation of this architecture, in the form
of a quilt patch series [3].
A quick benchmark was done, with sync_binlog=1 and
innodb_flush_log_at_trx_commit=1. 64 parallel threads doing single-row
transactions against one table.
Without the patch, we get only 25 queries per second.
With the patch, we get 650 queries per second, a 26x improvement.
5. Open issues/tasks
5.1 XA / other prepare() and commit() call sites.
Check that user-level XA is handled correctly and working, and that it is
covered sufficiently with tests. Also check that any other calls of
ha->prepare() and ha->commit() outside of ha_commit_trans() are handled
correctly.
5.2 Testing
This worklog needs additions to the test suite, including error inserts to
check error handling, and synchronisation points to check thread parallelism
correctness.
6. Alternative implementations
- The binlog code maintains its own extra atomic transaction queue to handle
non-transactional commits well together with transactional ones (with
respect to group commit). Alternatively, we could ignore this issue and
just give up on group commit for non-transactional statements, for some
code simplification.
- The binlog code has two ways to prepare end_event and similar events: one
that uses stack allocation, and another, used when stack allocation is not
possible, that uses thd->mem_root. Probably the overhead of thd->mem_root
is so small that it would make sense to use the same code for both cases.
- Instead of adding extra fields to THD, we could allocate a separate
structure on thd->mem_root with the required extra fields (including
the THD pointer). This would seem to require initialising mutexes at every
commit, though.
- It would probably be a good idea to implement TC_LOG_MMAP::group_log_xid()
(should not be hard).
-----------------------------------------------------------------------
References:
[2] https://secure.wikimedia.org/wikipedia/en/wiki/ABA_problem
[3] https://knielsen-hq.org/maria/patches.mwl116/
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Updated (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 49
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High-Level Specification modified.
--- /tmp/wklog.116.old.14249 2010-05-25 13:18:34.000000000 +0000
+++ /tmp/wklog.116.new.14249 2010-05-25 13:18:34.000000000 +0000
@@ -1 +1,157 @@
+The basic idea in group commit is that multiple threads, each handling one
+transaction, prepare for commit and then queue up together waiting to do an
+fsync() on the transaction log. Then once the log is available, a single
+thread does the fsync() + other necessary book-keeping for all of the threads
+at once. After this, the single thread signals the other threads that it's
+done and they can finish up and return success (or failure) from the commit
+operation.
+
+So group commit has a parallel part, and a sequential part. So we need a
+facility for engines/binlog to participate in both the parallel and the
+sequential part.
+
+To do this, we add two new handlerton methods:
+
+ int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
+
+The idea is that the existing prepare() and commit() methods run in the
+parallel part of group commit, and the new prepare_ordered() and
+commit_ordered() run in the sequential part.
+
+The prepare_ordered() method is called after prepare(). The order of
+tranctions that call into prepare_ordered() is guaranteed to be the same among
+all storage engines and binlog, and it is serialised so no two calls can be
+running inside the same engine at the same time.
+
+The commit_ordered() method is called before commit(), and similarly is
+guaranteed to have same transaction order in all participants, and to be
+serialised within one engine.
+
+As the prepare_ordered() and commit_ordered() calls are serialised, the idea
+is that handlers should do the minimum amount of work needed in these calls,
+relaying most of the work (eg. fsync() ...) to prepare() and commit().
+
+As a concrete example, for InnoDB the commit_ordered() method will do the
+first part of commit that fixed the commit order in the transaction log
+buffer, and the commit() method will write the log to disk and fsync()
+it. This split already exists inside the InnoDB code, running before
+respectively after releasing the prepare_commit_mutex.
+
+In addition, the XA transaction coordinator (TC_LOG) is special, since it is
+the one responsible for deciding whether to commit or rollback the
+transaction. For this we need an extra method, since this decision can be done
+only after we know that all prepare() and prepare_ordered() calls succeed, and
+must be done to know whether to call commit_ordered()/commit(), or do rollback.
+
+The existing method for this is TC_LOG::log_xid(). To make implementing group
+commit simpler to implement in a transaction coordinator and more efficient,
+we introduce a new method:
+
+ void group_log_xid(THD *first_thd);
+
+This method runs in the sequential part of group commit. It receives a list of
+transactions to perform log_xid() on, in the correct commit order. (Note that
+TC_LOG can do parallel parts of group commit in its own prepare() and commit()
+methods).
+
+This method can make it easier to implement the group commit in TC_LOG, as it
+gets directly the list of transactions in the right order. Without it, it
+might need to compute such order anyway in a prepare_ordered() method, and the
+server has to create this ordered list anyway to implement the order guarantee
+for prepare_ordered() and commit_ordered().
+
+This group_log_xid() method also is more efficient, as it avoids some
+inter-thread synchronisation. Since group_log_xid() is serialised, we can run
+it together with all the commit_ordered() method calls and need only a single
+sequential code section. With the log_xid() methods, we would need first a
+sequential part for the prepare_ordered() calls, then a parallel part with
+log_xid() calls (to not loose group commit ability for log_xid()), then again
+a sequential part for the commit_ordered() method calls.
+
+The extra synchronisation is needed, as each commit_ordered() call will have
+to wait for log_xid() in one thread (if log_xid() fails then commit_ordered()
+should not be called), and also wait for commit_ordered() to finish in all
+threads handling earlier commits. In effect we will need to bounce the
+execution from one thread to the other among all participants in the group
+commit.
+
+As a consequence of the group_log_xid() optimisation, handlers must be aware
+that the commit_ordered() call can happen in another thread than the one
+running commit() (so thread local storage is not available). This should not
+be a big issue as the THD is available for storing any needed information.
+
+Since group_log_xid() runs for multiple transactions in a single thread, it
+can not do error reporting (my_error()) as that relies on thread local
+storage. Instead it sets an error code in THD::xid_error, and if there is an
+error then later another method will be called (in correct thread context) to
+actually report the error:
+
+ int xid_delayed_error(THD *thd)
+
+The three new methods prepare_ordered(), group_log_xid(), and commit_ordered()
+are optional (as is xid_delayed_error). A storage engine or transaction
+coordinator is free to not implement them if they are not needed. In this case
+there will be no order guarantee for the corresponding stage of group commit
+for that engine. For example, InnoDB needs no ordering of the prepare phase,
+so can omit implementing prepare_ordered(); TC_LOG_MMAP needs no ordering at
+all, so does not need to implement any of them.
+
+Note in particular that all existing engines (/binlog implementations if they
+exist) will work unmodified (and also without any change in group commit
+facilities or commit order guaranteed).
+
+Using these new APIs, the work will be to
+
+ - In ha_commit_trans(), implement the correct semantics for the three new
+ calls.
+
+ - In XtraDB, use the new commit_ordered() call to remove the
+ prepare_commit_mutex (and resurrect group commit) without loosing the
+ consistency with binlog commit order.
+
+ - In log.cc (binlog module), implement group_log_xid() to do group commit of
+ multiple transactions to the binlog with a single shared fsync() call.
+
+-----------------------------------------------------------------------
+Some possible alternative for this worklog:
+
+ - We could eliminate the group_log_xid() method for a simpler API, at the
+ cost of extra synchronisation between threads to do in-order
+ commit_ordered() method calls. This would also allow to call
+ commit_ordered() in the correct thread context.
+
+ - Alternatively, we could eliminate log_xid() and require that all
+ transaction coordinators implement group_log_xid() instead, again for some
+ moderate simplification.
+
+ - At the moment there is no plugin actually using prepare_ordered(), so, it
+ could be removed from the design. But it fits in well, is efficient to
+ implement, and could be useful later (eg. for the requested feature of
+ releasing locks early in InnoDB).
+
+-----------------------------------------------------------------------
+Some possible follow-up projects after this is implemented:
+
+ - Add statistics about how efficient group commit is (#fsyncs/#commits in
+ each engine and binlog).
+
+ - Implement an XtraDB prepare_ordered() methods that can release row locks
+ early (Mark Callaghan from Facebook advocates this, but need to determine
+ exactly how to do this safely).
+
+ - Implement a new crash recovery algorithm that uses the consistent commit
+ ordering to need only fsync() for the binlog. At crash recovery, any
+ missing transactions in an engine is replayed from the correct point in the
+ binlog (this point must be stored transactionally inside the engine, as
+ XtraDB already does today).
+
+ - Implement that START TRANSACTION WITH CONSISTENT SNAPSHOT 1) really gets a
+ consistent snapshow, with same set of committed and not committed
+ transactions in all engines, 2) returns a corresponding consistent binlog
+ position. This should be easy by piggybacking on the synchronisation
+ implemented for ha_commit_trans().
+
+ - Use this in XtraBackup to get consistent binlog position without having to
+ block all updates with FLUSH TABLES WITH READ LOCK.
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High Level Description modified.
--- /tmp/wklog.116.old.14234 2010-05-25 13:18:07.000000000 +0000
+++ /tmp/wklog.116.new.14234 2010-05-25 13:18:07.000000000 +0000
@@ -21,3 +21,69 @@
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
+----
+
+Implementing group commit in MySQL faces some challenges from the handler
+plugin architecture:
+
+1. Because storage engine handlers have separate transaction log from the
+mysql binlog (and from each other), there are multiple fsync() calls per
+commit that need the group commit optimisation (2 per participating storage
+engine + 1 for binlog).
+
+2. The code handling commit is split in several places, in main server code
+and in storage engine code. With pluggable binlog it will be split even
+more. This requires a good abstract yet powerful API to be able to implement
+group commit simply and efficiently in plugins without the different parts
+having to rely on iternals of the others.
+
+3. We want the order of commits to be the same in all engines participating in
+multiple transactions. This requirement is the reason that InnoDB currently
+breaks group commit with the infamous prepare_commit_mutex.
+
+While currently there is no server guarantee to get same commit order in
+engines an binlog (except for the InnoDB prepare_commit_mutex hack), there are
+several reasons why this could be desirable:
+
+ - InnoDB hot backup needs to be able to extract a binlog position that is
+ consistent with the hot backup to be able to provision a new slave, and
+ this is impossible without imposing at least partial consistent ordering
+ between InnoDB and binlog.
+
+ - Other backup methods could have similar needs, eg. XtraBackup or
+ `mysqldump --single-transaction`, to have consistent commit order between
+ binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
+ or similar expensive blocking operation. (other backup methods, like LVM
+ snapshot, don't need consistent commit order, as they can restore
+ out-of-order commits during crash recovery using XA).
+
+ - If we have consistent commit order, we can think about optimising commit to
+ need only one fsync (for binlog); lost commits in storage engines can then
+ be recovered from the binlog at crash recovery by re-playing against the
+ engine from a particular point in the binlog.
+
+ - With consistent commit order, we can get better semantics for START
+ TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
+ could even get it to return also a matching binlog position). Currently,
+ this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
+ engines.
+
+ - In InnoDB, the performance in the presense of hotspots can be improved if
+ we can release row locks early in the commit phase, but this requires that we
+release them in
+ the same order as commits in the binlog to ensure consistency between
+ master and slaves.
+
+ - There was some discussions around Galera [1] synchroneous replication and
+ global transaction ID that it needed consistent commit order among
+ participating engines.
+
+ - I believe there could be other applications for guaranteed consistent
+ commit order, and that the architecture described in this worklog can
+ implement such guarantee with reasonable overhead.
+
+
+References:
+
+[1] Galera: http://www.codership.com/products/galera_replication
+
-=-=(Knielsen - Tue, 25 May 2010, 08:28)=-=-
More thoughts on and changes to the architecture. Got to something now that I am satisfied with and
that seems to be able to handle all issues.
Implement new prepare_ordered and commit_ordered handler methods and the logic in ha_commit_trans().
Implement TC_LOG::group_log_xid() method and logic in ha_commit_trans().
Implement XtraDB part, using commit_ordered() rather than prepare_commit_mutex.
Fix test suite failures.
Proof-of-concept patch series complete now.
Do initial benchmark, getting good results. With 64 threads, see 26x improvement in queries-per-sec.
Next step: write up the architecture description.
Worked 21 hours and estimate 0 hours remain (original estimate increased by 21 hours).
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparations for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all threads, executing the first part of
binlog writing (the write(2) syscall essentially). It then runs the second part (fsync(2)
essentially) only once, and then wakes up the remaining threads in the queue.
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc 2) move the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), 3) move fast part of InnoDB commit to innobase_commit_fast(), removing the
prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/different.
Think about possible next steps, such as releasing innodb row locks early (in
innobase_prepare_fast), and doing crash recovery by replaying transactions from the binlog (removing
the need for engine durability and 2 of 3 fsync() in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which storage engines and binary log are consistent with each other,
it is necessary to use XA with durable commits for both storage engines
(innodb_flush_log_at_trx_commit=1) and binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit, as there is no working group commit when the binary log is
enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
----
Implementing group commit in MySQL faces some challenges from the handler
plugin architecture:
1. Because storage engine handlers have separate transaction log from the
mysql binlog (and from each other), there are multiple fsync() calls per
commit that need the group commit optimisation (2 per participating storage
engine + 1 for binlog).
2. The code handling commit is split in several places, in main server code
and in storage engine code. With pluggable binlog it will be split even
more. This requires a good abstract yet powerful API to be able to implement
group commit simply and efficiently in plugins without the different parts
having to rely on internals of the others.
3. We want the order of commits to be the same in all engines participating in
multiple transactions. This requirement is the reason that InnoDB currently
breaks group commit with the infamous prepare_commit_mutex.
While currently there is no server guarantee to get the same commit order in
engines and binlog (except for the InnoDB prepare_commit_mutex hack), there are
several reasons why this could be desirable:
- InnoDB hot backup needs to be able to extract a binlog position that is
consistent with the hot backup to be able to provision a new slave, and
this is impossible without imposing at least partial consistent ordering
between InnoDB and binlog.
- Other backup methods could have similar needs, eg. XtraBackup or
`mysqldump --single-transaction`, to have consistent commit order between
binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
or similar expensive blocking operation. (other backup methods, like LVM
snapshot, don't need consistent commit order, as they can restore
out-of-order commits during crash recovery using XA).
- If we have consistent commit order, we can think about optimising commit to
need only one fsync (for binlog); lost commits in storage engines can then
be recovered from the binlog at crash recovery by re-playing against the
engine from a particular point in the binlog.
- With consistent commit order, we can get better semantics for START
TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
could even get it to return also a matching binlog position). Currently,
this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
engines.
- In InnoDB, the performance in the presence of hotspots can be improved if
we can release row locks early in the commit phase, but this requires that
we release them in the same order as commits in the binlog, to ensure
consistency between master and slaves.
- There were some discussions around Galera [1] synchronous replication and
global transaction ID that it needed consistent commit order among
participating engines.
- I believe there could be other applications for guaranteed consistent
commit order, and that the architecture described in this worklog can
implement such guarantee with reasonable overhead.
References:
[1] Galera: http://www.codership.com/products/galera_replication
HIGH-LEVEL SPECIFICATION:
The basic idea in group commit is that multiple threads, each handling one
transaction, prepare for commit and then queue up together waiting to do an
fsync() on the transaction log. Then once the log is available, a single
thread does the fsync() + other necessary book-keeping for all of the threads
at once. After this, the single thread signals the other threads that it's
done and they can finish up and return success (or failure) from the commit
operation.
So group commit has a parallel part, and a sequential part. So we need a
facility for engines/binlog to participate in both the parallel and the
sequential part.
To do this, we add two new handlerton methods:
int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
The idea is that the existing prepare() and commit() methods run in the
parallel part of group commit, and the new prepare_ordered() and
commit_ordered() run in the sequential part.
The prepare_ordered() method is called after prepare(). The order of
transactions that call into prepare_ordered() is guaranteed to be the same among
all storage engines and binlog, and it is serialised so no two calls can be
running inside the same engine at the same time.
The commit_ordered() method is called before commit(), and similarly is
guaranteed to have same transaction order in all participants, and to be
serialised within one engine.
As the prepare_ordered() and commit_ordered() calls are serialised, the idea
is that handlers should do the minimum amount of work needed in these calls,
relaying most of the work (eg. fsync() ...) to prepare() and commit().
As a concrete example, for InnoDB the commit_ordered() method will do the
first part of commit that fixed the commit order in the transaction log
buffer, and the commit() method will write the log to disk and fsync()
it. This split already exists inside the InnoDB code, running before
respectively after releasing the prepare_commit_mutex.
In addition, the XA transaction coordinator (TC_LOG) is special, since it is
the one responsible for deciding whether to commit or rollback the
transaction. For this we need an extra method, since this decision can be done
only after we know that all prepare() and prepare_ordered() calls succeed, and
must be done to know whether to call commit_ordered()/commit(), or do rollback.
The existing method for this is TC_LOG::log_xid(). To make group commit
simpler and more efficient to implement in a transaction coordinator,
we introduce a new method:
void group_log_xid(THD *first_thd);
This method runs in the sequential part of group commit. It receives a list of
transactions to perform log_xid() on, in the correct commit order. (Note that
TC_LOG can do parallel parts of group commit in its own prepare() and commit()
methods).
This method can make it easier to implement the group commit in TC_LOG, as it
gets directly the list of transactions in the right order. Without it, it
might need to compute such order anyway in a prepare_ordered() method, and the
server has to create this ordered list anyway to implement the order guarantee
for prepare_ordered() and commit_ordered().
This group_log_xid() method also is more efficient, as it avoids some
inter-thread synchronisation. Since group_log_xid() is serialised, we can run
it together with all the commit_ordered() method calls and need only a single
sequential code section. With the log_xid() methods, we would need first a
sequential part for the prepare_ordered() calls, then a parallel part with
log_xid() calls (to not lose group commit ability for log_xid()), then again
a sequential part for the commit_ordered() method calls.
The extra synchronisation is needed, as each commit_ordered() call will have
to wait for log_xid() in one thread (if log_xid() fails then commit_ordered()
should not be called), and also wait for commit_ordered() to finish in all
threads handling earlier commits. In effect we will need to bounce the
execution from one thread to the other among all participants in the group
commit.
As a consequence of the group_log_xid() optimisation, handlers must be aware
that the commit_ordered() call can happen in a different thread from the one
running commit() (so thread local storage is not available). This should not
be a big issue as the THD is available for storing any needed information.
Since group_log_xid() runs for multiple transactions in a single thread, it
cannot do error reporting (my_error()), as that relies on thread local
storage. Instead it sets an error code in THD::xid_error, and if there is an
error then later another method will be called (in correct thread context) to
actually report the error:
int xid_delayed_error(THD *thd)
The three new methods prepare_ordered(), group_log_xid(), and commit_ordered()
are optional (as is xid_delayed_error). A storage engine or transaction
coordinator is free to not implement them if they are not needed. In this case
there will be no order guarantee for the corresponding stage of group commit
for that engine. For example, InnoDB needs no ordering of the prepare phase,
so can omit implementing prepare_ordered(); TC_LOG_MMAP needs no ordering at
all, so does not need to implement any of them.
Note in particular that all existing engines (and binlog implementations, if
any) will work unmodified (though of course without any group commit
facilities or commit order guarantees).
Using these new APIs, the work will be to
- In ha_commit_trans(), implement the correct semantics for the three new
calls.
- In XtraDB, use the new commit_ordered() call to remove the
prepare_commit_mutex (and resurrect group commit) without losing the
consistency with binlog commit order.
- In log.cc (binlog module), implement group_log_xid() to do group commit of
multiple transactions to the binlog with a single shared fsync() call.
-----------------------------------------------------------------------
Some possible alternatives for this worklog:
- We could eliminate the group_log_xid() method for a simpler API, at the
cost of extra synchronisation between threads to do in-order
commit_ordered() method calls. This would also allow commit_ordered() to be
called in the correct thread context.
- Alternatively, we could eliminate log_xid() and require that all
transaction coordinators implement group_log_xid() instead, again for some
moderate simplification.
- At the moment there is no plugin actually using prepare_ordered(), so, it
could be removed from the design. But it fits in well, is efficient to
implement, and could be useful later (eg. for the requested feature of
releasing locks early in InnoDB).
-----------------------------------------------------------------------
Some possible follow-up projects after this is implemented:
- Add statistics about how efficient group commit is (#fsyncs/#commits in
each engine and binlog).
- Implement an XtraDB prepare_ordered() method that can release row locks
early (Mark Callaghan from Facebook advocates this, but we need to determine
exactly how to do this safely).
- Implement a new crash recovery algorithm that uses the consistent commit
ordering so that only the binlog needs fsync(). At crash recovery, any
missing transactions in an engine are replayed from the correct point in the
binlog (this point must be stored transactionally inside the engine, as
XtraDB already does today).
- Implement that START TRANSACTION WITH CONSISTENT SNAPSHOT 1) really gets a
consistent snapshot, with the same set of committed and not committed
transactions in all engines, 2) returns a corresponding consistent binlog
position. This should be easy by piggybacking on the synchronisation
implemented for ha_commit_trans().
- Use this in XtraBackup to get consistent binlog position without having to
block all updates with FLUSH TABLES WITH READ LOCK.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Updated (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 25 May '10
by worklog-noreply@askmonty.org 25 May '10
25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 49
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High-Level Specification modified.
--- /tmp/wklog.116.old.14249 2010-05-25 13:18:34.000000000 +0000
+++ /tmp/wklog.116.new.14249 2010-05-25 13:18:34.000000000 +0000
@@ -1 +1,157 @@
+The basic idea in group commit is that multiple threads, each handling one
+transaction, prepare for commit and then queue up together waiting to do an
+fsync() on the transaction log. Then once the log is available, a single
+thread does the fsync() + other necessary book-keeping for all of the threads
+at once. After this, the single thread signals the other threads that it's
+done and they can finish up and return success (or failure) from the commit
+operation.
+
+So group commit has a parallel part, and a sequential part. So we need a
+facility for engines/binlog to participate in both the parallel and the
+sequential part.
+
+To do this, we add two new handlerton methods:
+
+ int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
+
+The idea is that the existing prepare() and commit() methods run in the
+parallel part of group commit, and the new prepare_ordered() and
+commit_ordered() run in the sequential part.
+
+The prepare_ordered() method is called after prepare(). The order of
+tranctions that call into prepare_ordered() is guaranteed to be the same among
+all storage engines and binlog, and it is serialised so no two calls can be
+running inside the same engine at the same time.
+
+The commit_ordered() method is called before commit(), and similarly is
+guaranteed to have same transaction order in all participants, and to be
+serialised within one engine.
+
+As the prepare_ordered() and commit_ordered() calls are serialised, the idea
+is that handlers should do the minimum amount of work needed in these calls,
+relaying most of the work (eg. fsync() ...) to prepare() and commit().
+
+As a concrete example, for InnoDB the commit_ordered() method will do the
+first part of commit that fixed the commit order in the transaction log
+buffer, and the commit() method will write the log to disk and fsync()
+it. This split already exists inside the InnoDB code, running before
+respectively after releasing the prepare_commit_mutex.
+
+In addition, the XA transaction coordinator (TC_LOG) is special, since it is
+the one responsible for deciding whether to commit or rollback the
+transaction. For this we need an extra method, since this decision can be done
+only after we know that all prepare() and prepare_ordered() calls succeed, and
+must be done to know whether to call commit_ordered()/commit(), or do rollback.
+
+The existing method for this is TC_LOG::log_xid(). To make implementing group
+commit simpler to implement in a transaction coordinator and more efficient,
+we introduce a new method:
+
+ void group_log_xid(THD *first_thd);
+
+This method runs in the sequential part of group commit. It receives a list of
+transactions to perform log_xid() on, in the correct commit order. (Note that
+TC_LOG can do parallel parts of group commit in its own prepare() and commit()
+methods).
+
+This method can make it easier to implement the group commit in TC_LOG, as it
+gets directly the list of transactions in the right order. Without it, it
+might need to compute such order anyway in a prepare_ordered() method, and the
+server has to create this ordered list anyway to implement the order guarantee
+for prepare_ordered() and commit_ordered().
+
+This group_log_xid() method also is more efficient, as it avoids some
+inter-thread synchronisation. Since group_log_xid() is serialised, we can run
+it together with all the commit_ordered() method calls and need only a single
+sequential code section. With the log_xid() methods, we would need first a
+sequential part for the prepare_ordered() calls, then a parallel part with
+log_xid() calls (to not loose group commit ability for log_xid()), then again
+a sequential part for the commit_ordered() method calls.
+
+The extra synchronisation is needed, as each commit_ordered() call will have
+to wait for log_xid() in one thread (if log_xid() fails then commit_ordered()
+should not be called), and also wait for commit_ordered() to finish in all
+threads handling earlier commits. In effect we will need to bounce the
+execution from one thread to the other among all participants in the group
+commit.
+
+As a consequence of the group_log_xid() optimisation, handlers must be aware
+that the commit_ordered() call can happen in another thread than the one
+running commit() (so thread local storage is not available). This should not
+be a big issue as the THD is available for storing any needed information.
+
+Since group_log_xid() runs for multiple transactions in a single thread, it
+can not do error reporting (my_error()) as that relies on thread local
+storage. Instead it sets an error code in THD::xid_error, and if there is an
+error then later another method will be called (in correct thread context) to
+actually report the error:
+
+ int xid_delayed_error(THD *thd)
+
+The three new methods prepare_ordered(), group_log_xid(), and commit_ordered()
+are optional (as is xid_delayed_error). A storage engine or transaction
+coordinator is free to not implement them if they are not needed. In this case
+there will be no order guarantee for the corresponding stage of group commit
+for that engine. For example, InnoDB needs no ordering of the prepare phase,
+so can omit implementing prepare_ordered(); TC_LOG_MMAP needs no ordering at
+all, so does not need to implement any of them.
+
+Note in particular that all existing engines (/binlog implementations if they
+exist) will work unmodified (and also without any change in group commit
+facilities or commit order guaranteed).
+
+Using these new APIs, the work will be to
+
+ - In ha_commit_trans(), implement the correct semantics for the three new
+ calls.
+
+ - In XtraDB, use the new commit_ordered() call to remove the
+ prepare_commit_mutex (and resurrect group commit) without loosing the
+ consistency with binlog commit order.
+
+ - In log.cc (binlog module), implement group_log_xid() to do group commit of
+ multiple transactions to the binlog with a single shared fsync() call.
+
+-----------------------------------------------------------------------
+Some possible alternative for this worklog:
+
+ - We could eliminate the group_log_xid() method for a simpler API, at the
+ cost of extra synchronisation between threads to do in-order
+ commit_ordered() method calls. This would also allow to call
+ commit_ordered() in the correct thread context.
+
+ - Alternatively, we could eliminate log_xid() and require that all
+ transaction coordinators implement group_log_xid() instead, again for some
+ moderate simplification.
+
+ - At the moment there is no plugin actually using prepare_ordered(), so, it
+ could be removed from the design. But it fits in well, is efficient to
+ implement, and could be useful later (eg. for the requested feature of
+ releasing locks early in InnoDB).
+
+-----------------------------------------------------------------------
+Some possible follow-up projects after this is implemented:
+
+ - Add statistics about how efficient group commit is (#fsyncs/#commits in
+ each engine and binlog).
+
+ - Implement an XtraDB prepare_ordered() methods that can release row locks
+ early (Mark Callaghan from Facebook advocates this, but need to determine
+ exactly how to do this safely).
+
+ - Implement a new crash recovery algorithm that uses the consistent commit
+ ordering to need only fsync() for the binlog. At crash recovery, any
+ missing transactions in an engine is replayed from the correct point in the
+ binlog (this point must be stored transactionally inside the engine, as
+ XtraDB already does today).
+
+ - Implement that START TRANSACTION WITH CONSISTENT SNAPSHOT 1) really gets a
+ consistent snapshow, with same set of committed and not committed
+ transactions in all engines, 2) returns a corresponding consistent binlog
+ position. This should be easy by piggybacking on the synchronisation
+ implemented for ha_commit_trans().
+
+ - Use this in XtraBackup to get consistent binlog position without having to
+ block all updates with FLUSH TABLES WITH READ LOCK.
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High Level Description modified.
--- /tmp/wklog.116.old.14234 2010-05-25 13:18:07.000000000 +0000
+++ /tmp/wklog.116.new.14234 2010-05-25 13:18:07.000000000 +0000
@@ -21,3 +21,69 @@
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
+----
+
+Implementing group commit in MySQL faces some challenges from the handler
+plugin architecture:
+
+1. Because storage engine handlers have separate transaction log from the
+mysql binlog (and from each other), there are multiple fsync() calls per
+commit that need the group commit optimisation (2 per participating storage
+engine + 1 for binlog).
+
+2. The code handling commit is split in several places, in main server code
+and in storage engine code. With pluggable binlog it will be split even
+more. This requires a good abstract yet powerful API to be able to implement
+group commit simply and efficiently in plugins without the different parts
+having to rely on iternals of the others.
+
+3. We want the order of commits to be the same in all engines participating in
+multiple transactions. This requirement is the reason that InnoDB currently
+breaks group commit with the infamous prepare_commit_mutex.
+
+While currently there is no server guarantee to get same commit order in
+engines an binlog (except for the InnoDB prepare_commit_mutex hack), there are
+several reasons why this could be desirable:
+
+ - InnoDB hot backup needs to be able to extract a binlog position that is
+ consistent with the hot backup to be able to provision a new slave, and
+ this is impossible without imposing at least partial consistent ordering
+ between InnoDB and binlog.
+
+ - Other backup methods could have similar needs, eg. XtraBackup or
+ `mysqldump --single-transaction`, to have consistent commit order between
+ binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
+ or similar expensive blocking operation. (other backup methods, like LVM
+ snapshot, don't need consistent commit order, as they can restore
+ out-of-order commits during crash recovery using XA).
+
+ - If we have consistent commit order, we can think about optimising commit to
+ need only one fsync (for binlog); lost commits in storage engines can then
+ be recovered from the binlog at crash recovery by re-playing against the
+ engine from a particular point in the binlog.
+
+ - With consistent commit order, we can get better semantics for START
+ TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
+ could even get it to return also a matching binlog position). Currently,
+ this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
+ engines.
+
+ - In InnoDB, the performance in the presense of hotspots can be improved if
+ we can release row locks early in the commit phase, but this requires that we
+release them in
+ the same order as commits in the binlog to ensure consistency between
+ master and slaves.
+
+ - There was some discussions around Galera [1] synchroneous replication and
+ global transaction ID that it needed consistent commit order among
+ participating engines.
+
+ - I believe there could be other applications for guaranteed consistent
+ commit order, and that the architecture described in this worklog can
+ implement such guarantee with reasonable overhead.
+
+
+References:
+
+[1] Galera: http://www.codership.com/products/galera_replication
+
-=-=(Knielsen - Tue, 25 May 2010, 08:28)=-=-
More thoughts on and changes to the archtecture. Got to something now that I am satisfied with and
that seems to be able to handle all issues.
Implement new prepare_ordered and commit_ordered handler methods and the logic in ha_commit_trans().
Implement TC_LOG::group_log_xid() method and logic in ha_commit_trans().
Implement XtraDB part, using commit_ordered() rather than prepare_commit_mutex.
Fix test suite failures.
Proof-of-concept patch series complete now.
Do initial benchmark, getting good results. With 64 threads, see 26x improvement in queries-per-sec.
Next step: write up the architecture description.
Worked 21 hours and estimate 0 hours remain (original estimate increased by 21 hours).
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparations for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all threads, executing the first part of
binlog writing (the write(2) syscall essentially). It then runs the second part (fsync(2)
essentially) only once, and then wakes up the remaining threads in the queue.
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc 2) move the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), 3) move fast part of InnoDB commit to innobase_commit_fast(), removing the
prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/different.
Think about possible next steps, such as releasing innodb row locks early (in
innobase_prepare_fast), and doing crash recovery by replaying transactions from the binlog (removing
the need for engine durability and 2 of 3 fsync() in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which storage engines and binary log are consistent with each other,
it is necessary to use XA with durable commits for both storage engines
(innodb_flush_log_at_trx_commit=1) and binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit, as there is no working group commit when the binary log is
enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
----
Implementing group commit in MySQL faces some challenges from the handler
plugin architecture:
1. Because storage engine handlers have separate transaction log from the
mysql binlog (and from each other), there are multiple fsync() calls per
commit that need the group commit optimisation (2 per participating storage
engine + 1 for binlog).
2. The code handling commit is split in several places, in main server code
and in storage engine code. With pluggable binlog it will be split even
more. This requires a good abstract yet powerful API to be able to implement
group commit simply and efficiently in plugins without the different parts
having to rely on iternals of the others.
3. We want the order of commits to be the same in all engines participating in
multiple transactions. This requirement is the reason that InnoDB currently
breaks group commit with the infamous prepare_commit_mutex.
While currently there is no server guarantee to get same commit order in
engines and binlog (except for the InnoDB prepare_commit_mutex hack), there are
several reasons why this could be desirable:
- InnoDB hot backup needs to be able to extract a binlog position that is
consistent with the hot backup to be able to provision a new slave, and
this is impossible without imposing at least partial consistent ordering
between InnoDB and binlog.
- Other backup methods could have similar needs, eg. XtraBackup or
`mysqldump --single-transaction`, to have consistent commit order between
binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
or similar expensive blocking operation. (other backup methods, like LVM
snapshot, don't need consistent commit order, as they can restore
out-of-order commits during crash recovery using XA).
- If we have consistent commit order, we can think about optimising commit to
need only one fsync (for binlog); lost commits in storage engines can then
be recovered from the binlog at crash recovery by re-playing against the
engine from a particular point in the binlog.
- With consistent commit order, we can get better semantics for START
TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
could even get it to return also a matching binlog position). Currently,
this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
engines.
- In InnoDB, the performance in the presence of hotspots can be improved if
we can release row locks early in the commit phase, but this requires that
we release them in the same order as commits in the binlog, to ensure
consistency between master and slaves.
- There have been some discussions around Galera [1] synchronous replication
and global transaction IDs suggesting that they need consistent commit order
among participating engines.
- I believe there could be other applications for guaranteed consistent
commit order, and that the architecture described in this worklog can
implement such guarantee with reasonable overhead.
References:
[1] Galera: http://www.codership.com/products/galera_replication
HIGH-LEVEL SPECIFICATION:
The basic idea in group commit is that multiple threads, each handling one
transaction, prepare for commit and then queue up together waiting to do an
fsync() on the transaction log. Then once the log is available, a single
thread does the fsync() + other necessary book-keeping for all of the threads
at once. After this, the single thread signals the other threads that it's
done and they can finish up and return success (or failure) from the commit
operation.
Group commit thus has a parallel part and a sequential part, so we need a
facility for engines/binlog to participate in both the parallel and the
sequential part.
To do this, we add two new handlerton methods:
int (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
The idea is that the existing prepare() and commit() methods run in the
parallel part of group commit, and the new prepare_ordered() and
commit_ordered() run in the sequential part.
The prepare_ordered() method is called after prepare(). The order of
transactions that call into prepare_ordered() is guaranteed to be the same among
all storage engines and binlog, and it is serialised so no two calls can be
running inside the same engine at the same time.
The commit_ordered() method is called before commit(), and similarly is
guaranteed to have same transaction order in all participants, and to be
serialised within one engine.
As the prepare_ordered() and commit_ordered() calls are serialised, the idea
is that handlers should do the minimum amount of work needed in these calls,
deferring most of the work (eg. fsync() ...) to prepare() and commit().
As a concrete example, for InnoDB the commit_ordered() method will do the
first part of commit that fixes the commit order in the transaction log
buffer, and the commit() method will write the log to disk and fsync()
it. This split already exists inside the InnoDB code, running before
respectively after releasing the prepare_commit_mutex.
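To illustrate the intended split, a storage engine might wire up the proposed
methods roughly as follows (a sketch only; the toyengine_* names and helper
stubs are hypothetical, not actual InnoDB/XtraDB code):

#include "mysql_priv.h"               /* handlerton, THD */

/* Hypothetical helpers; a real engine has its own log machinery. */
static int toyengine_flush_prepare_record(THD *thd) { return 0; }
static void toyengine_append_commit_to_log_buffer(THD *thd) { }
static int toyengine_flush_log(THD *thd) { return 0; }

static int toyengine_prepare(handlerton *hton, THD *thd, bool all)
{
  /* Parallel part: may be expensive (write + fsync() of the prepare
     record); runs concurrently in many threads. */
  return toyengine_flush_prepare_record(thd);
}

static void toyengine_commit_ordered(handlerton *hton, THD *thd, bool all)
{
  /* Sequential part: must be cheap. Only fix the commit order by
     putting the commit record into the in-memory log buffer; no disk
     I/O. May run in a different thread than commit(), so keep all
     state in the THD, not in thread-local storage. */
  toyengine_append_commit_to_log_buffer(thd);
}

static int toyengine_commit(handlerton *hton, THD *thd, bool all)
{
  /* Parallel part again: write the log buffer to disk and fsync() it,
     grouping with other committers where possible. */
  return toyengine_flush_log(thd);
}

static int toyengine_init(void *p)
{
  handlerton *hton= (handlerton*) p;
  hton->prepare= toyengine_prepare;
  hton->prepare_ordered= NULL;   /* ordering of prepare not needed here */
  hton->commit_ordered= toyengine_commit_ordered;
  hton->commit= toyengine_commit;
  return 0;
}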
In addition, the XA transaction coordinator (TC_LOG) is special, since it is
the one responsible for deciding whether to commit or rollback the
transaction. For this we need an extra method, since this decision can be made
only after we know that all prepare() and prepare_ordered() calls succeeded,
and must be made in order to know whether to call commit_ordered()/commit() or
to roll back.
The existing method for this is TC_LOG::log_xid(). To make group commit
simpler and more efficient to implement in a transaction coordinator, we
introduce a new method:
void group_log_xid(THD *first_thd);
This method runs in the sequential part of group commit. It receives a list of
transactions to perform log_xid() on, in the correct commit order. (Note that
TC_LOG can do parallel parts of group commit in its own prepare() and commit()
methods).
This method can make it easier to implement group commit in TC_LOG, as it
directly receives the list of transactions in the right order. Without it,
TC_LOG might need to compute such an order itself in a prepare_ordered()
method, even though the server already has to create this ordered list to
implement the order guarantee for prepare_ordered() and commit_ordered().
This group_log_xid() method is also more efficient, as it avoids some
inter-thread synchronisation. Since group_log_xid() is serialised, we can run
it together with all the commit_ordered() method calls and need only a single
sequential code section. With the log_xid() methods, we would need first a
sequential part for the prepare_ordered() calls, then a parallel part with
log_xid() calls (to not lose group commit ability for log_xid()), then again
a sequential part for the commit_ordered() method calls.
The extra synchronisation is needed, as each commit_ordered() call will have
to wait for log_xid() in one thread (if log_xid() fails then commit_ordered()
should not be called), and also wait for commit_ordered() to finish in all
threads handling earlier commits. In effect we will need to bounce the
execution from one thread to the other among all participants in the group
commit.
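The control flow of that single sequential section can be modelled as follows
(a self-contained sketch; Txn and Engine are simplified stand-ins for THD and
handlerton, not the real server types):

#include <vector>

struct Txn;

struct Engine                          /* stand-in for handlerton */
{
  void (*prepare_ordered)(Txn *t);     /* optional, may be NULL */
  void (*commit_ordered)(Txn *t);      /* optional, may be NULL */
};

struct Txn                             /* stand-in for THD */
{
  std::vector<Engine*> engines;        /* participating engines */
  int xid_error;                       /* set by group_log_xid() on failure */
  Txn *next;                           /* commit-order queue link */
};

/* Stand-in for TC_LOG::group_log_xid(): logs all XIDs in queue order
   with one shared fsync(); on failure it sets t->xid_error, which is
   reported later by xid_delayed_error() in the right thread context. */
static void group_log_xid(Txn *queue_head) { (void) queue_head; }

void run_sequential_section(Txn *queue_head)
{
  /* 1. prepare_ordered: serialised, same order in every engine. */
  for (Txn *t= queue_head; t; t= t->next)
    for (size_t i= 0; i < t->engines.size(); i++)
      if (t->engines[i]->prepare_ordered)
        t->engines[i]->prepare_ordered(t);

  /* 2. One group decision + binlog write with a single fsync(). */
  group_log_xid(queue_head);

  /* 3. commit_ordered: binlog order, still serialised; note that this
     runs in the leader's thread, not each transaction's own thread. */
  for (Txn *t= queue_head; t; t= t->next)
    if (!t->xid_error)
      for (size_t i= 0; i < t->engines.size(); i++)
        if (t->engines[i]->commit_ordered)
          t->engines[i]->commit_ordered(t);
}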
As a consequence of the group_log_xid() optimisation, handlers must be aware
that the commit_ordered() call can happen in another thread than the one
running commit() (so thread local storage is not available). This should not
be a big issue as the THD is available for storing any needed information.
Since group_log_xid() runs for multiple transactions in a single thread, it
can not do error reporting (my_error()) as that relies on thread local
storage. Instead it sets an error code in THD::xid_error, and if there is an
error then later another method will be called (in correct thread context) to
actually report the error:
int xid_delayed_error(THD *thd)
The three new methods prepare_ordered(), group_log_xid(), and commit_ordered()
are optional (as is xid_delayed_error). A storage engine or transaction
coordinator is free to not implement them if they are not needed. In this case
there will be no order guarantee for the corresponding stage of group commit
for that engine. For example, InnoDB needs no ordering of the prepare phase,
so can omit implementing prepare_ordered(); TC_LOG_MMAP needs no ordering at
all, so does not need to implement any of them.
Note in particular that all existing engines (/binlog implementations if they
exist) will work unmodified (and also without any change in group commit
facilities or commit order guarantees).
Using these new APIs, the work will be to
- In ha_commit_trans(), implement the correct semantics for the three new
calls.
- In XtraDB, use the new commit_ordered() call to remove the
prepare_commit_mutex (and resurrect group commit) without losing the
consistency with binlog commit order.
- In log.cc (binlog module), implement group_log_xid() to do group commit of
multiple transactions to the binlog with a single shared fsync() call.
-----------------------------------------------------------------------
Some possible alternative for this worklog:
- We could eliminate the group_log_xid() method for a simpler API, at the
cost of extra synchronisation between threads to do in-order
commit_ordered() method calls. This would also allow calling
commit_ordered() in the correct thread context.
- Alternatively, we could eliminate log_xid() and require that all
transaction coordinators implement group_log_xid() instead, again for some
moderate simplification.
- At the moment there is no plugin actually using prepare_ordered(), so it
could be removed from the design. But it fits in well, is efficient to
implement, and could be useful later (eg. for the requested feature of
releasing locks early in InnoDB).
-----------------------------------------------------------------------
Some possible follow-up projects after this is implemented:
- Add statistics about how efficient group commit is (#fsyncs/#commits in
each engine and binlog).
- Implement an XtraDB prepare_ordered() method that can release row locks
early (Mark Callaghan from Facebook advocates this, but we need to determine
exactly how to do this safely).
- Implement a new crash recovery algorithm that uses the consistent commit
ordering to need only fsync() for the binlog. At crash recovery, any
missing transactions in an engine are replayed from the correct point in the
binlog (this point must be stored transactionally inside the engine, as
XtraDB already does today).
- Implement that START TRANSACTION WITH CONSISTENT SNAPSHOT 1) really gets a
consistent snapshot, with the same set of committed and not committed
transactions in all engines, 2) returns a corresponding consistent binlog
position. This should be easy by piggybacking on the synchronisation
implemented for ha_commit_trans().
- Use this in XtraBackup to get consistent binlog position without having to
block all updates with FLUSH TABLES WITH READ LOCK.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Updated (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 25 May '10
by worklog-noreply@askmonty.org 25 May '10
25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 49
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 13:18)=-=-
High Level Description modified.
--- /tmp/wklog.116.old.14234 2010-05-25 13:18:07.000000000 +0000
+++ /tmp/wklog.116.new.14234 2010-05-25 13:18:07.000000000 +0000
@@ -21,3 +21,69 @@
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
+----
+
+Implementing group commit in MySQL faces some challenges from the handler
+plugin architecture:
+
+1. Because storage engine handlers have separate transaction log from the
+mysql binlog (and from each other), there are multiple fsync() calls per
+commit that need the group commit optimisation (2 per participating storage
+engine + 1 for binlog).
+
+2. The code handling commit is split in several places, in main server code
+and in storage engine code. With pluggable binlog it will be split even
+more. This requires a good abstract yet powerful API to be able to implement
+group commit simply and efficiently in plugins without the different parts
+having to rely on internals of the others.
+
+3. We want the order of commits to be the same in all engines participating in
+multiple transactions. This requirement is the reason that InnoDB currently
+breaks group commit with the infamous prepare_commit_mutex.
+
+While currently there is no server guarantee to get same commit order in
+engines and binlog (except for the InnoDB prepare_commit_mutex hack), there are
+several reasons why this could be desirable:
+
+ - InnoDB hot backup needs to be able to extract a binlog position that is
+ consistent with the hot backup to be able to provision a new slave, and
+ this is impossible without imposing at least partial consistent ordering
+ between InnoDB and binlog.
+
+ - Other backup methods could have similar needs, eg. XtraBackup or
+ `mysqldump --single-transaction`, to have consistent commit order between
+ binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
+ or similar expensive blocking operation. (other backup methods, like LVM
+ snapshot, don't need consistent commit order, as they can restore
+ out-of-order commits during crash recovery using XA).
+
+ - If we have consistent commit order, we can think about optimising commit to
+ need only one fsync (for binlog); lost commits in storage engines can then
+ be recovered from the binlog at crash recovery by re-playing against the
+ engine from a particular point in the binlog.
+
+ - With consistent commit order, we can get better semantics for START
+ TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
+ could even get it to return also a matching binlog position). Currently,
+ this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
+ engines.
+
+ - In InnoDB, the performance in the presence of hotspots can be improved if
+ we can release row locks early in the commit phase, but this requires that we
+release them in
+ the same order as commits in the binlog to ensure consistency between
+ master and slaves.
+
+ - There have been some discussions around Galera [1] synchronous replication and
+ global transaction ID that it needed consistent commit order among
+ participating engines.
+
+ - I believe there could be other applications for guaranteed consistent
+ commit order, and that the architecture described in this worklog can
+ implement such guarantee with reasonable overhead.
+
+
+References:
+
+[1] Galera: http://www.codership.com/products/galera_replication
+
-=-=(Knielsen - Tue, 25 May 2010, 08:28)=-=-
More thoughts on and changes to the architecture. Got to something now that I am satisfied with and
that seems to be able to handle all issues.
Implement new prepare_ordered and commit_ordered handler methods and the logic in ha_commit_trans().
Implement TC_LOG::group_log_xid() method and logic in ha_commit_trans().
Implement XtraDB part, using commit_ordered() rather than prepare_commit_mutex.
Fix test suite failures.
Proof-of-concept patch series complete now.
Do initial benchmark, getting good results. With 64 threads, see 26x improvement in queries-per-sec.
Next step: write up the architecture description.
Worked 21 hours and estimate 0 hours remain (original estimate increased by 21 hours).
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparation for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all threads, executing the first part of
binlog writing (the write(2) syscall essentially). It then runs the second part (fsync(2)
essentially) only once, and then wakes up the remaining threads in the queue.
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc, 2) moving the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), and 3) moving the fast part of InnoDB commit to innobase_commit_fast(),
removing the prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/different.
Think about possible next steps, such as releasing InnoDB row locks early (in
innobase_prepare_fast), and doing crash recovery by replaying transactions from the binlog
(removing the need for engine durability and 2 of the 3 fsync() calls in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which storage engines and binary log are consistent with each other,
it is necessary to use XA with durable commits for both storage engines
(innodb_flush_log_at_trx_commit=1) and binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit, as there is no working group commit when the binary log is
enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
----
Implementing group commit in MySQL faces some challenges from the handler
plugin architecture:
1. Because storage engine handlers have separate transaction logs from the
MySQL binlog (and from each other), there are multiple fsync() calls per
commit that need the group commit optimisation (2 per participating storage
engine + 1 for binlog).
2. The code handling commit is split in several places, in main server code
and in storage engine code. With pluggable binlog it will be split even
more. This requires a good, abstract yet powerful API to be able to implement
group commit simply and efficiently in plugins, without the different parts
having to rely on internals of the others.
3. We want the order of commits to be the same in all engines participating in
multiple transactions. This requirement is the reason that InnoDB currently
breaks group commit with the infamous prepare_commit_mutex.
While currently there is no server guarantee to get same commit order in
engines and binlog (except for the InnoDB prepare_commit_mutex hack), there are
several reasons why this could be desirable:
- InnoDB hot backup needs to be able to extract a binlog position that is
consistent with the hot backup to be able to provision a new slave, and
this is impossible without imposing at least partial consistent ordering
between InnoDB and binlog.
- Other backup methods could have similar needs, eg. XtraBackup or
`mysqldump --single-transaction`, to have consistent commit order between
binlog and storage engines without having to do FLUSH TABLES WITH READ LOCK
or similar expensive blocking operation. (other backup methods, like LVM
snapshot, don't need consistent commit order, as they can restore
out-of-order commits during crash recovery using XA).
- If we have consistent commit order, we can think about optimising commit to
need only one fsync (for binlog); lost commits in storage engines can then
be recovered from the binlog at crash recovery by re-playing against the
engine from a particular point in the binlog.
- With consistent commit order, we can get better semantics for START
TRANSACTION WITH CONSISTENT SNAPSHOT with multi-engine transactions (and we
could even get it to return also a matching binlog position). Currently,
this "CONSISTENT SNAPSHOT" can be inconsistent among multiple storage
engines.
- In InnoDB, the performance in the presence of hotspots can be improved if
we can release row locks early in the commit phase, but this requires that
we release them in the same order as commits in the binlog, to ensure
consistency between master and slaves.
- There have been some discussions around Galera [1] synchronous replication
and global transaction IDs suggesting that they need consistent commit order
among participating engines.
- I believe there could be other applications for guaranteed consistent
commit order, and that the architecture described in this worklog can
implement such guarantee with reasonable overhead.
References:
[1] Galera: http://www.codership.com/products/galera_replication
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Rev 2791: Prevent caching subqueries with random parameters and side-effect functions. in file:///home/bell/maria/bzr/work-maria-5.3-scache/
by sanja@askmonty.org 25 May '10
by sanja@askmonty.org 25 May '10
25 May '10
At file:///home/bell/maria/bzr/work-maria-5.3-scache/
------------------------------------------------------------
revno: 2791
revision-id: sanja(a)askmonty.org-20100525125457-5rwbiihh0vtghdrj
parent: sanja(a)askmonty.org-20100525104536-zw06otfk8ut7fias
committer: sanja(a)askmonty.org
branch nick: work-maria-5.3-scache
timestamp: Tue 2010-05-25 15:54:57 +0300
message:
Prevent caching subqueries with random parameters and side-effect functions.
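The three hunks below repeat the same test; distilled into a single predicate
it reads roughly as follows (the helper name is hypothetical; the patch
inlines the condition at each call site):

/* Hypothetical helper; the hunks below inline this condition. */
static bool subquery_cache_allowed(THD *thd, subselect_engine *engine,
                                   uint dependencies)
{
  return dependencies != 0 &&                  /* something to key on */
         optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
         !(engine->uncacheable() & (UNCACHEABLE_RAND |
                                    UNCACHEABLE_SIDEEFFECT));
}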
=== modified file 'mysql-test/r/subquery_cache.result'
--- a/mysql-test/r/subquery_cache.result 2010-05-25 10:45:36 +0000
+++ b/mysql-test/r/subquery_cache.result 2010-05-25 12:54:57 +0000
@@ -1,4 +1,5 @@
set optimizer_switch='subquery_cache=on';
+flush status;
create table t1 (a int, b int);
insert into t1 values (1,2),(3,4),(1,2),(3,4),(3,4),(4,5),(4,5),(5,6),(5,6),(4,5);
create table t2 (c int, d int);
@@ -552,4 +553,38 @@
POINT(1 1)
POINT(3 3)
DROP TABLE t1;
+#uncacheable queries test (random and side effect)
+flush status;
+CREATE TABLE t1 (a int);
+INSERT INTO t1 VALUES (2), (4), (1), (3);
+select a, a in (select a from t1) from t1 as ext;
+a a in (select a from t1)
+2 1
+4 1
+1 1
+3 1
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 0
+Subquery_cache_miss 4
+select a, a in (select a from t1 where -1 < rand()) from t1 as ext;
+a a in (select a from t1 where -1 < rand())
+2 1
+4 1
+1 1
+3 1
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 0
+Subquery_cache_miss 4
+select a, a in (select a from t1 where -1 < benchmark(a,100)) from t1 as ext;
+a a in (select a from t1 where -1 < benchmark(a,100))
+2 1
+4 1
+1 1
+3 1
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 0
+Subquery_cache_miss 4
set optimizer_switch='subquery_cache=default';
=== modified file 'mysql-test/t/subquery_cache.test'
--- a/mysql-test/t/subquery_cache.test 2010-05-25 10:45:36 +0000
+++ b/mysql-test/t/subquery_cache.test 2010-05-25 12:54:57 +0000
@@ -1,5 +1,6 @@
set optimizer_switch='subquery_cache=on';
+flush status;
create table t1 (a int, b int);
insert into t1 values (1,2),(3,4),(1,2),(3,4),(3,4),(4,5),(4,5),(5,6),(5,6),(4,5);
@@ -188,4 +189,15 @@
DROP TABLE t1;
+--echo #uncacheable queries test (random and side effect)
+flush status;
+CREATE TABLE t1 (a int);
+INSERT INTO t1 VALUES (2), (4), (1), (3);
+select a, a in (select a from t1) from t1 as ext;
+show status like "subquery_cache%";
+select a, a in (select a from t1 where -1 < rand()) from t1 as ext;
+show status like "subquery_cache%";
+select a, a in (select a from t1 where -1 < benchmark(a,100)) from t1 as ext;
+show status like "subquery_cache%";
+
set optimizer_switch='subquery_cache=default';
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2010-05-24 17:29:56 +0000
+++ b/sql/item_cmpfunc.cc 2010-05-25 12:54:57 +0000
@@ -1738,7 +1738,9 @@
const_item_cache&= args[1]->const_item();
DBUG_ASSERT(scache == NULL);
if (args[0]->cols() ==1 &&
- thd->variables.optimizer_switch & OPTIMIZER_SWITCH_SUBQUERY_CACHE)
+ thd->variables.optimizer_switch & OPTIMIZER_SWITCH_SUBQUERY_CACHE &&
+ !(sub->engine->uncacheable() & (UNCACHEABLE_RAND |
+ UNCACHEABLE_SIDEEFFECT)))
{
sub->depends_on.push_front((Item**)&cache);
scache= new Subquery_cache_tmptable(thd, sub->depends_on, &result);
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-05-24 17:29:56 +0000
+++ b/sql/item_subselect.cc 2010-05-25 12:54:57 +0000
@@ -760,7 +760,10 @@
(uint)depends_on.elements,
(uint)test(thd->variables.optimizer_switch & OPTIMIZER_SWITCH_SUBQUERY_CACHE)));
engine->fix_length_and_dec(row= &value);
- if (depends_on.elements && optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE))
+ if (depends_on.elements &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+ !(engine->uncacheable() & (UNCACHEABLE_RAND |
+ UNCACHEABLE_SIDEEFFECT)))
{
DBUG_ASSERT(scache == NULL);
scache= new Subquery_cache_tmptable(thd, depends_on, value);
@@ -1100,7 +1103,9 @@
/* We need only 1 row to determine existence */
unit->global_parameters->select_limit= new Item_int((int32) 1);
if (substype() == EXISTS_SUBS && depends_on.elements &&
- optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE))
+ optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE) &&
+ !(engine->uncacheable() & (UNCACHEABLE_RAND |
+ UNCACHEABLE_SIDEEFFECT)))
{
DBUG_ASSERT(scache == NULL);
scache= new Subquery_cache_tmptable(thd, depends_on, &result);
1
0

[Maria-developers] Rev 2790: Fixed sum functions dependency for subqueries in file:///home/bell/maria/bzr/work-maria-5.3-scache/
by sanja@askmonty.org 25 May '10
by sanja@askmonty.org 25 May '10
25 May '10
At file:///home/bell/maria/bzr/work-maria-5.3-scache/
------------------------------------------------------------
revno: 2790
revision-id: sanja(a)askmonty.org-20100525104536-zw06otfk8ut7fias
parent: sanja(a)askmonty.org-20100524172956-7b14x01aodizr3sq
committer: sanja(a)askmonty.org
branch nick: work-maria-5.3-scache
timestamp: Tue 2010-05-25 13:45:36 +0300
message:
Fixed sum functions dependency for subqueries
Forgotten files added.
=== added file 'mysql-test/r/subquery_cache.result'
--- a/mysql-test/r/subquery_cache.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/r/subquery_cache.result 2010-05-25 10:45:36 +0000
@@ -0,0 +1,555 @@
+set optimizer_switch='subquery_cache=on';
+create table t1 (a int, b int);
+insert into t1 values (1,2),(3,4),(1,2),(3,4),(3,4),(4,5),(4,5),(5,6),(5,6),(4,5);
+create table t2 (c int, d int);
+insert into t2 values (2,3),(3,4),(5,6);
+#single value subquery test
+select a, (select d from t2 where b=c) + 1 from t1;
+a (select d from t2 where b=c) + 1
+1 4
+3 NULL
+1 4
+3 NULL
+3 NULL
+4 7
+4 7
+5 NULL
+5 NULL
+4 7
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 6
+Subquery_cache_miss 4
+#single value subquery test (PS)
+prepare stmt1 from 'select a, (select d from t2 where b=c) + 1 from t1';
+execute stmt1;
+a (select d from t2 where b=c) + 1
+1 4
+3 NULL
+1 4
+3 NULL
+3 NULL
+4 7
+4 7
+5 NULL
+5 NULL
+4 7
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 12
+Subquery_cache_miss 8
+execute stmt1;
+a (select d from t2 where b=c) + 1
+1 4
+3 NULL
+1 4
+3 NULL
+3 NULL
+4 7
+4 7
+5 NULL
+5 NULL
+4 7
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 18
+Subquery_cache_miss 12
+deallocate prepare stmt1;
+#single value subquery test (SP)
+CREATE PROCEDURE p1() select a, (select d from t2 where b=c) + 1 from t1;
+call p1;
+a (select d from t2 where b=c) + 1
+1 4
+3 NULL
+1 4
+3 NULL
+3 NULL
+4 7
+4 7
+5 NULL
+5 NULL
+4 7
+call p1;
+a (select d from t2 where b=c) + 1
+1 4
+3 NULL
+1 4
+3 NULL
+3 NULL
+4 7
+4 7
+5 NULL
+5 NULL
+4 7
+drop procedure p1;
+#IN subquery test
+flush status;
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 0
+Subquery_cache_miss 0
+select a, b , b in (select d from t2) as SUBS from t1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 6
+Subquery_cache_miss 4
+insert into t1 values (7,8),(9,NULL);
+select a, b , b in (select d from t2) as SUBS from t1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+7 8 0
+9 NULL NULL
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 12
+Subquery_cache_miss 10
+insert into t2 values (8,NULL);
+select a, b , b in (select d from t2) as SUBS from t1;
+a b SUBS
+1 2 NULL
+3 4 1
+1 2 NULL
+3 4 1
+3 4 1
+4 5 NULL
+4 5 NULL
+5 6 1
+5 6 1
+4 5 NULL
+7 8 NULL
+9 NULL NULL
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 18
+Subquery_cache_miss 16
+#IN subquery test (PS)
+delete from t1 where a > 6;
+delete from t2 where c > 6;
+prepare stmt1 from 'select a, b , b in (select d from t2) as SUBS from t1';
+execute stmt1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 24
+Subquery_cache_miss 20
+execute stmt1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 30
+Subquery_cache_miss 24
+insert into t1 values (7,8),(9,NULL);
+execute stmt1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL NULL
+7 8 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 36
+Subquery_cache_miss 30
+execute stmt1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL NULL
+7 8 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 42
+Subquery_cache_miss 36
+insert into t2 values (8,NULL);
+execute stmt1;
+a b SUBS
+1 2 NULL
+3 4 1
+1 2 NULL
+3 4 1
+3 4 1
+4 5 NULL
+4 5 NULL
+5 6 1
+5 6 1
+4 5 NULL
+9 NULL NULL
+7 8 NULL
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 48
+Subquery_cache_miss 42
+execute stmt1;
+a b SUBS
+1 2 NULL
+3 4 1
+1 2 NULL
+3 4 1
+3 4 1
+4 5 NULL
+4 5 NULL
+5 6 1
+5 6 1
+4 5 NULL
+9 NULL NULL
+7 8 NULL
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 54
+Subquery_cache_miss 48
+deallocate prepare stmt1;
+#IN subquery test (SP)
+delete from t1 where a > 6;
+delete from t2 where c > 6;
+CREATE PROCEDURE p1() select a, b , b in (select d from t2) as SUBS from t1;
+call p1();
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 60
+Subquery_cache_miss 52
+call p1();
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 66
+Subquery_cache_miss 56
+insert into t1 values (7,8),(9,NULL);
+call p1();
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL NULL
+7 8 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 72
+Subquery_cache_miss 62
+call p1();
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL NULL
+7 8 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 78
+Subquery_cache_miss 68
+insert into t2 values (8,NULL);
+call p1();
+a b SUBS
+1 2 NULL
+3 4 1
+1 2 NULL
+3 4 1
+3 4 1
+4 5 NULL
+4 5 NULL
+5 6 1
+5 6 1
+4 5 NULL
+9 NULL NULL
+7 8 NULL
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 84
+Subquery_cache_miss 74
+call p1();
+a b SUBS
+1 2 NULL
+3 4 1
+1 2 NULL
+3 4 1
+3 4 1
+4 5 NULL
+4 5 NULL
+5 6 1
+5 6 1
+4 5 NULL
+9 NULL NULL
+7 8 NULL
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 90
+Subquery_cache_miss 80
+drop procedure p1;
+# test of simple exists
+select a, b , exists (select * from t2 where b=d) as SUBS from t1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL 0
+7 8 0
+# test of prepared statement exists
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 96
+Subquery_cache_miss 86
+prepare stmt1 from 'select a, b , exists (select * from t2 where b=d) as SUBS from t1';
+execute stmt1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL 0
+7 8 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 102
+Subquery_cache_miss 92
+execute stmt1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL 0
+7 8 0
+show status like "subquery_cache%";
+Variable_name Value
+Subquery_cache_hit 108
+Subquery_cache_miss 98
+deallocate prepare stmt1;
+# test of stored procedure exists
+CREATE PROCEDURE p1() select a, b , exists (select * from t2 where b=d) as SUBS from t1;
+call p1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL 0
+7 8 0
+call p1;
+a b SUBS
+1 2 0
+3 4 1
+1 2 0
+3 4 1
+3 4 1
+4 5 0
+4 5 0
+5 6 1
+5 6 1
+4 5 0
+9 NULL 0
+7 8 0
+drop procedure p1;
+#clean up
+drop table t1,t2;
+test different types
+#int
+CREATE TABLE t1 ( a int, b int);
+INSERT INTO t1 VALUES(1,1),(2,2),(3,3);
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = 2);
+a
+1
+3
+DROP TABLE t1;
+#char
+CREATE TABLE t1 ( a char(1), b char (1));
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+a
+1
+3
+DROP TABLE t1;
+#decimal
+CREATE TABLE t1 ( a decimal(3,1), b decimal(3,1));
+INSERT INTO t1 VALUES(1,1),(2,2),(3,3);
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = 2);
+a
+1.0
+3.0
+DROP TABLE t1;
+#date
+CREATE TABLE t1 ( a date, b date);
+INSERT INTO t1 VALUES('1000-01-01','1000-01-01'),('2000-02-01','2000-02-01'),('3000-03-03','3000-03-03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2000-02-01');
+a
+1000-01-01
+3000-03-03
+DROP TABLE t1;
+#datetime
+CREATE TABLE t1 ( a datetime, b datetime);
+INSERT INTO t1 VALUES('1000-01-01 01:01:01','1000-01-01 01:01:01'),('2000-02-02 02:02:02','2000-02-02 02:02:02'),('3000-03-03 03:03:03','3000-03-03 03:03:03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2000-02-02 02:02:02');
+a
+1000-01-01 01:01:01
+3000-03-03 03:03:03
+DROP TABLE t1;
+#time
+CREATE TABLE t1 ( a time, b time);
+INSERT INTO t1 VALUES('01:01:01','01:01:01'),('02:02:02','02:02:02'),('03:03:03','03:03:03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '02:02:02');
+a
+01:01:01
+03:03:03
+DROP TABLE t1;
+#timestamp
+CREATE TABLE t1 ( a timestamp, b timestamp);
+INSERT INTO t1 VALUES('2000-02-02 01:01:01','2000-02-02 01:01:01'),('2000-02-02 02:02:02','2000-02-02 02:02:02'),('2000-02-02 03:03:03','2000-02-02 03:03:03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2000-02-02 02:02:02');
+a
+2000-02-02 01:01:01
+2000-02-02 03:03:03
+DROP TABLE t1;
+#bit
+CREATE TABLE t1 ( a bit(20), b bit(20));
+INSERT INTO t1 VALUES(1,1),(2,2),(3,3);
+SELECT a+0 FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = 2);
+a+0
+1
+3
+DROP TABLE t1;
+#enum
+CREATE TABLE t1 ( a enum('1','2','3'), b enum('1','2','3'));
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+a
+1
+3
+DROP TABLE t1;
+#set
+CREATE TABLE t1 ( a set('1','2','3'), b set('1','2','3'));
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+a
+1
+3
+DROP TABLE t1;
+#blob
+CREATE TABLE t1 ( a blob, b blob);
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+a
+1
+3
+DROP TABLE t1;
+#geometry
+CREATE TABLE t1 ( a geometry, b geometry);
+INSERT INTO t1 VALUES(POINT(1,1),POINT(1,1)),(POINT(2,2),POINT(2,2)),(POINT(3,3),POINT(3,3));
+SELECT astext(a) FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = POINT(2,2));
+astext(a)
+POINT(1 1)
+POINT(3 3)
+DROP TABLE t1;
+set optimizer_switch='subquery_cache=default';
=== added file 'mysql-test/t/subquery_cache.test'
--- a/mysql-test/t/subquery_cache.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/t/subquery_cache.test 2010-05-25 10:45:36 +0000
@@ -0,0 +1,191 @@
+
+set optimizer_switch='subquery_cache=on';
+
+create table t1 (a int, b int);
+insert into t1 values (1,2),(3,4),(1,2),(3,4),(3,4),(4,5),(4,5),(5,6),(5,6),(4,5);
+create table t2 (c int, d int);
+insert into t2 values (2,3),(3,4),(5,6);
+
+--echo #single value subquery test
+select a, (select d from t2 where b=c) + 1 from t1;
+
+show status like "subquery_cache%";
+
+--echo #single value subquery test (PS)
+prepare stmt1 from 'select a, (select d from t2 where b=c) + 1 from t1';
+execute stmt1;
+show status like "subquery_cache%";
+execute stmt1;
+show status like "subquery_cache%";
+deallocate prepare stmt1;
+
+--echo #single value subquery test (SP)
+CREATE PROCEDURE p1() select a, (select d from t2 where b=c) + 1 from t1;
+
+call p1;
+call p1;
+
+drop procedure p1;
+
+--echo #IN subquery test
+flush status;
+
+show status like "subquery_cache%";
+select a, b , b in (select d from t2) as SUBS from t1;
+show status like "subquery_cache%";
+
+insert into t1 values (7,8),(9,NULL);
+select a, b , b in (select d from t2) as SUBS from t1;
+show status like "subquery_cache%";
+
+insert into t2 values (8,NULL);
+select a, b , b in (select d from t2) as SUBS from t1;
+show status like "subquery_cache%";
+
+--echo #IN subquery test (PS)
+delete from t1 where a > 6;
+delete from t2 where c > 6;
+
+prepare stmt1 from 'select a, b , b in (select d from t2) as SUBS from t1';
+execute stmt1;
+show status like "subquery_cache%";
+execute stmt1;
+show status like "subquery_cache%";
+
+insert into t1 values (7,8),(9,NULL);
+execute stmt1;
+show status like "subquery_cache%";
+execute stmt1;
+show status like "subquery_cache%";
+
+insert into t2 values (8,NULL);
+execute stmt1;
+show status like "subquery_cache%";
+execute stmt1;
+show status like "subquery_cache%";
+
+deallocate prepare stmt1;
+
+
+--echo #IN subquery test (SP)
+delete from t1 where a > 6;
+delete from t2 where c > 6;
+
+CREATE PROCEDURE p1() select a, b , b in (select d from t2) as SUBS from t1;
+
+call p1();
+show status like "subquery_cache%";
+call p1();
+show status like "subquery_cache%";
+
+insert into t1 values (7,8),(9,NULL);
+call p1();
+show status like "subquery_cache%";
+call p1();
+show status like "subquery_cache%";
+
+insert into t2 values (8,NULL);
+call p1();
+show status like "subquery_cache%";
+call p1();
+show status like "subquery_cache%";
+
+drop procedure p1;
+
+
+--echo # test of simple exists
+select a, b , exists (select * from t2 where b=d) as SUBS from t1;
+
+--echo # test of prepared statement exists
+show status like "subquery_cache%";
+prepare stmt1 from 'select a, b , exists (select * from t2 where b=d) as SUBS from t1';
+execute stmt1;
+show status like "subquery_cache%";
+execute stmt1;
+show status like "subquery_cache%";
+deallocate prepare stmt1;
+
+--echo # test of stored procedure exists
+CREATE PROCEDURE p1() select a, b , exists (select * from t2 where b=d) as SUBS from t1;
+call p1;
+call p1;
+drop procedure p1;
+
+--echo #clean up
+drop table t1,t2;
+
+--echo test different types
+--echo #int
+CREATE TABLE t1 ( a int, b int);
+INSERT INTO t1 VALUES(1,1),(2,2),(3,3);
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = 2);
+DROP TABLE t1;
+
+--echo #char
+CREATE TABLE t1 ( a char(1), b char (1));
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+DROP TABLE t1;
+
+--echo #decimal
+CREATE TABLE t1 ( a decimal(3,1), b decimal(3,1));
+INSERT INTO t1 VALUES(1,1),(2,2),(3,3);
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = 2);
+DROP TABLE t1;
+
+--echo #date
+CREATE TABLE t1 ( a date, b date);
+INSERT INTO t1 VALUES('1000-01-01','1000-01-01'),('2000-02-01','2000-02-01'),('3000-03-03','3000-03-03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2000-02-01');
+DROP TABLE t1;
+
+--echo #datetime
+CREATE TABLE t1 ( a datetime, b datetime);
+INSERT INTO t1 VALUES('1000-01-01 01:01:01','1000-01-01 01:01:01'),('2000-02-02 02:02:02','2000-02-02 02:02:02'),('3000-03-03 03:03:03','3000-03-03 03:03:03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2000-02-02 02:02:02');
+DROP TABLE t1;
+
+--echo #time
+CREATE TABLE t1 ( a time, b time);
+INSERT INTO t1 VALUES('01:01:01','01:01:01'),('02:02:02','02:02:02'),('03:03:03','03:03:03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '02:02:02');
+DROP TABLE t1;
+
+--echo #timestamp
+CREATE TABLE t1 ( a timestamp, b timestamp);
+INSERT INTO t1 VALUES('2000-02-02 01:01:01','2000-02-02 01:01:01'),('2000-02-02 02:02:02','2000-02-02 02:02:02'),('2000-02-02 03:03:03','2000-02-02 03:03:03');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2000-02-02 02:02:02');
+DROP TABLE t1;
+
+--echo #bit
+CREATE TABLE t1 ( a bit(20), b bit(20));
+INSERT INTO t1 VALUES(1,1),(2,2),(3,3);
+SELECT a+0 FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = 2);
+DROP TABLE t1;
+
+--echo #enum
+CREATE TABLE t1 ( a enum('1','2','3'), b enum('1','2','3'));
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+DROP TABLE t1;
+
+--echo #set
+CREATE TABLE t1 ( a set('1','2','3'), b set('1','2','3'));
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+DROP TABLE t1;
+
+--echo #blob
+CREATE TABLE t1 ( a blob, b blob);
+INSERT INTO t1 VALUES('1','1'),('2','2'),('3','3');
+SELECT a FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = '2');
+DROP TABLE t1;
+
+--echo #geometry
+CREATE TABLE t1 ( a geometry, b geometry);
+INSERT INTO t1 VALUES(POINT(1,1),POINT(1,1)),(POINT(2,2),POINT(2,2)),(POINT(3,3),POINT(3,3));
+SELECT astext(a) FROM t1 WHERE NOT a IN (SELECT a FROM t1 WHERE b = POINT(2,2));
+DROP TABLE t1;
+
+
+set optimizer_switch='subquery_cache=default';
=== modified file 'sql/item_sum.cc'
--- a/sql/item_sum.cc 2010-03-20 12:01:47 +0000
+++ b/sql/item_sum.cc 2010-05-25 10:45:36 +0000
@@ -319,6 +319,7 @@
if (aggr_level >= 0)
{
ref_by= ref;
+ thd->lex->current_select->register_dependency_item(aggr_sel, ref);
/* Add the object to the list of registered objects assigned to aggr_sel */
if (!aggr_sel->inner_sum_func_list)
next= this;
=== added file 'sql/sql_subquery_cache.cc'
--- a/sql/sql_subquery_cache.cc 1970-01-01 00:00:00 +0000
+++ b/sql/sql_subquery_cache.cc 2010-05-25 10:45:36 +0000
@@ -0,0 +1,319 @@
+
+#include "mysql_priv.h"
+#include "sql_select.h"
+
+ulonglong subquery_cache_miss, subquery_cache_hit;
+
+/**
+ Creates structures which we need for index look up
+
+ @retval FALSE OK
+ @retval TRUE Error
+*/
+
+static my_bool createtmp_table_search_structures(THD *thd,
+ TABLE *table,
+ List_iterator_fast<Item> &li,
+ TABLE_REF **ref)
+{
+ /*
+ Create/initialize everything we will need to index lookups into the
+ temptable.
+ */
+ TABLE_REF *tab_ref;
+ KEY *tmp_key; /* The only index on the temporary table. */
+ Item *item;
+ uint tmp_key_parts; /* Number of keyparts in tmp_key. */
+ uint i;
+
+ DBUG_ENTER("createtmp_table_search_structures");
+
+ tmp_key= table->key_info;
+ tmp_key_parts= tmp_key->key_parts;
+
+ if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF))))
+ DBUG_RETURN(TRUE);
+
+ tab_ref->key= 0; /* The only temp table index. */
+ tab_ref->key_length= tmp_key->key_length;
+ if (!(tab_ref->key_buff=
+ (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+ !(tab_ref->key_copy=
+ (store_key**) thd->alloc((sizeof(store_key*) *
+ (tmp_key_parts + 1)))) ||
+ !(tab_ref->items=
+ (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+ DBUG_RETURN(TRUE); /* purecov: inspected */
+
+ tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length);
+ tab_ref->key_err=1;
+ tab_ref->null_rejecting= 1;
+ tab_ref->disable_cache= FALSE;
+ tab_ref->has_record= 0;
+
+ KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+ store_key **ref_key= tab_ref->key_copy;
+ uchar *cur_ref_buff= tab_ref->key_buff;
+
+ for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+ {
+ item= li++;
+ DBUG_ASSERT(item);
+ tab_ref->items[i]= item;
+ int null_count= test(cur_key_part->field->real_maybe_null());
+ *ref_key= new store_key_item(thd, cur_key_part->field,
+ /* TODO:
+ the NULL byte is taken into account in
+ cur_key_part->store_length, so instead of
+ cur_ref_buff + test(maybe_null), we could
+ use that information instead.
+ */
+ cur_ref_buff + null_count,
+ null_count ? tab_ref->key_buff : 0,
+ cur_key_part->length, tab_ref->items[i]);
+ cur_ref_buff+= cur_key_part->store_length;
+ }
+ *ref_key= NULL; /* End marker. */
+ tab_ref->key_err= 1;
+ tab_ref->key_parts= tmp_key_parts;
+ *ref= tab_ref;
+
+ DBUG_RETURN(FALSE);
+}
+
+
+Subquery_cache_tmptable::Subquery_cache_tmptable(THD *thd,
+ List<Item*> &dependance,
+ Item *value)
+ :cache_table(NULL), table_thd(thd), list(&dependance), val(value),
+ equalities(NULL), inited (0)
+{
+ DBUG_ENTER("Subquery_cache_tmptable::Subquery_cache_tmptable");
+ DBUG_VOID_RETURN;
+};
+
+
+/**
+ Creates equalities expression.
+
+ @retval FALSE OK
+ @retval TRUE Error
+*/
+
+bool Subquery_cache_tmptable::make_equalities()
+{
+ List<Item> args;
+ List_iterator_fast<Item*> li(*list);
+ Item **ref;
+ Name_resolution_context *cn= NULL;
+ DBUG_ENTER("Subquery_cache_tmptable::make_equalities");
+
+ for (uint i= 1 /* skip result field */; (ref= li++); i++)
+ {
+ Field *fld= cache_table->field[i];
+ if (fld->type() == MYSQL_TYPE_VARCHAR ||
+ fld->type() == MYSQL_TYPE_TINY_BLOB ||
+ fld->type() == MYSQL_TYPE_MEDIUM_BLOB ||
+ fld->type() == MYSQL_TYPE_LONG_BLOB ||
+ fld->type() == MYSQL_TYPE_BLOB ||
+ fld->type() == MYSQL_TYPE_VAR_STRING ||
+ fld->type() == MYSQL_TYPE_STRING ||
+ fld->type() == MYSQL_TYPE_NEWDECIMAL ||
+ fld->type() == MYSQL_TYPE_DECIMAL)
+ {
+ if (!cn)
+ {
+ // dummy resolution context
+ cn= new Name_resolution_context();
+ cn->init();
+ }
+ args.push_front(new Item_func_eq(new Item_ref(cn, ref, "", "", FALSE),
+ new Item_field(fld)));
+ }
+ }
+ if (args.elements == 0)
+ DBUG_RETURN(FALSE); /* no inexact fields => nothing to re-check */
+ if (args.elements == 1)
+ equalities= args.head();
+ else
+ equalities= new Item_cond_and(args);
+
+ DBUG_RETURN(equalities->fix_fields(table_thd, &equalities));
+}
+
+void Subquery_cache_tmptable::init()
+{
+ ulonglong keymap;
+ List_iterator_fast<Item*> li(*list);
+ List_iterator_fast<Item> li_items(items);
+ Item **item;
+ DBUG_ENTER("Subquery_cache_tmptable::init");
+ DBUG_ASSERT(!inited);
+ inited= TRUE;
+
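+ /* the keymap must fit one bit per column: the dependencies plus the result */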
+ if (!(ULONGLONG_MAX >> (list->elements + 1)))
+ {
+ DBUG_PRINT("info", ("Too many dependencies"));
+ DBUG_VOID_RETURN;
+ }
+
+ cache_table= NULL;
+ while ((item= li++))
+ {
+ DBUG_ASSERT(item);
+ DBUG_ASSERT(*item);
+ DBUG_ASSERT((*item)->fixed);
+ items.push_back((*item));
+ }
+
+ cache_table_param.init();
+ /* dependency items and result */
+ cache_table_param.field_count= list->elements + 1;
+ /* postpone table creation to index description */
+ cache_table_param.skip_create_table= 1;
+
+
+ items.push_front(val);
+ if (!(cache_table= create_tmp_table(table_thd, &cache_table_param,
+ items, (ORDER*) NULL,
+ FALSE, FALSE,
+ (table_thd->options |
+ TMP_TABLE_ALL_COLUMNS),
+ HA_POS_ERROR,
+ (char *)"subquery-cache-table")))
+ {
+ DBUG_PRINT("error", ("create_tmp_table failed, caching switched off"));
+ DBUG_VOID_RETURN;
+ }
+
+ if (cache_table->s->blob_fields)
+ {
+ DBUG_PRINT("error", ("we do not need blobs"));
+ goto error;
+ }
+
+ /* make a keymap with one bit set per key column (bit 0 is the result) */
+ keymap= ((ulonglong) 1) << items.elements;
+ if (!keymap)
+ keymap= ULONGLONG_MAX; /* 64 columns: all bits set */
+ else
+ keymap--; /* bits 0 .. items.elements-1 set */
+ keymap&= ~(ulonglong) 1; /* exclude the result column */
+
+ li_items++;
+ if (cache_table->alloc_keys(1) ||
+ (cache_table->add_tmp_key(keymap, "cache-table-key") < 0) ||
+ create_tmp_table_search_structures(table_thd, cache_table, li_items,
+ &tab_ref) ||
+ !(tab= create_index_lookup_join_tab(cache_table)))
+ {
+ DBUG_PRINT("error", ("creating index failed"));
+ goto error;
+ }
+ cache_table->s->keys= 1;
+ cache_table->s->uniques= 1;
+
+ if (open_tmp_table(cache_table))
+ {
+ DBUG_PRINT("error", ("Opening (creating) temporary table failed"));
+ goto error;
+ }
+
+ if (!(cached_result= new Item_field(cache_table->field[0])))
+ {
+ DBUG_PRINT("error", ("Creating Item_field failed"));
+ goto error;
+ }
+ if (make_equalities())
+ {
+ DBUG_PRINT("error", ("Creating equalities failed"));
+ goto error;
+ }
+
+ DBUG_VOID_RETURN;
+
+error:
+ /* switch off cache */
+ free_tmp_table(table_thd, cache_table);
+ cache_table= NULL;
+ DBUG_VOID_RETURN;
+}
+
+
+Subquery_cache_tmptable::~Subquery_cache_tmptable()
+{
+ if (cache_table)
+ free_tmp_table(table_thd, cache_table);
+}
+
+
+Subquery_cache::result Subquery_cache_tmptable::check_value(Item **value)
+{
+ int res;
+ DBUG_ENTER("Subquery_cache_tmptable::check_value");
+
+ if (!inited)
+ init();
+
+ if (cache_table)
+ {
+ DBUG_PRINT("info", ("status: %u has_record %u",
+ (uint)cache_table->status, (uint)tab_ref->has_record));
+ if ((res= join_read_key2(table_thd, tab, cache_table, tab_ref)) == 1)
+ DBUG_RETURN(ERROR);
+ if (res || (equalities && !equalities->val_int()))
+ {
+ subquery_cache_miss++;
+ DBUG_RETURN(MISS);
+ }
+
+ subquery_cache_hit++;
+ *value= cached_result;
+ DBUG_RETURN(Subquery_cache::HIT);
+ }
+ DBUG_RETURN(Subquery_cache::MISS);
+}
+
+
+my_bool Subquery_cache_tmptable::put_value(Item *value)
+{
+ int error;
+ DBUG_ENTER("Subquery_cache_tmptable::put_value");
+ DBUG_ASSERT(inited);
+
+ if (!cache_table)
+ {
+ DBUG_PRINT("info", ("No table so behave as we successfully put value"));
+ DBUG_RETURN(FALSE);
+ }
+
+ *(items.head_ref())= value;
+ fill_record(table_thd, cache_table->field, items, 1);
+ if (table_thd->is_error())
+ goto err;
+
+ if ((error= cache_table->file->ha_write_row(cache_table->record[0])))
+ {
+ /* create_internal_tmp_table_from_heap will generate an error if needed */
+ if (cache_table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+ create_internal_tmp_table_from_heap(table_thd, cache_table,
+ cache_table_param.start_recinfo,
+ &cache_table_param.recinfo,
+ error, 1))
+ goto err;
+ }
+ cache_table->status= 0; /* cache_table->record[0] contains an existing record */
+ tab_ref->has_record= TRUE; /* the same as above */
+ DBUG_PRINT("info", ("has_record: TRUE status: 0"));
+
+ DBUG_RETURN(FALSE);
+
+err:
+ free_tmp_table(table_thd, cache_table);
+ cache_table= NULL;
+ DBUG_RETURN(TRUE);
+}
+
+
+void Subquery_cache_tmptable::cleanup()
+{
+ if (cache_table) /* the cache may have been switched off on error */
+ cache_table->file->ha_delete_all_rows();
+}
=== added file 'sql/sql_subquery_cache.h'
--- a/sql/sql_subquery_cache.h 1970-01-01 00:00:00 +0000
+++ b/sql/sql_subquery_cache.h 2010-05-25 10:45:36 +0000
@@ -0,0 +1,79 @@
+#ifndef _SQL_SUBQUERY_CACHE_H_
+#define _SQL_SUBQUERY_CACHE_H_
+
+/**
+ Interface for subquery cache
+*/
+
+extern ulonglong subquery_cache_miss, subquery_cache_hit;
+
+class Subquery_cache :public Sql_alloc
+{
+public:
+ enum result {ERROR, HIT, MISS};
+
+ Subquery_cache() {}
+ virtual ~Subquery_cache() {}
+ /**
+ Checks for the presence of the key (taken from the cache owner) and, if
+ found, returns the cached value via the 'value' parameter
+ */
+ virtual result check_value(Item **value)= 0;
+ /**
+ Puts the value into this cache (the key is taken from the cache owner)
+ */
+ virtual my_bool put_value(Item *value)= 0;
+ /**
+ Cleans up and resets the cache before reuse
+ */
+ virtual void cleanup()= 0;
+};
+
+struct st_table_ref;
+struct st_join_table;
+//class Item_cache;
+class Item_field;
+
+/**
+ Implementation of subquery cache over temporary table
+*/
+
+class Subquery_cache_tmptable :public Subquery_cache
+{
+public:
+ Subquery_cache_tmptable(THD *thd, List<Item*> &dependencies, Item *value);
+ virtual ~Subquery_cache_tmptable();
+ virtual result check_value(Item **value);
+ virtual my_bool put_value(Item *value);
+ virtual void cleanup();
+ void init();
+
+private:
+ bool make_equalities();
+
+ /* tmp table parameters */
+ TMP_TABLE_PARAM cache_table_param;
+ /* temporary table to store this cache */
+ TABLE *cache_table;
+ /* Thread handler for the temporary table */
+ THD *table_thd;
+ /* tab_ref for index search */
+ struct st_table_ref *tab_ref;
+ /* cache of subquery value to avoid evaluating it twice */
+ //Item_cache *value_cache;
+ /* JOIN_TAB for index lookup */
+ st_join_table *tab;
+ /* Cached result */
+ Item_field *cached_result;
+ /* List of references to items */
+ List<Item*> *list;
+ /* List of items */
+ List<Item> items;
+ /* Value Item example */
+ Item *val;
+ /* Expression to check after index lookup */
+ Item *equalities;
+ /* set if structures are inited */
+ bool inited;
+};
+#endif
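For illustration, here is a minimal sketch of how a caller is expected to
drive this interface; execute_subquery() is a hypothetical placeholder for
the real subquery evaluation and is not part of this patch:

Item *value;
switch (cache->check_value(&value))
{
case Subquery_cache::HIT:
/* 'value' now points to the cached result (an Item_field) */
break;
case Subquery_cache::MISS:
value= execute_subquery(); /* hypothetical: evaluate the subquery */
cache->put_value(value); /* remember the result for the next lookup */
break;
case Subquery_cache::ERROR:
/* lookup failed: evaluate the subquery without the cache */
break;
}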
1
0

[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 25 May '10
25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 31
ESTIMATE.......: 4 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 08:29)=-=-
Help debug strange problem in mysqlbinlog.test.
Worked 1 hour and estimate 4 hours remain (original estimate unchanged).
-=-=(Knielsen - Mon, 17 May 2010, 08:45)=-=-
Merge with latest trunk and run Buildbot tests.
Worked 1 hour and estimate 5 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
------------------------------------------------------------
-=-=(View All Progress Notes, 31 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in the application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially inserts of big blobs would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. It has an
empty post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In the binary log, the Annotate_rows event follows the (possible) 'BEGIN'
Query event and precedes the first of the Table map events which accompany
the corresponding rows events. (See example in the "mysqlbinlog output"
section below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. The session value allows annotating only selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events received from the master
in its own binary log (only meaningful together with the log-slave-updates
option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not make
the slave reproduce these events whenever its binlog-annotate-rows-events
global value is ON? Because, for example, we may want to configure a
slave which should reproduce Annotate_rows events but has the global
binlog-annotate-rows-events = OFF, meaning that to be the default value for
the client threads (see also "How slave treats replicate-annotate-rows-events
option" in the LLD part).
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log contains them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
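For illustration, the skipping can be sketched like this (an assumption,
not code from the patch; opt_print_annotate_rows_events stands for the
hypothetical variable behind the new option):
if (ev->get_type_code() == ANNOTATE_ROWS_EVENT &&
!opt_print_annotate_rows_events)
delete ev; /* by default Annotate_rows events are silently skipped */
else
ev->print(result_file, print_event_info); /* print as any other event */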
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100 end_log_pos 1714 Query thread_id=1
exec_time=0 error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100 end_log_pos 1812 Query: `DELETE t1, t2 FROM
t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100 end_log_pos 1853 Table_map: `test`.`t1`
mapped to number 16
#091219 12:45:26 server id 100 end_log_pos 1894 Table_map: `test`.`t2`
mapped to number 17
#091219 12:45:26 server id 100 end_log_pos 1938 Delete_rows: table id 16
#091219 12:45:26 server id 100 end_log_pos 1982 Delete_rows: table id 17
flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event number conflicts with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of the 'post_header_len' array in
the Format description event. (This extension does not affect the
compatibility of the binary log.) Here is how the Format description event
looks with this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for short, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in the Log_event object from which all log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member
which calls virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonym in replication terminology).
In our case, data header is empty and data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write Annotate_event not more than once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option is treated just as the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This setting is done during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for the master to send Annotate_rows events to this slave. The
slave (or mysqlbinlog in the remote case), before requesting binlog dump via the
COM_BINLOG_DUMP command, informs the master whether it should send these
events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT server
command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
Note. We add this new command and don't use COM_BINLOG_DUMP to avoid possible
conflicts with MySQL/Sun.
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
Log_event::read_log_event(&log, packet, ...);
...
if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
{
my_net_write(net, packet->ptr(), packet->length());
}
...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each received event by "applying" it, i.e. by
calling the Log_event::apply_event() function which in turn calls
the virtual do_apply_event() member specific for each type of the
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set current
thd query to that of the described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" to follow what happens further when the subsequent
Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. I am not sure, but possibly the current values of thd->query and
thd->query_length should be saved before calling set_query() and
restored when the Annotate_rows_log_event object is deleted.
Is this really needed?
After calling this do_apply_event() function we may not delete the
Annotate_rows_log_event object immediately (see exec_relay_log_event()
above) because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
The saved Annotate_rows object should be deleted when all corresponding
Rows events have been processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing a new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers to avoid possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this, suggesting to reserve the
event number.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 25 May '10
by worklog-noreply@askmonty.org 25 May '10
25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 31
ESTIMATE.......: 4 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 08:29)=-=-
Help debug strange problem in mysqlbinlog.test.
Worked 1 hour and estimate 4 hours remain (original estimate unchanged).
-=-=(Knielsen - Mon, 17 May 2010, 08:45)=-=-
Merge with latest trunk and run Buildbot tests.
Worked 1 hour and estimate 5 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
------------------------------------------------------------
-=-=(View All Progress Notes, 31 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially insert of big blob's would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. Has empty
post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In binary log, Annotate_rows event follows the (possible) 'BEGIN' Query event
and precedes the first of Table map events which accompany the corresponding
rows events. (See example in the "mysqlbinlog output" section below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. Session values allows to annotate only some selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events recieved from the master
in its own binary log (sensible only in pair with log-slave-updates option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not to make
the slave to reproduce this events when its binlog-annotate-rows-events
global value is ON? Well, because, for example, we may want to configure
the slave which should reproduce Annotate_rows events but has global
binlog-annotate-rows-events = OFF meaning this to be the default value for
the client threads (see also "How slave treats replicate-annotate-rows-events
option" in LLD part).
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log does contain them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100 end_log_pos 1714 Query thread_id=1
exec_time=0 error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100 end_log_pos 1812 Query: `DELETE t1, t2 FROM
t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100 end_log_pos 1853 Table_map: `test`.`t1`
mapped to number 16
#091219 12:45:26 server id 100 end_log_pos 1894 Table_map: `test`.`t2`
mapped to number 17
#091219 12:45:26 server id 100 end_log_pos 1938 Delete_rows: table id 16
#091219 12:45:26 server id 100 end_log_pos 1982 Delete_rows: table id 17
flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event numbers conflict with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of 'post_header_len' array in the
Format description event. (This extension does not affect the compatibility
of the binary log). Here is how Format description event looks like with
this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for short, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in Log_event object from which all Log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member
which calls virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonym in replication terminology).
In our case, data header is empty and data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write Annotate_event not more than once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option is treated just as the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This setting is done during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for master to send Annotate_rows events to this slave. The slave
(or mysqlbinlog in remote case), before requesting binlog dump via the
COM_BINLOG_DUMP command, informs the master whether it should send these
events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT server
command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
Note. We add this new command and don't use COM_BINLOG_DUMP to avoid possible
conflicts with MySQL/Sun.
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
Log_event::read_log_event(&log, packet, ...);
...
if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
{
my_net_write(net, packet->ptr(), packet->length());
}
...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each recieved event by "applying" it, i.e. by
calling the Log_event::apply_event() function which in turn calls
the virtual do_apply_event() member specific for each type of the
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set current
thd query to that of the described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" to follow what happens further when the subsequent
Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. I am not sure, but possibly current values of thd->query and
thd->query_length should be saved before calling set_query() and to be
restored on the Annotate_rows_log_event object deletion.
Is it really needed ?
After calling this do_apply_event() function we may not delete the
Annotate_rows_log_event object immediatedly (see exec_relay_log_event()
above) because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
The saved Annotate_rows object should be deleted when all corresponding
Rows events will be processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers not to have possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this and suggesting to reserve the
event number.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 25 May '10
by worklog-noreply@askmonty.org 25 May '10
25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 31
ESTIMATE.......: 4 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 08:29)=-=-
Help debug strange problem in mysqlbinlog.test.
Worked 1 hour and estimate 4 hours remain (original estimate unchanged).
-=-=(Knielsen - Mon, 17 May 2010, 08:45)=-=-
Merge with latest trunk and run Buildbot tests.
Worked 1 hour and estimate 5 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
------------------------------------------------------------
-=-=(View All Progress Notes, 31 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially insert of big blob's would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. Has empty
post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In binary log, Annotate_rows event follows the (possible) 'BEGIN' Query event
and precedes the first of Table map events which accompany the corresponding
rows events. (See example in the "mysqlbinlog output" section below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. Session values allows to annotate only some selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events recieved from the master
in its own binary log (sensible only in pair with log-slave-updates option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not to make
the slave to reproduce this events when its binlog-annotate-rows-events
global value is ON? Well, because, for example, we may want to configure
the slave which should reproduce Annotate_rows events but has global
binlog-annotate-rows-events = OFF meaning this to be the default value for
the client threads (see also "How slave treats replicate-annotate-rows-events
option" in LLD part).
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log does contain them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100 end_log_pos 1714 Query thread_id=1
exec_time=0 error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100 end_log_pos 1812 Query: `DELETE t1, t2 FROM
t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100 end_log_pos 1853 Table_map: `test`.`t1`
mapped to number 16
#091219 12:45:26 server id 100 end_log_pos 1894 Table_map: `test`.`t2`
mapped to number 17
#091219 12:45:26 server id 100 end_log_pos 1938 Delete_rows: table id 16
#091219 12:45:26 server id 100 end_log_pos 1982 Delete_rows: table id 17
flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event numbers conflict with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of 'post_header_len' array in the
Format description event. (This extension does not affect the compatibility
of the binary log). Here is how Format description event looks like with
this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for short, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in Log_event object from which all Log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member
which calls virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonym in replication terminology).
In our case, data header is empty and data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write Annotate_event not more than once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option is treated just as the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This setting is done during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for master to send Annotate_rows events to this slave. The slave
(or mysqlbinlog in remote case), before requesting binlog dump via the
COM_BINLOG_DUMP command, informs the master whether it should send these
events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT server
command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
Note. We add this new command and don't use COM_BINLOG_DUMP to avoid possible
conflicts with MySQL/Sun.
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
Log_event::read_log_event(&log, packet, ...);
...
if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
{
my_net_write(net, packet->ptr(), packet->length());
}
...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each recieved event by "applying" it, i.e. by
calling the Log_event::apply_event() function which in turn calls
the virtual do_apply_event() member specific for each type of the
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set current
thd query to that of the described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" to follow what happens further when the subsequent
Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. I am not sure, but possibly current values of thd->query and
thd->query_length should be saved before calling set_query() and to be
restored on the Annotate_rows_log_event object deletion.
Is it really needed ?
After calling this do_apply_event() function we may not delete the
Annotate_rows_log_event object immediatedly (see exec_relay_log_event()
above) because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
The saved Annotate_rows object should be deleted when all corresponding
Rows events will be processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers not to have possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this and suggesting to reserve the
event number.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 31
ESTIMATE.......: 4 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 08:29)=-=-
Help debug strange problem in mysqlbinlog.test.
Worked 1 hour and estimate 4 hours remain (original estimate unchanged).
-=-=(Knielsen - Mon, 17 May 2010, 08:45)=-=-
Merge with latest trunk and run Buildbot tests.
Worked 1 hour and estimate 5 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
------------------------------------------------------------
-=-=(View All Progress Notes, 31 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in the application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially inserts of big BLOBs would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. It has an
empty post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In the binary log, the Annotate_rows event follows the (possible) 'BEGIN'
Query event and precedes the first of the Table map events which accompany the
rows events. (See example in the "mysqlbinlog output" section below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. The session value allows one to annotate only selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events received from the master
in its own binary log (sensible only in combination with the log-slave-updates
option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not make
the slave reproduce these events whenever its binlog-annotate-rows-events
global value is ON? Because, for example, we may want to configure a
slave which reproduces Annotate_rows events but has global
binlog-annotate-rows-events = OFF, meaning that OFF is the default value
for the client threads (see also "How slave treats
replicate-annotate-rows-events option" in the LLD part).
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log does contain them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100 end_log_pos 1714 Query thread_id=1
exec_time=0 error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100 end_log_pos 1812 Query: `DELETE t1, t2 FROM
t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100 end_log_pos 1853 Table_map: `test`.`t1`
mapped to number 16
#091219 12:45:26 server id 100 end_log_pos 1894 Table_map: `test`.`t2`
mapped to number 17
#091219 12:45:26 server id 100 end_log_pos 1938 Delete_rows: table id 16
#091219 12:45:26 server id 100 end_log_pos 1982 Delete_rows: table id 17
flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event number conflicts with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of the 'post_header_len' array in
the Format description event. (This extension does not affect binary log
compatibility.) Here is what the Format description event looks like with
this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for brevity, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in the Log_event object from which all Log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member,
which calls the virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonyms in replication terminology).
In our case, the data header is empty and the data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
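Since the post-header is empty, the matching write_data_header() member is
trivial. A sketch (not shown above, but implied by the empty post-header; in
practice the base class default, which writes nothing, may already suffice):
bool write_data_header(IO_CACHE *file)
{
  /* Annotate_rows event has an empty post-header: nothing to write */
  return 0;
}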
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede the corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once, before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write the Annotate_rows event no more than once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option is treated just as the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This setting is done during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for the master to send Annotate_rows events to this slave. The
slave (or mysqlbinlog in the remote case), before requesting a binlog dump
via the COM_BINLOG_DUMP command, informs the master whether it should send
these events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT
server command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
Note. We add this new command and don't use COM_BINLOG_DUMP to avoid possible
conflicts with MySQL/Sun.
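On the slave side, the IO thread would issue this command just before its
regular dump request. A minimal sketch; the placement inside request_dump()
and the exact call shown here are illustrative only, not the actual patch
(BINLOG_SEND_ANNOTATE_ROWS_EVENT is the flag tested in section 6 below):
static int request_dump(MYSQL* mysql, Master_info* mi, ...)
{ ...
  /* illustrative: tell the master whether to send Annotate_rows events */
  uchar options= 0;
  if (opt_replicate_annotate_rows_events)
    options|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
  simple_command(mysql, COM_BINLOG_DUMP_OPTIONS_EXT, &options, 1, 0);
  ...
  /* the usual COM_BINLOG_DUMP request follows */
}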
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
Log_event::read_log_event(&log, packet, ...);
...
if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
{
my_net_write(net, packet->ptr(), packet->length());
}
...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each received event by "applying" it, i.e. by
calling the Log_event::apply_event() function which in turn calls
the virtual do_apply_event() member specific for each type of the
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set the
current thd query to the one described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" to follow what happens further when the subsequent
Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. I am not sure, but possibly the current values of thd->query and
thd->query_length should be saved before calling set_query() and restored
when the Annotate_rows_log_event object is deleted.
Is this really needed?
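If it turns out to be needed, a minimal sketch could look as follows; the
m_save_query_txt and m_save_query_len members are hypothetical, not part of
the design above:
int Annotate_rows_log_event::do_apply_event(...)
{
  /* hypothetical: remember the previous query so it can be restored */
  m_save_query_txt= thd->query();
  m_save_query_len= thd->query_length();
  thd->set_query(m_query_txt, m_query_len);
  return 0;
}
Annotate_rows_log_event::~Annotate_rows_log_event()
{
  /* hypothetical: restore the saved query when the event is deleted */
  thd->set_query(m_save_query_txt, m_save_query_len);
}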
After calling this do_apply_event() function we must not delete the
Annotate_rows_log_event object immediately (see exec_relay_log_event()
above) because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
The saved Annotate_rows object should be deleted when all corresponding
Rows events have been processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing a new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers to avoid possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this, suggesting to reserve the
event number.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 49
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Tue, 25 May 2010, 08:28)=-=-
More thoughts on and changes to the architecture. Got to something now that I am satisfied with and
that seems able to handle all issues.
Implement new prepare_ordered and commit_ordered handler methods and the logic in ha_commit_trans().
Implement TC_LOG::group_log_xid() method and logic in ha_commit_trans().
Implement XtraDB part, using commit_ordered() rather than prepare_commit_mutex.
Fix test suite failures.
Proof-of-concept patch series complete now.
Do initial benchmark, getting good results. With 64 threads, see 26x improvement in queries-per-sec.
Next step: write up the architecture description.
Worked 21 hours and estimate 0 hours remain (original estimate increased by 21 hours).
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparation for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all threads, executing the first part of
binlog writing (the write(2) syscall essentially). It then runs the second part (fsync(2)
essentially) only once, and then wakes up the remaining threads in the queue.
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc, 2) moving the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), 3) moving the fast part of InnoDB commit to innobase_commit_fast(), removing
the prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/different.
Think about possible next steps, such as releasing innodb row locks early (in
innobase_prepare_fast), and doing crash recovery by replaying transactions from the binlog (removing
the need for engine durability and 2 of 3 fsync() in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which storage engines and binary log are consistent with each other,
it is necessary to use XA with durable commits for both storage engines
(innodb_flush_log_at_trx_commit=1) and binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit, as there is no working group commit when the binary log is
enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
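For illustration, the queue-plus-leader scheme described in the progress
notes above could be sketched as follows; every name in this sketch is
illustrative, none of it is taken from the actual patch:
void binlog_group_commit(THD *thd)
{
  enqueue(thd);                      /* join the queue of committing threads */
  if (!is_queue_leader(thd))
  {
    wait_until_woken(thd);           /* the leader does the work for us */
    return;
  }
  lock(&LOCK_log);
  for (THD *t= queue_head(); t; t= t->next_in_queue)
    write_transaction_to_binlog(t);  /* the write(2) part, once per thread */
  fsync_binlog();                    /* the fsync(2) part, once per group */
  unlock(&LOCK_log);
  wake_up_queue_followers();
}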
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Updated (by Psergey): Push conditions down into non-mergeable VIEWs when possible (119)
by worklog-noreply@askmonty.org 25 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Push conditions down into non-mergeable VIEWs when possible
CREATION DATE..: Mon, 24 May 2010, 20:52
SUPERVISOR.....: Igor
IMPLEMENTOR....:
COPIES TO......: Psergey, Timour
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 119 (http://askmonty.org/worklog/?tid=119)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Psergey - Mon, 24 May 2010, 20:59)=-=-
High-Level Specification modified.
--- /tmp/wklog.119.old.25116 2010-05-24 20:59:40.000000000 +0000
+++ /tmp/wklog.119.new.25116 2010-05-24 20:59:40.000000000 +0000
@@ -1 +1,113 @@
+<contents>
+HLS
+1. Problems to be addressed in this WL
+2. Pushdown of conditions into non-mergeable VIEWs
+2.1 A note about VIEWs with ORDER BY ... LIMIT
+2.2 What condition can be pushed
+3. Pushdown from HAVING into WHERE
+4. When to do the pushdown
+5. Other things to take care of
+
+
+</contents>
+
+1. Problems to be addressed in this WL
+======================================
+The problem actually consists of two parts:
+1. Conditions on VIEW columns are not pushed down into VIEWs.
+2. Even if conditions were pushed, they would have been put into VIEW's
+HAVING clause, which would not give enough of a speedup. In order to get a
+real speedup, the optimizer must be able to move the relevant part of HAVING
+into WHERE (and then use it for further optimizations). Note that the
+HAVING->WHERE condition move is orthogonal to VIEW processing.
+
+2. Pushdown of conditions into non-mergeable VIEWs
+==================================================
+We can push a condition into non-mergeable VIEW when VIEW's top-most operation
+is selection (i.e., filtering). This is true, for example, when the VIEW is
+defined as
+
+ SELECT select_list FROM from_clause [WHERE where_cond] [HAVING having_cond]
+
+and not true when the VIEW is defined as
+
+ SELECT select_list FROM from_clause [WHERE where_cond] ORDER BY expr LIMIT n
+
+Generalizing the above, we arrive at the following rule:
+
+ For non-mergeable VIEWs,
+ - pushdown must not be done if VIEW uses ORDER BY .. LIMIT
+ - when pushdown is done, the pushed condition should be added to the WHERE
+ clause.
+
+Note: In scope of this WL, we will not handle VIEWs that have UNION [ALL] as
+top operation.
+
+(TODO: what about SELECT DISTINCT?)
+(TODO: push down into IN subqueries?)
+
+2.1 A note about VIEWs with ORDER BY ... LIMIT
+----------------------------------------------
+Although it is not possible to push a condition below the ORDER BY ... LIMIT
+operation, there is still some benefit from checking the condition early, as
+that would allow us to avoid writing non-matching rows into the temporary table.
+
+We could do that if we introduced a post-ORDERBY selection operation. That
+operation would also allow us to support ORDER BY ... LIMIT inside subqueries
+(we don't currently support those because default subquery strategy,
+IN->EXISTS rewrite, also needs to push a condition into subquery).
+
+2.2 What condition can be pushed
+--------------------------------
+Assuming simplify_joins() operation has done normalization:
+* If the VIEW is in top-level join list, or inside a semi-join that's in
+ top-level join list, we can push parts of WHERE condition.
+* If the VIEW is inside an outer join, we can push its ON expression.
+
+We can reuse make_cond_for_index()/make_remainder_cond() code to extract part
+of condition that can be pushed, and the remainder, respectively.
+
+The pushability criterion for an atomic (i.e. not AND/OR) condition is that
+
+ the condition only uses VIEW's fields.
+
+(TODO: what about fields of const tables? Do we have const tables already
+retrieved by the time the VIEW is materialized? If yes, we could push down
+expressions that refer to const tables, too)
+
+3. Pushdown from HAVING into WHERE
+==================================
+The idea is:
+
+ Parts of HAVING that refer to columns/expressions we're doing GROUP BY on
+ can be put into WHERE.
+
+(TODO: do we need to handle case of grouping over expressions?)
+
+(TODO: when moving expression for HAVING to WHERE, do we need
+to do something with it? Replace all Item_ref objects with items that
+they refer to?
+ - In case of referring to expression, do we get
+ Item_ref(where_clause_expr) or expr( Item_ref(..), .., Item_ref(..))?
+)
+
+4. When to do the pushdown
+==========================
+In order to do pushdown, we must have the prepare phase finished
+for both parent (so that we can make sense of its WHERE condition) and
+child (so that we know what it has in its select list).
+
+We can do pushdown before we've done join optimization (i.e. choose_plan()
+call) of the parent.
+
+We must do pushdown before we've done JOIN::optimize() of the child
+(in particular, it must be done before we do update_ref_and_keys() and
+range analysis in the child).
+
+
+5. Other things to take care of
+===============================
+* Pushing down fulltext predicates (it seems one needs to "register" a
+ fulltext predicate when it is moved from one select to another? Ask Serg)
DESCRIPTION:
There are complaints (see links below) about cases with a non-mergeable
VIEW (because it has a GROUP BY), a query that has restrictions on
the grouped column, and poor performance caused by the VIEW
processing code ignoring the restriction.
This WL is about addressing this issue.
links to complaints:
http://code.openark.org/blog/mysql/views-better-performance-with-condition-…
http://www.mysqlperformanceblog.com/2010/05/19/a-workaround-for-the-perform…
The target version is MariaDB 5.3+, because it has late optimization/execution
for FROM-subqueries/non-mergeable VIEWs, which makes it much more feasible to
inject something into the VIEW before it is optimized/executed.
HIGH-LEVEL SPECIFICATION:
<contents>
HLS
1. Problems to be addressed in this WL
2. Pushdown of conditions into non-mergeable VIEWs
2.1 A note about VIEWs with ORDER BY ... LIMIT
2.2 What condition can be pushed
3. Pushdown from HAVING into WHERE
4. When to do the pushdown
5. Other things to take care of
</contents>
1. Problems to be addressed in this WL
======================================
The problem actually consists of two parts:
1. Conditions on VIEW columns are not pushed down into VIEWs.
2. Even if conditions were pushed, they would have been put into VIEW's
HAVING clause, which would not give enough of a speedup. In order to get a
real speedup, the optimizer must be able to move the relevant part of HAVING
into WHERE (and then use it for further optimizations). Note that the
HAVING->WHERE condition move is orthogonal to VIEW processing.
2. Pushdown of conditions into non-mergeable VIEWs
==================================================
We can push a condition into non-mergeable VIEW when VIEW's top-most operation
is selection (i.e., filtering). This is true, for example, when the VIEW is
defined as
SELECT select_list FROM from_clause [WHERE where_cond] [HAVING having_cond]
and not true when the VIEW is defined as
SELECT select_list FROM from_clause [WHERE where_cond] ORDER BY expr LIMIT n
Generalizing the above, we arrive at the following rule:
For non-mergeable VIEWs,
- pushdown must not be done if VIEW uses ORDER BY .. LIMIT
- when pushdown is done, the pushed condition should be added to the WHERE
clause.
Note: In scope of this WL, we will not handle VIEWs that have UNION [ALL] as
top operation.
(TODO: what about SELECT DISTINCT?)
(TODO: push down into IN subqueries?)
2.1 A note about VIEWs with ORDER BY ... LIMIT
----------------------------------------------
Although it is not possible to push a condition below the ORDER BY ... LIMIT
operation, there is still some benefit from checking the condition early, as
that would allow us to avoid writing non-matching rows into the temporary table.
We could do that if we introduced a post-ORDERBY selection operation. That
operation would also allow us to support ORDER BY ... LIMIT inside subqueries
(we don't currently support those because default subquery strategy,
IN->EXISTS rewrite, also needs to push a condition into subquery).
2.2 What condition can be pushed
--------------------------------
Assuming simplify_joins() operation has done normalization:
* If the VIEW is in top-level join list, or inside a semi-join that's in
top-level join list, we can push parts of WHERE condition.
* If the VIEW is inside an outer join, we can push its ON expression.
We can reuse make_cond_for_index()/make_remainder_cond() code to extract part
of condition that can be pushed, and the remainder, respectively.
The pushability criterion for an atomic (i.e. not AND/OR) condition is that
the condition only uses VIEW's fields.
(TODO: what about fields of const tables? Do we have const tables already
retrieved by the time the VIEW is materialized? If yes, we could push down
expressions that refer to const tables, too)
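A sketch of how the extraction could look, in the spirit of
make_cond_for_index()/make_remainder_cond(). Item, Item_cond and
used_tables() are existing server classes/methods; make_cond_for_view()
itself and its exact logic are hypothetical:
Item *make_cond_for_view(Item *cond, table_map view_map)
{
  if (cond->type() == Item::COND_ITEM &&
      ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
  {
    /* for an AND condition, keep only the pushable conjuncts */
    Item_cond_and *new_cond= new Item_cond_and;
    List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
    Item *item;
    while ((item= li++))
    {
      Item *pushable= make_cond_for_view(item, view_map);
      if (pushable)
        new_cond->argument_list()->push_back(pushable);
    }
    return new_cond->argument_list()->elements ? new_cond : NULL;
  }
  /* an atomic condition is pushable iff it uses only the VIEW's fields */
  return (cond->used_tables() & ~view_map) ? NULL : cond;
}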
3. Pushdown from HAVING into WHERE
==================================
The idea is:
Parts of HAVING that refer to columns/expressions we're doing GROUP BY on
can be put into WHERE.
(TODO: do we need to handle case of grouping over expressions?)
(TODO: when moving expression for HAVING to WHERE, do we need
to do something with it? Replace all Item_ref objects with items that
they refer to?
- In case of referring to expression, do we get
Item_ref(where_clause_expr) or expr( Item_ref(..), .., Item_ref(..))?
)
4. When to do the pushdown
==========================
In order to do pushdown, we must have the prepare phase finished
for both parent (so that we can make sense of its WHERE condition) and
child (so that we know what it has in its select list).
We can do pushdown before we've done join optimization (i.e. choose_plan()
call) of the parent.
We must do pushdown before we've done JOIN::optimize() of the child
(in particular, it must be done before we do update_ref_and_keys() and
range analysis in the child).
5. Other things to take care of
===============================
* Pushing down fulltext predicates (it seems one needs to "register" a
fulltext predicate when it is moved from one select to another? Ask Serg)
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
3
2

[Maria-developers] Updated (by Psergey): Push conditions down into non-mergeable VIEWs when possible (119)
by worklog-noreply@askmonty.org 24 May '10
by worklog-noreply@askmonty.org 24 May '10
24 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Push conditions down into non-mergeable VIEWs when possible
CREATION DATE..: Mon, 24 May 2010, 20:52
SUPERVISOR.....: Igor
IMPLEMENTOR....:
COPIES TO......: Psergey, Timour
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 119 (http://askmonty.org/worklog/?tid=119)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Psergey - Mon, 24 May 2010, 20:59)=-=-
High-Level Specification modified.
--- /tmp/wklog.119.old.25116 2010-05-24 20:59:40.000000000 +0000
+++ /tmp/wklog.119.new.25116 2010-05-24 20:59:40.000000000 +0000
@@ -1 +1,113 @@
+<contents>
+HLS
+1. Problems to be addressed in this WL
+2. Pushdown of conditions into non-mergeable VIEWs
+2.1 A note about VIEWs with ORDER BY ... LIMIT
+2.2 What condition can be pushed
+3. Pushdown from HAVING into WHERE
+4. When to do the pushdown
+5. Other things to take care of
+
+
+</contents>
+
+1. Problems to be addressed in this WL
+======================================
+The problem actually consists of two parts:
+1. Condition on VIEW columns are not pushed down into VIEWs.
+2. Even if conditions were pushed, they would have been put into VIEW's
+HAVING clause, which would not give enough of speedup. In order to get a
+real speedup, the optimizer must be able to move relevant part of HAVING
+into WHERE (and then use it for further optimizations) in order to provide
+the desired speedup. Note that HAVING->WHERE condition move is orthogonal
+to VIEW processing.
+
+2. Pushdown of conditions into non-mergeable VIEWs
+==================================================
+We can push a condition into non-mergeable VIEW when VIEW's top-most operation
+is selection (i.e., filtering). This is true, for example, when the VIEW is
+defined as
+
+ SELECT select_list FROM from_clause [WHERE where_cond] [HAVING having_cond]
+
+and not true when the VIEW is defined as
+
+ SELECT select_list FROM from_clause [WHERE where_cond] ORDER BY expr LIMIT n
+
+Generalizing the above, we arrive at the following rule:
+
+ For non-mergeable VIEWs,
+ - pushdown must not be done if VIEW uses ORDER BY .. LIMIT
+ - when pushdown is done, the pushed condition should be added to the WHERE
+ clause.
+
+Note: In scope of this WL, we will not hande VIEWs that have UNION [ALL] as
+top operation.
+
+(TODO: what about SELECT DISTINCT?)
+(TODO: pushdown down into IN subqueries?)
+
+2.1 A note about VIEWs with ORDER BY ... LIMIT
+----------------------------------------------
+Although it is not possible to push a condition below the ORDER BY ... LIMIT
+operation, there is still some benefit from checking the condition early as
+that would allow to avoid writing non-matching rows into temporary table.
+
+We could do that if we introduced a post-ORDERBY selection operation. That
+operation would also allow to support ORDER BY ... LIMIT inside subqueries
+(we don't currently support those because default subquery strategy,
+IN->EXISTS rewrite, also needs to push a condition into subquery).
+
+2.2 What condition can be pushed
+--------------------------------
+Assuming simplify_joins() operation has done normalization:
+* If the VIEW is in top-level join list, or inside a semi-join that's in
+ top-level join list, we can push parts of WHERE condition.
+* If the VIEW is inside an outer join, we can push it's ON expression.
+
+We can reuse make_cond_for_index()/make_remainder_cond() code to extract part
+of condition that can be pushed, and the remainder, respectively.
+
+Pushability criteria for an atomic (i.e. not AND/OR) condition is that
+
+ the condition only uses VIEW's fields.
+
+(TODO: what about fields of const tables? Do we have const tables already
+retrived by the time VIEW is materialized? If yes, we could push down
+expressions that refer to const tables, too)
+
+3. Pushdown from HAVING into WHERE
+==================================
+The idea is:
+
+ Parts of HAVING that refer to columns/expressions we're doing GROUP BY on
+ can be put into WHERE.
+
+(TODO: do we need to handle case of grouping over expressions?)
+
+(TODO: when moving expression for HAVING to WHERE, do we need
+to do something with it? Replace all Item_ref objects with items that
+they refer to?
+ - In case of referring to expression, do we get
+ Item_ref(where_clause_expr) or expr( Item_ref(..), .., Item_ref(..))?
+)
+
+4. When to do the pushdown
+==========================
+In order to do pushdown, we must have prepare phase finished
+for both parent (so that we can make sense of its WHERE condition) and
+child (so that we know what it has in its select list).
+
+We can do pushdown before we've done join optimization (i.e. choose_plan()
+call) of the parent.
+
+We must do pushdown before we've done JOIN::optimize() of the child
+(in particular, it must be done before we do update_ref_and_keys() and
+range analysis in the child).
+
+
+5. Other things to take care of
+===============================
+* Pushing down fulltext predicates (it seems one needs to "register" a
+ fulltext predicate when it is moved from one select from another? Ask Serg)
DESCRIPTION:
There are complaints (see links below) about cases with non-mergeable
VIEW (because it has a GROUP BY), a query that has restrictions on
the grouped column, and poor performance that is caused by VIEW
processing code ignoring the restriction.
This WL is about addressing this issue.
links to complaints:
http://code.openark.org/blog/mysql/views-better-performance-with-condition-…
http://www.mysqlperformanceblog.com/2010/05/19/a-workaround-for-the-perform…
The target version is MariaDB 5.3+, because it has late optimization/execution
for FROM-subqueries/non mergeable VIEWs, which makes it much more feasible to
inject something into VIEW before it is optimized/executed.
HIGH-LEVEL SPECIFICATION:
<contents>
HLS
1. Problems to be addressed in this WL
2. Pushdown of conditions into non-mergeable VIEWs
2.1 A note about VIEWs with ORDER BY ... LIMIT
2.2 What condition can be pushed
3. Pushdown from HAVING into WHERE
4. When to do the pushdown
5. Other things to take care of
</contents>
1. Problems to be addressed in this WL
======================================
The problem actually consists of two parts:
1. Condition on VIEW columns are not pushed down into VIEWs.
2. Even if conditions were pushed, they would have been put into VIEW's
HAVING clause, which would not give enough of speedup. In order to get a
real speedup, the optimizer must be able to move relevant part of HAVING
into WHERE (and then use it for further optimizations) in order to provide
the desired speedup. Note that HAVING->WHERE condition move is orthogonal
to VIEW processing.
2. Pushdown of conditions into non-mergeable VIEWs
==================================================
We can push a condition into non-mergeable VIEW when VIEW's top-most operation
is selection (i.e., filtering). This is true, for example, when the VIEW is
defined as
SELECT select_list FROM from_clause [WHERE where_cond] [HAVING having_cond]
and not true when the VIEW is defined as
SELECT select_list FROM from_clause [WHERE where_cond] ORDER BY expr LIMIT n
Generalizing the above, we arrive at the following rule:
For non-mergeable VIEWs,
- pushdown must not be done if VIEW uses ORDER BY .. LIMIT
- when pushdown is done, the pushed condition should be added to the WHERE
clause.
Note: In scope of this WL, we will not hande VIEWs that have UNION [ALL] as
top operation.
(TODO: what about SELECT DISTINCT?)
(TODO: pushdown down into IN subqueries?)
2.1 A note about VIEWs with ORDER BY ... LIMIT
----------------------------------------------
Although it is not possible to push a condition below the ORDER BY ... LIMIT
operation, there is still some benefit from checking the condition early as
that would allow to avoid writing non-matching rows into temporary table.
We could do that if we introduced a post-ORDERBY selection operation. That
operation would also allow to support ORDER BY ... LIMIT inside subqueries
(we don't currently support those because default subquery strategy,
IN->EXISTS rewrite, also needs to push a condition into subquery).
2.2 What condition can be pushed
--------------------------------
Assuming simplify_joins() operation has done normalization:
* If the VIEW is in top-level join list, or inside a semi-join that's in
top-level join list, we can push parts of WHERE condition.
* If the VIEW is inside an outer join, we can push it's ON expression.
We can reuse make_cond_for_index()/make_remainder_cond() code to extract part
of condition that can be pushed, and the remainder, respectively.
Pushability criteria for an atomic (i.e. not AND/OR) condition is that
the condition only uses VIEW's fields.
(TODO: what about fields of const tables? Do we have const tables already
retrived by the time VIEW is materialized? If yes, we could push down
expressions that refer to const tables, too)
3. Pushdown from HAVING into WHERE
==================================
The idea is:
Parts of HAVING that refer to columns/expressions we're doing GROUP BY on
can be put into WHERE.
(TODO: do we need to handle case of grouping over expressions?)
(TODO: when moving expression for HAVING to WHERE, do we need
to do something with it? Replace all Item_ref objects with items that
they refer to?
- In case of referring to expression, do we get
Item_ref(where_clause_expr) or expr( Item_ref(..), .., Item_ref(..))?
)
4. When to do the pushdown
==========================
In order to do pushdown, we must have prepare phase finished
for both parent (so that we can make sense of its WHERE condition) and
child (so that we know what it has in its select list).
We can do pushdown before we've done join optimization (i.e. choose_plan()
call) of the parent.
We must do pushdown before we've done JOIN::optimize() of the child
(in particular, it must be done before we do update_ref_and_keys() and
range analysis in the child).
5. Other things to take care of
===============================
* Pushing down fulltext predicates (it seems one needs to "register" a
fulltext predicate when it is moved from one select from another? Ask Serg)
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Updated (by Psergey): Push conditions down into non-mergeable VIEWs when possible (119)
by worklog-noreply@askmonty.org 24 May '10
by worklog-noreply@askmonty.org 24 May '10
24 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Push conditions down into non-mergeable VIEWs when possible
CREATION DATE..: Mon, 24 May 2010, 20:52
SUPERVISOR.....: Igor
IMPLEMENTOR....:
COPIES TO......: Psergey, Timour
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 119 (http://askmonty.org/worklog/?tid=119)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Psergey - Mon, 24 May 2010, 20:59)=-=-
High-Level Specification modified.
--- /tmp/wklog.119.old.25116 2010-05-24 20:59:40.000000000 +0000
+++ /tmp/wklog.119.new.25116 2010-05-24 20:59:40.000000000 +0000
@@ -1 +1,113 @@
+<contents>
+HLS
+1. Problems to be addressed in this WL
+2. Pushdown of conditions into non-mergeable VIEWs
+2.1 A note about VIEWs with ORDER BY ... LIMIT
+2.2 What condition can be pushed
+3. Pushdown from HAVING into WHERE
+4. When to do the pushdown
+5. Other things to take care of
+
+
+</contents>
+
+1. Problems to be addressed in this WL
+======================================
+The problem actually consists of two parts:
+1. Condition on VIEW columns are not pushed down into VIEWs.
+2. Even if conditions were pushed, they would have been put into VIEW's
+HAVING clause, which would not give enough of speedup. In order to get a
+real speedup, the optimizer must be able to move relevant part of HAVING
+into WHERE (and then use it for further optimizations) in order to provide
+the desired speedup. Note that HAVING->WHERE condition move is orthogonal
+to VIEW processing.
+
+2. Pushdown of conditions into non-mergeable VIEWs
+==================================================
+We can push a condition into non-mergeable VIEW when VIEW's top-most operation
+is selection (i.e., filtering). This is true, for example, when the VIEW is
+defined as
+
+ SELECT select_list FROM from_clause [WHERE where_cond] [HAVING having_cond]
+
+and not true when the VIEW is defined as
+
+ SELECT select_list FROM from_clause [WHERE where_cond] ORDER BY expr LIMIT n
+
+Generalizing the above, we arrive at the following rule:
+
+ For non-mergeable VIEWs,
+ - pushdown must not be done if VIEW uses ORDER BY .. LIMIT
+ - when pushdown is done, the pushed condition should be added to the WHERE
+ clause.
+
+Note: In scope of this WL, we will not hande VIEWs that have UNION [ALL] as
+top operation.
+
+(TODO: what about SELECT DISTINCT?)
+(TODO: pushdown down into IN subqueries?)
+
+2.1 A note about VIEWs with ORDER BY ... LIMIT
+----------------------------------------------
+Although it is not possible to push a condition below the ORDER BY ... LIMIT
+operation, there is still some benefit from checking the condition early as
+that would allow to avoid writing non-matching rows into temporary table.
+
+We could do that if we introduced a post-ORDERBY selection operation. That
+operation would also allow to support ORDER BY ... LIMIT inside subqueries
+(we don't currently support those because default subquery strategy,
+IN->EXISTS rewrite, also needs to push a condition into subquery).
+
+2.2 What condition can be pushed
+--------------------------------
+Assuming simplify_joins() operation has done normalization:
+* If the VIEW is in top-level join list, or inside a semi-join that's in
+ top-level join list, we can push parts of WHERE condition.
+* If the VIEW is inside an outer join, we can push it's ON expression.
+
+We can reuse make_cond_for_index()/make_remainder_cond() code to extract part
+of condition that can be pushed, and the remainder, respectively.
+
+Pushability criteria for an atomic (i.e. not AND/OR) condition is that
+
+ the condition only uses VIEW's fields.
+
+(TODO: what about fields of const tables? Do we have const tables already
+retrived by the time VIEW is materialized? If yes, we could push down
+expressions that refer to const tables, too)
+
+3. Pushdown from HAVING into WHERE
+==================================
+The idea is:
+
+ Parts of HAVING that refer to columns/expressions we're doing GROUP BY on
+ can be put into WHERE.
+
+(TODO: do we need to handle case of grouping over expressions?)
+
+(TODO: when moving expression for HAVING to WHERE, do we need
+to do something with it? Replace all Item_ref objects with items that
+they refer to?
+ - In case of referring to expression, do we get
+ Item_ref(where_clause_expr) or expr( Item_ref(..), .., Item_ref(..))?
+)
+
+4. When to do the pushdown
+==========================
+In order to do pushdown, we must have prepare phase finished
+for both parent (so that we can make sense of its WHERE condition) and
+child (so that we know what it has in its select list).
+
+We can do pushdown before we've done join optimization (i.e. choose_plan()
+call) of the parent.
+
+We must do pushdown before we've done JOIN::optimize() of the child
+(in particular, it must be done before we do update_ref_and_keys() and
+range analysis in the child).
+
+
+5. Other things to take care of
+===============================
+* Pushing down fulltext predicates (it seems one needs to "register" a
+ fulltext predicate when it is moved from one select from another? Ask Serg)
DESCRIPTION:
There are complaints (see links below) about cases with non-mergeable
VIEW (because it has a GROUP BY), a query that has restrictions on
the grouped column, and poor performance that is caused by VIEW
processing code ignoring the restriction.
This WL is about addressing this issue.
links to complaints:
http://code.openark.org/blog/mysql/views-better-performance-with-condition-…
http://www.mysqlperformanceblog.com/2010/05/19/a-workaround-for-the-perform…
The target version is MariaDB 5.3+, because it has late optimization/execution
for FROM-subqueries/non mergeable VIEWs, which makes it much more feasible to
inject something into VIEW before it is optimized/executed.
HIGH-LEVEL SPECIFICATION:
<contents>
HLS
1. Problems to be addressed in this WL
2. Pushdown of conditions into non-mergeable VIEWs
2.1 A note about VIEWs with ORDER BY ... LIMIT
2.2 What condition can be pushed
3. Pushdown from HAVING into WHERE
4. When to do the pushdown
5. Other things to take care of
</contents>
1. Problems to be addressed in this WL
======================================
The problem actually consists of two parts:
1. Condition on VIEW columns are not pushed down into VIEWs.
2. Even if conditions were pushed, they would have been put into VIEW's
HAVING clause, which would not give enough of speedup. In order to get a
real speedup, the optimizer must be able to move relevant part of HAVING
into WHERE (and then use it for further optimizations) in order to provide
the desired speedup. Note that HAVING->WHERE condition move is orthogonal
to VIEW processing.
2. Pushdown of conditions into non-mergeable VIEWs
==================================================
We can push a condition into non-mergeable VIEW when VIEW's top-most operation
is selection (i.e., filtering). This is true, for example, when the VIEW is
defined as
SELECT select_list FROM from_clause [WHERE where_cond] [HAVING having_cond]
and not true when the VIEW is defined as
SELECT select_list FROM from_clause [WHERE where_cond] ORDER BY expr LIMIT n
Generalizing the above, we arrive at the following rule:
For non-mergeable VIEWs,
- pushdown must not be done if VIEW uses ORDER BY .. LIMIT
- when pushdown is done, the pushed condition should be added to the WHERE
clause.
Note: In scope of this WL, we will not hande VIEWs that have UNION [ALL] as
top operation.
(TODO: what about SELECT DISTINCT?)
(TODO: pushdown down into IN subqueries?)
2.1 A note about VIEWs with ORDER BY ... LIMIT
----------------------------------------------
Although it is not possible to push a condition below the ORDER BY ... LIMIT
operation, there is still some benefit from checking the condition early as
that would allow to avoid writing non-matching rows into temporary table.
We could do that if we introduced a post-ORDERBY selection operation. That
operation would also allow to support ORDER BY ... LIMIT inside subqueries
(we don't currently support those because default subquery strategy,
IN->EXISTS rewrite, also needs to push a condition into subquery).
2.2 What condition can be pushed
--------------------------------
Assuming simplify_joins() operation has done normalization:
* If the VIEW is in top-level join list, or inside a semi-join that's in
top-level join list, we can push parts of WHERE condition.
* If the VIEW is inside an outer join, we can push it's ON expression.
We can reuse make_cond_for_index()/make_remainder_cond() code to extract part
of condition that can be pushed, and the remainder, respectively.
Pushability criteria for an atomic (i.e. not AND/OR) condition is that
the condition only uses VIEW's fields.
(TODO: what about fields of const tables? Do we have const tables already
retrived by the time VIEW is materialized? If yes, we could push down
expressions that refer to const tables, too)
3. Pushdown from HAVING into WHERE
==================================
The idea is:
Parts of HAVING that refer to columns/expressions we're doing GROUP BY on
can be put into WHERE.
(TODO: do we need to handle case of grouping over expressions?)
(TODO: when moving expression for HAVING to WHERE, do we need
to do something with it? Replace all Item_ref objects with items that
they refer to?
- In case of referring to expression, do we get
Item_ref(where_clause_expr) or expr( Item_ref(..), .., Item_ref(..))?
)
4. When to do the pushdown
==========================
In order to do pushdown, we must have prepare phase finished
for both parent (so that we can make sense of its WHERE condition) and
child (so that we know what it has in its select list).
We can do pushdown before we've done join optimization (i.e. choose_plan()
call) of the parent.
We must do pushdown before we've done JOIN::optimize() of the child
(in particular, it must be done before we do update_ref_and_keys() and
range analysis in the child).
5. Other things to take care of
===============================
* Pushing down fulltext predicates (it seems one needs to "register" a
fulltext predicate when it is moved from one select to another? Ask Serg)
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] New (by Psergey): Push conditions down into non-mergeable VIEWs when possible (119)
by worklog-noreply@askmonty.org 24 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Push conditions down into non-mergeable VIEWs when possible
CREATION DATE..: Mon, 24 May 2010, 20:52
SUPERVISOR.....: Igor
IMPLEMENTOR....:
COPIES TO......: Psergey, Timour
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 119 (http://askmonty.org/worklog/?tid=119)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
DESCRIPTION:
There are complaints (see links below) about cases with a non-mergeable
VIEW (non-mergeable because it has a GROUP BY), a query that has restrictions
on the grouped column, and poor performance caused by the VIEW processing
code ignoring the restriction.
This WL is about addressing this issue.
links to complaints:
http://code.openark.org/blog/mysql/views-better-performance-with-condition-…
http://www.mysqlperformanceblog.com/2010/05/19/a-workaround-for-the-perform…
The target version is MariaDB 5.3+, because it has late optimization/execution
for FROM-subqueries/non-mergeable VIEWs, which makes it much more feasible to
inject something into the VIEW before it is optimized/executed.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Rev 2789: Subquery cache (as is) in file:///home/bell/maria/bzr/work-maria-5.3-scache/
by sanja@askmonty.org 24 May '10
At file:///home/bell/maria/bzr/work-maria-5.3-scache/
------------------------------------------------------------
revno: 2789
revision-id: sanja(a)askmonty.org-20100524172956-7b14x01aodizr3sq
parent: sergii(a)pisem.net-20100510134608-oyi2vznyghgcrt0x
committer: sanja(a)askmonty.org
branch nick: work-maria-5.3-scache
timestamp: Mon 2010-05-24 20:29:56 +0300
message:
Subquery cache (as is)
=== modified file 'libmysqld/Makefile.am'
--- a/libmysqld/Makefile.am 2010-03-20 12:01:47 +0000
+++ b/libmysqld/Makefile.am 2010-05-24 17:29:56 +0000
@@ -80,7 +80,8 @@
sql_tablespace.cc \
rpl_injector.cc my_user.c partition_info.cc \
sql_servers.cc event_parse_data.cc opt_table_elimination.cc \
- multi_range_read.cc opt_index_cond_pushdown.cc
+ multi_range_read.cc opt_index_cond_pushdown.cc \
+ sql_subquery_cache.cc
libmysqld_int_a_SOURCES= $(libmysqld_sources)
nodist_libmysqld_int_a_SOURCES= $(libmysqlsources) $(sqlsources)
=== modified file 'sql/CMakeLists.txt'
--- a/sql/CMakeLists.txt 2010-03-20 12:01:47 +0000
+++ b/sql/CMakeLists.txt 2010-05-24 17:29:56 +0000
@@ -78,7 +78,7 @@
rpl_rli.cc rpl_mi.cc sql_servers.cc
sql_connect.cc scheduler.cc
sql_profile.cc event_parse_data.cc opt_table_elimination.cc
- ds_mrr.cc
+ ds_mrr.cc sql_subquery_cache.cc
${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc
${PROJECT_SOURCE_DIR}/sql/sql_yacc.h
${PROJECT_SOURCE_DIR}/include/mysqld_error.h
=== modified file 'sql/Makefile.am'
--- a/sql/Makefile.am 2010-03-20 12:01:47 +0000
+++ b/sql/Makefile.am 2010-05-24 17:29:56 +0000
@@ -80,7 +80,7 @@
event_data_objects.h event_scheduler.h \
sql_partition.h partition_info.h partition_element.h \
contributors.h sql_servers.h \
- multi_range_read.h
+ multi_range_read.h sql_subquery_cache.h
mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \
item.cc item_sum.cc item_buff.cc item_func.cc \
@@ -130,7 +130,7 @@
sql_servers.cc event_parse_data.cc \
opt_table_elimination.cc \
multi_range_read.cc \
- opt_index_cond_pushdown.cc
+ opt_index_cond_pushdown.cc sql_subquery_cache.cc
nodist_mysqld_SOURCES = mini_client_errors.c pack.c client.c my_time.c my_user.c
=== modified file 'sql/item.cc'
--- a/sql/item.cc 2010-03-20 12:01:47 +0000
+++ b/sql/item.cc 2010-05-24 17:29:56 +0000
@@ -2273,6 +2273,13 @@
str->append(str_value);
}
+void Item_bool_cache::print(String *str, enum_query_type query_type)
+{
+ if (null_value)
+ str->append("NULL", 4);
+ else
+ Item_int::print(str, query_type);
+}
Item_uint::Item_uint(const char *str_arg, uint length):
Item_int(str_arg, length)
@@ -3646,12 +3653,17 @@
resolved_item->db_name : "");
const char *table_name= (resolved_item->table_name ?
resolved_item->table_name : "");
+ DBUG_ENTER("mark_as_dependent");
+ DBUG_PRINT("enter", ("Field '%s.%s.%s in select %d resolved in %d",
+ db_name, table_name,
+ resolved_item->field_name, current->select_number,
+ last->select_number));
/* store pointer on SELECT_LEX from which item is dependent */
if (mark_item)
mark_item->depended_from= last;
if (current->mark_as_dependent(thd, last, /** resolved_item psergey-thu
**/mark_item))
- return TRUE;
+ DBUG_RETURN(TRUE);
if (thd->lex->describe & DESCRIBE_EXTENDED)
{
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
@@ -3661,7 +3673,7 @@
resolved_item->field_name,
current->select_number, last->select_number);
}
- return FALSE;
+ DBUG_RETURN(FALSE);
}
@@ -3698,6 +3710,7 @@
resolving)
*/
SELECT_LEX *previous_select= current_sel;
+
for (; previous_select->outer_select() != last_select;
previous_select= previous_select->outer_select())
{
@@ -3726,6 +3739,7 @@
mark_as_dependent(thd, last_select, current_sel, resolved_item,
dependent);
}
+ return;
}
@@ -4098,6 +4112,9 @@
((ref_type == REF_ITEM ||
ref_type == FIELD_ITEM) ?
(Item_ident*) (*reference) : 0));
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ reference);
return 0;
}
}
@@ -4113,7 +4130,9 @@
((ref_type == REF_ITEM || ref_type == FIELD_ITEM) ?
(Item_ident*) (*reference) :
0));
-
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ reference);
/*
A reference to a view field had been found and we
substituted it instead of this Item (find_field_in_tables
@@ -4215,6 +4234,10 @@
mark_as_dependent(thd, last_checked_context->select_lex,
context->select_lex, rf,
rf);
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ reference);
+
return 0;
}
else
@@ -4222,6 +4245,9 @@
mark_as_dependent(thd, last_checked_context->select_lex,
context->select_lex,
this, (Item_ident*)*reference);
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ reference);
if (last_checked_context->select_lex->having_fix_field)
{
Item_ref *rf;
@@ -5973,6 +5999,9 @@
refer_type == FIELD_ITEM) ?
(Item_ident*) (*reference) :
0));
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ reference);
/*
view reference found, we substituted it instead of this
Item, so can quit
@@ -6023,6 +6052,9 @@
thd->change_item_tree(reference, fld);
mark_as_dependent(thd, last_checked_context->select_lex,
thd->lex->current_select, fld, fld);
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ reference);
/*
A reference is resolved to a nest level that's outer or the same as
the nest level of the enclosing set function : adjust the value of
@@ -6046,6 +6078,9 @@
DBUG_ASSERT(*ref && (*ref)->fixed);
mark_as_dependent(thd, last_checked_context->select_lex,
context->select_lex, this, this);
+ context->select_lex->
+ register_dependency_item(last_checked_context->select_lex,
+ ref);
/*
A reference is resolved to a nest level that's outer or the same as
the nest level of the enclosing set function : adjust the value of
=== modified file 'sql/item.h'
--- a/sql/item.h 2010-03-20 12:01:47 +0000
+++ b/sql/item.h 2010-05-24 17:29:56 +0000
@@ -1143,6 +1143,11 @@
{ return Field::GEOM_GEOMETRY; };
String *check_well_formed_result(String *str, bool send_error= 0);
bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs);
+
+ /**
+ Used to get reference on real item (not Item_ref)
+ */
+ virtual Item **unref(Item **my_ref) { return my_ref; };
};
@@ -1922,8 +1927,31 @@
virtual void print(String *str, enum_query_type query_type);
Item_num *neg ();
uint decimal_precision() const { return max_length; }
- bool check_partition_func_processor(uchar *bool_arg) { return FALSE;}
- bool check_vcol_func_processor(uchar *arg) { return FALSE;}
+};
+
+
+/**
+ Item represent TRUE/FALSE/NULL for subquery values
+*/
+
+class Item_bool_cache: public Item_int
+{
+public:
+ Item_bool_cache(): Item_int(0, 1)
+ {
+ unsigned_flag= maybe_null= null_value= TRUE;
+ name= (char *)"bool chache";
+ }
+ Item_bool_cache(my_bool val, my_bool null): Item_int(val, 1)
+ {
+ unsigned_flag= maybe_null= TRUE;
+ null_value= null;
+ name= (char *)"bool chache";
+ }
+ Item *clone_item() { return new Item_bool_cache(value, null_value); }
+ uint decimal_precision() const { return 1; }
+ virtual void print(String *str, enum_query_type query_type);
+ void set(my_bool val, my_bool null) {value= test(val); null_value= null;}
};
@@ -2479,6 +2507,11 @@
{
return trace_unsupported_by_check_vcol_func_processor("ref");
}
+
+ /**
+ Used to get reference on real item (not Item_ref)
+ */
+ virtual Item **unref(Item **my_ref) {return (*ref)->unref(ref); };
};
@@ -3146,7 +3179,8 @@
example(0), used_table_map(0), cached_field(0), cached_field_type(MYSQL_TYPE_STRING),
value_cached(0)
{
- fixed= 1;
+ fixed= 1;
+ maybe_null= 1;
null_value= 1;
}
Item_cache(enum_field_types field_type_arg):
@@ -3154,6 +3188,7 @@
value_cached(0)
{
fixed= 1;
+ maybe_null= 1;
null_value= 1;
}
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2010-03-20 12:01:47 +0000
+++ b/sql/item_cmpfunc.cc 2010-05-24 17:29:56 +0000
@@ -1736,6 +1736,13 @@
used_tables_cache|= args[1]->used_tables();
not_null_tables_cache|= args[1]->not_null_tables();
const_item_cache&= args[1]->const_item();
+ DBUG_ASSERT(scache == NULL);
+ if (args[0]->cols() ==1 &&
+ thd->variables.optimizer_switch & OPTIMIZER_SWITCH_SUBQUERY_CACHE)
+ {
+ sub->depends_on.push_front((Item**)&cache);
+ scache= new Subquery_cache_tmptable(thd, sub->depends_on, &result);
+ }
fixed= 1;
return FALSE;
}
@@ -1744,10 +1751,26 @@
longlong Item_in_optimizer::val_int()
{
bool tmp;
+ DBUG_ENTER("Item_in_optimizer::val_int");
+
DBUG_ASSERT(fixed == 1);
cache->store(args[0]);
cache->cache_value();
-
+
+ /* check if result is in the cache */
+ if (scache)
+ {
+ Subquery_cache_tmptable::result res;
+ Item *cached_value;
+ res= scache->check_value(&cached_value);
+ if (res == Subquery_cache_tmptable::HIT)
+ {
+ tmp= cached_value->val_int();
+ null_value= cached_value->null_value;
+ DBUG_RETURN(tmp);
+ }
+ }
+
if (cache->null_value)
{
/*
@@ -1818,11 +1841,18 @@
for (uint i= 0; i < ncols; i++)
item_subs->set_cond_guard_var(i, TRUE);
}
- return 0;
+ DBUG_RETURN(0);
}
tmp= args[1]->val_bool_result();
null_value= args[1]->null_value;
- return tmp;
+
+ /* put result in the cache */
+ if (scache)
+ {
+ result.set(tmp, null_value);
+ scache->put_value(&result);
+ }
+ DBUG_RETURN(tmp);
}
@@ -1839,6 +1869,11 @@
Item_bool_func::cleanup();
if (!save_cache)
cache= 0;
+ if (scache)
+ {
+ delete scache;
+ scache= 0;
+ }
DBUG_VOID_RETURN;
}
=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h 2010-03-20 12:01:47 +0000
+++ b/sql/item_cmpfunc.h 2010-05-24 17:29:56 +0000
@@ -215,6 +215,7 @@
class Item_cache;
+class Subquery_cache;
#define UNKNOWN ((my_bool)-1)
@@ -237,6 +238,10 @@
{
protected:
Item_cache *cache;
+ /* Subquery cache */
+ Subquery_cache *scache;
+ /* result representation for the subquery cache */
+ Item_bool_cache result;
bool save_cache;
/*
Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries:
@@ -247,7 +252,7 @@
my_bool result_for_null_param;
public:
Item_in_optimizer(Item *a, Item_in_subselect *b):
- Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0),
+ Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0), scache(NULL),
save_cache(0), result_for_null_param(UNKNOWN)
{}
bool fix_fields(THD *, Item **);
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-03-29 14:04:35 +0000
+++ b/sql/item_subselect.cc 2010-05-24 17:29:56 +0000
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000 MySQL AB
+/* Copyrigh (C) 2000 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -34,11 +34,10 @@
Item_subselect::Item_subselect():
Item_result_field(), value_assigned(0), thd(0), substitution(0),
- engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0),
- const_item_cache(1),
- inside_first_fix_fields(0), done_first_fix_fields(FALSE),
- eliminated(FALSE),
- engine_changed(0), changed(0), is_correlated(FALSE)
+ engine(0), old_engine(0), scache(0), used_tables_cache(0),
+ have_to_be_excluded(0), const_item_cache(1), inside_first_fix_fields(0),
+ done_first_fix_fields(FALSE), eliminated(FALSE), engine_changed(0),
+ changed(0), is_correlated(FALSE)
{
with_subselect= 1;
reset();
@@ -116,6 +115,12 @@
}
if (engine)
engine->cleanup();
+ depends_on.empty();
+ if (scache)
+ {
+ delete scache;
+ scache= 0;
+ }
reset();
value_assigned= 0;
DBUG_VOID_RETURN;
@@ -148,6 +153,8 @@
Item_subselect::~Item_subselect()
{
delete engine;
+ if (scache)
+ delete scache;
}
Item_subselect::trans_res
@@ -746,9 +753,19 @@
void Item_singlerow_subselect::fix_length_and_dec()
{
+ DBUG_ENTER("Item_singlerow_subselect::fix_length_and_dec");
if ((max_columns= engine->cols()) == 1)
{
+ DBUG_PRINT("info", ("one, elements: %u flag %u",
+ (uint)depends_on.elements,
+ (uint)test(thd->variables.optimizer_switch & OPTIMIZER_SWITCH_SUBQUERY_CACHE)));
engine->fix_length_and_dec(row= &value);
+ if (depends_on.elements && optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE))
+ {
+ DBUG_ASSERT(scache == NULL);
+ scache= new Subquery_cache_tmptable(thd, depends_on, value);
+ DBUG_PRINT("info", ("cache: 0x%lx", (ulong) scache));
+ }
}
else
{
@@ -765,6 +782,7 @@
*/
if (engine->no_tables())
maybe_null= engine->may_be_null();
+ DBUG_VOID_RETURN;
}
uint Item_singlerow_subselect::cols()
@@ -797,77 +815,200 @@
exec();
}
+
+Item *Item_subselect::check_cache()
+{
+ DBUG_ENTER("Item_subselect::check_cache");
+ if (scache)
+ {
+ Subquery_cache_tmptable::result res;
+ Item *cached_value;
+ res= scache->check_value(&cached_value);
+ if (res == Subquery_cache_tmptable::HIT)
+ DBUG_RETURN(cached_value);
+ }
+ DBUG_RETURN(NULL);
+}
+
double Item_singlerow_subselect::val_real()
{
+ Item *cached_value;
+ bool err;
+ DBUG_ENTER("Item_singlerow_subselect::val_real");
DBUG_ASSERT(fixed == 1);
- if (!exec() && !value->null_value)
+
+ if ((cached_value = check_cache()))
+ {
+ double res= cached_value->val_real();
+ if ((null_value= cached_value->null_value))
+ {
+ reset();
+ DBUG_RETURN(0);
+ }
+ else
+ DBUG_RETURN(res);
+ }
+
+ if (!(err= exec()) && !value->null_value)
{
null_value= 0;
- return value->val_real();
+ if (scache)
+ scache->put_value(value);
+ DBUG_RETURN(value->val_real());
}
else
{
reset();
- return 0;
+ DBUG_PRINT("info", ("error: %u", (uint)err));
+ if (scache && !err)
+ scache->put_value(&const_null_value);
+ DBUG_RETURN(0);
}
}
longlong Item_singlerow_subselect::val_int()
{
+ Item *cached_value;
+ bool err;
+ DBUG_ENTER("Item_singlerow_subselect::val_int");
DBUG_ASSERT(fixed == 1);
- if (!exec() && !value->null_value)
+
+ if ((cached_value = check_cache()))
+ {
+ longlong res= cached_value->val_int();
+ if ((null_value= cached_value->null_value))
+ {
+ reset();
+ DBUG_RETURN(0);
+ }
+ else
+ DBUG_RETURN(res);
+ }
+
+ if (!(err= exec()) && !value->null_value)
{
null_value= 0;
- return value->val_int();
+ if (scache)
+ scache->put_value(value);
+ DBUG_RETURN(value->val_int());
}
else
{
reset();
- return 0;
+ DBUG_PRINT("info", ("error: %u", (uint)err));
+ if (scache && !err)
+ scache->put_value(&const_null_value);
+ DBUG_RETURN(0);
}
}
String *Item_singlerow_subselect::val_str(String *str)
{
- if (!exec() && !value->null_value)
+ Item *cached_value;
+ bool err;
+ DBUG_ENTER("Item_singlerow_subselect::val_str");
+ DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ String *res= cached_value->val_str(str);
+ if ((null_value= cached_value->null_value))
+ {
+ reset();
+ DBUG_RETURN(0);
+ }
+ else
+ DBUG_RETURN(res);
+ }
+
+ if (!(err= exec()) && !value->null_value)
{
null_value= 0;
- return value->val_str(str);
+ if (scache)
+ scache->put_value(value);
+ DBUG_RETURN(value->val_str(str));
}
else
{
reset();
- return 0;
+ DBUG_PRINT("info", ("error: %u", (uint)err));
+ if (scache && !err)
+ scache->put_value(&const_null_value);
+ DBUG_RETURN(0);
}
}
my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value)
{
- if (!exec() && !value->null_value)
+ Item *cached_value;
+ bool err;
+ DBUG_ENTER("Item_singlerow_subselect::val_decimal");
+ DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ my_decimal *res= cached_value->val_decimal(decimal_value);
+ if ((null_value= cached_value->null_value))
+ {
+ reset();
+ DBUG_RETURN(0);
+ }
+ else
+ DBUG_RETURN(res);
+ }
+
+ if (!(err= exec()) && !value->null_value)
{
null_value= 0;
- return value->val_decimal(decimal_value);
+ if (scache)
+ scache->put_value(value);
+ DBUG_RETURN(value->val_decimal(decimal_value));
}
else
{
reset();
- return 0;
+ DBUG_PRINT("info", ("error: %u", (uint)err));
+ if (scache && !err)
+ scache->put_value(&const_null_value);
+ DBUG_RETURN(0);
}
}
bool Item_singlerow_subselect::val_bool()
{
- if (!exec() && !value->null_value)
+ Item *cached_value;
+ bool err;
+ DBUG_ENTER("Item_singlerow_subselect::val_bool");
+ DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ bool res= cached_value->val_bool();
+ if ((null_value= cached_value->null_value))
+ {
+ reset();
+ DBUG_RETURN(0);
+ }
+ else
+ DBUG_RETURN(res);
+ }
+
+ if (!(err= exec()) && !value->null_value)
{
null_value= 0;
- return value->val_bool();
+ if (scache)
+ scache->put_value(value);
+ DBUG_RETURN(value->val_bool());
}
else
{
reset();
- return 0;
+ DBUG_PRINT("info", ("error: %u", (uint)err));
+ if (scache && !err)
+ scache->put_value(&const_null_value);
+ DBUG_RETURN(0);
}
}
@@ -952,33 +1093,77 @@
void Item_exists_subselect::fix_length_and_dec()
{
+ DBUG_ENTER("Item_exists_subselect::fix_length_and_dec");
decimals= 0;
max_length= 1;
max_columns= engine->cols();
/* We need only 1 row to determine existence */
unit->global_parameters->select_limit= new Item_int((int32) 1);
+ if (substype() == EXISTS_SUBS && depends_on.elements &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_SUBQUERY_CACHE))
+ {
+ DBUG_ASSERT(scache == NULL);
+ scache= new Subquery_cache_tmptable(thd, depends_on, &result);
+ DBUG_PRINT("info", ("cache: 0x%lx", (ulong) scache));
+ }
+ DBUG_VOID_RETURN;
}
double Item_exists_subselect::val_real()
{
+ Item *cached_value;
+ DBUG_ENTER("Item_exists_subselect::val_int");
DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ double res= cached_value->val_real();
+ DBUG_ASSERT(!cached_value->null_value);
+ DBUG_RETURN(res);
+ }
+
if (exec())
{
reset();
- return 0;
- }
- return (double) value;
+ DBUG_RETURN(0);
+ }
+
+ if (scache)
+ {
+ result.set(value, FALSE);
+ scache->put_value(&result);
+ }
+
+ DBUG_RETURN((double) value);
}
longlong Item_exists_subselect::val_int()
{
+ Item *cached_value;
+ DBUG_ENTER("Item_exists_subselect::val_real");
+ DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ longlong res= cached_value->val_int();
+ DBUG_ASSERT(!cached_value->null_value);
+ DBUG_RETURN(res);
+ }
+
DBUG_ASSERT(fixed == 1);
if (exec())
{
reset();
- return 0;
- }
- return value;
+ DBUG_RETURN(0);
+ }
+
+ if (scache)
+ {
+ result.set(value, FALSE);
+ scache->put_value(&result);
+ }
+
+ DBUG_RETURN(value);
}
@@ -997,11 +1182,31 @@
String *Item_exists_subselect::val_str(String *str)
{
+ Item *cached_value;
+ DBUG_ENTER("Item_exists_subselect::val_str");
DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ String *res= cached_value->val_str(str);
+ DBUG_ASSERT(!cached_value->null_value);
+ DBUG_RETURN(res);
+ }
+
if (exec())
+ {
reset();
+ DBUG_RETURN(NULL);
+ }
+
+ if (scache)
+ {
+ result.set(value, FALSE);
+ scache->put_value(&result);
+ }
+
str->set((ulonglong)value,&my_charset_bin);
- return str;
+ DBUG_RETURN(str);
}
@@ -1020,23 +1225,60 @@
my_decimal *Item_exists_subselect::val_decimal(my_decimal *decimal_value)
{
+ Item *cached_value;
+ DBUG_ENTER("Item_exists_subselect::val_decvimal");
DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ my_decimal *res= cached_value->val_decimal(decimal_value);
+ DBUG_ASSERT(!cached_value->null_value);
+ DBUG_RETURN(res);
+ }
+
if (exec())
+ {
reset();
+ DBUG_RETURN(0);
+ }
+
+ if (scache)
+ {
+ result.set(value, FALSE);
+ scache->put_value(&result);
+ }
+
int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value);
- return decimal_value;
+ DBUG_RETURN(decimal_value);
}
bool Item_exists_subselect::val_bool()
{
+ Item *cached_value;
+ DBUG_ENTER("Item_exists_subselect::val_real");
DBUG_ASSERT(fixed == 1);
+
+ if ((cached_value = check_cache()))
+ {
+ my_bool res= cached_value->val_bool();
+ DBUG_ASSERT(!cached_value->null_value);
+ DBUG_RETURN(res);
+ }
+
if (exec())
{
reset();
- return 0;
- }
- return value != 0;
+ DBUG_RETURN(0);
+ }
+
+ if (scache)
+ {
+ result.set(value, FALSE);
+ scache->put_value(&result);
+ }
+
+ DBUG_RETURN(value != 0);
}
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-03-29 14:04:35 +0000
+++ b/sql/item_subselect.h 2010-05-24 17:29:56 +0000
@@ -27,6 +27,7 @@
class subselect_hash_sj_engine;
class Item_bool_func2;
class Cached_item;
+class Subquery_cache;
/* base class for subselects */
@@ -57,6 +58,10 @@
subselect_engine *engine;
/* old engine if engine was changed */
subselect_engine *old_engine;
+ /* subquery cache */
+ Subquery_cache *scache;
+ /* null consrtant for caching */
+ Item_null const_null_value;
/* cache of used external tables */
table_map used_tables_cache;
/* allowed number of columns (1 for single value subqueries) */
@@ -67,7 +72,7 @@
bool have_to_be_excluded;
/* cache of constant state */
bool const_item_cache;
-
+
bool inside_first_fix_fields;
bool done_first_fix_fields;
public:
@@ -88,13 +93,18 @@
*/
List<Ref_to_outside> upper_refs;
st_select_lex *parent_select;
-
- /*
+
+ /**
+ List of items subquery depends on (externally resolved);
+ */
+ List<Item*> depends_on;
+
+ /*
TRUE<=>Table Elimination has made it redundant to evaluate this select
(and so it is not part of QEP, etc)
- */
+ */
bool eliminated;
-
+
/* changed engine indicator */
bool engine_changed;
/* subquery is transformed */
@@ -178,6 +188,8 @@
return trace_unsupported_by_check_vcol_func_processor("subselect");
}
+ Item *check_cache();
+
/**
Get the SELECT_LEX structure associated with this Item.
@return the SELECT_LEX structure associated with this Item
@@ -202,6 +214,7 @@
{
protected:
Item_cache *value, **row;
+
public:
Item_singlerow_subselect(st_select_lex *select_lex);
Item_singlerow_subselect() :Item_subselect(), value(0), row (0) {}
@@ -268,6 +281,8 @@
{
protected:
bool value; /* value of this item (boolean: exists/not-exists) */
+ /* result representation for the subquery cache */
+ Item_bool_cache result;
public:
Item_exists_subselect(st_select_lex *select_lex);
=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h 2010-03-20 12:01:47 +0000
+++ b/sql/mysql_priv.h 2010-05-24 17:29:56 +0000
@@ -568,12 +568,13 @@
#define OPTIMIZER_SWITCH_SEMIJOIN 256
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
+#define OPTIMIZER_SWITCH_SUBQUERY_CACHE (1<<11)
#ifdef DBUG_OFF
-# define OPTIMIZER_SWITCH_LAST 2048
+# define OPTIMIZER_SWITCH_LAST (1<<12)
#else
-# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 2048
-# define OPTIMIZER_SWITCH_LAST 4096
+# define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1<<12)
+# define OPTIMIZER_SWITCH_LAST (1<<13)
#endif
#ifdef DBUG_OFF
@@ -588,7 +589,8 @@
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
- OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
+ OPTIMIZER_SWITCH_SUBQUERY_CACHE)
#else
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
@@ -601,7 +603,8 @@
OPTIMIZER_SWITCH_MATERIALIZATION | \
OPTIMIZER_SWITCH_SEMIJOIN | \
OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
- OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN|\
+ OPTIMIZER_SWITCH_SUBQUERY_CACHE)
#endif
/*
@@ -936,6 +939,7 @@
#ifdef MYSQL_SERVER
#include "sql_servers.h"
#include "opt_range.h"
+#include "sql_subquery_cache.h"
#ifdef HAVE_QUERY_CACHE
struct Query_cache_query_flags
@@ -1269,6 +1273,10 @@
Item *having, ORDER *proc_param, ulonglong select_type,
select_result *result, SELECT_LEX_UNIT *unit,
SELECT_LEX *select_lex);
+
+struct st_join_table *create_index_lookup_join_tab(TABLE *table);
+int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table,
+ struct st_table_ref *table_ref);
void free_underlaid_joins(THD *thd, SELECT_LEX *select);
bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit,
select_result *result);
@@ -1288,6 +1296,7 @@
bool table_cant_handle_bit_fields,
bool make_copy_field,
uint convert_blob_length);
+bool open_tmp_table(TABLE *table);
void sp_prepare_create_field(THD *thd, Create_field *sql_field);
int prepare_create_field(Create_field *sql_field,
uint *blob_columns,
=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc 2010-03-20 12:01:47 +0000
+++ b/sql/mysqld.cc 2010-05-24 17:29:56 +0000
@@ -305,6 +305,7 @@
"firstmatch","loosescan","materialization", "semijoin",
"partial_match_rowid_merge",
"partial_match_table_scan",
+ "subquery_cache",
#ifndef DBUG_OFF
"table_elimination",
#endif
@@ -325,6 +326,7 @@
sizeof("semijoin") - 1,
sizeof("partial_match_rowid_merge") - 1,
sizeof("partial_match_table_scan") - 1,
+ sizeof("subquery_cache") - 1,
#ifndef DBUG_OFF
sizeof("table_elimination") - 1,
#endif
@@ -404,8 +406,9 @@
static const char *optimizer_switch_str="index_merge=on,index_merge_union=on,"
"index_merge_sort_union=on,"
"index_merge_intersection=on,"
- "index_condition_pushdown=on"
-#ifndef DBUG_OFF
+ "index_condition_pushdown=on,"
+ "subquery_cache=on"
+#ifndef DBUG_OFF
",table_elimination=on";
#else
;
@@ -5872,7 +5875,9 @@
OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
OPT_RELAY_LOG_PURGE,
OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
- OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE,
+ OPT_SLAVE_TRANS_RETRIES,
+ OPT_SUBQUERY_CACHE,
+ OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE,
OPT_DEBUGGING, OPT_DEBUG_FLUSH,
OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
@@ -7164,7 +7169,7 @@
{"optimizer_switch", OPT_OPTIMIZER_SWITCH,
"optimizer_switch=option=val[,option=val...], where option={index_merge, "
"index_merge_union, index_merge_sort_union, index_merge_intersection, "
- "index_condition_pushdown"
+ "index_condition_pushdown, subquery_cache"
#ifndef DBUG_OFF
", table_elimination"
#endif
@@ -7868,6 +7873,8 @@
{"Ssl_version", (char*) &show_ssl_get_version, SHOW_FUNC},
#endif /* HAVE_OPENSSL */
{"Syncs", (char*) &my_sync_count, SHOW_LONG_NOFLUSH},
+ {"Subquery_cache_hit", (char*) &subquery_cache_hit, SHOW_LONG},
+ {"Subquery_cache_miss", (char*) &subquery_cache_miss, SHOW_LONG},
{"Table_locks_immediate", (char*) &locks_immediate, SHOW_LONG},
{"Table_locks_waited", (char*) &locks_waited, SHOW_LONG},
#ifdef HAVE_MMAP
@@ -8006,6 +8013,7 @@
abort_loop= select_thread_in_use= signal_thread_in_use= 0;
ready_to_exit= shutdown_in_progress= grant_option= 0;
aborted_threads= aborted_connects= 0;
+ subquery_cache_miss= subquery_cache_hit= 0;
delayed_insert_threads= delayed_insert_writes= delayed_rows_in_use= 0;
delayed_insert_errors= thread_created= 0;
specialflag= 0;
=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_base.cc 2010-05-24 17:29:56 +0000
@@ -8062,6 +8062,10 @@
if (*conds)
{
thd->where="where clause";
+ DBUG_EXECUTE("where",
+ print_where(*conds,
+ "WHERE in setup_conds",
+ QT_ORDINARY););
if ((!(*conds)->fixed && (*conds)->fix_fields(thd, conds)) ||
(*conds)->check_cols(1))
goto err_no_arena;
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_class.cc 2010-05-24 17:29:56 +0000
@@ -3020,6 +3020,7 @@
table_charset= 0;
precomputed_group_by= 0;
bit_fields_as_long= 0;
+ skip_create_table= 0;
DBUG_VOID_RETURN;
}
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-03-20 12:01:47 +0000
+++ b/sql/sql_class.h 2010-05-24 17:29:56 +0000
@@ -2786,12 +2786,17 @@
that MEMORY tables cannot index BIT columns.
*/
bool bit_fields_as_long;
+ /*
+ Whether to create or postpone actual creation of this temporary table.
+ TRUE <=> create_tmp_table will create only the TABLE structure.
+ */
+ bool skip_create_table;
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
schema_table(0), precomputed_group_by(0), force_copy_fields(0),
- bit_fields_as_long(0)
+ bit_fields_as_long(0), skip_create_table(0)
{}
~TMP_TABLE_PARAM()
{
=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc 2010-03-20 12:01:47 +0000
+++ b/sql/sql_lex.cc 2010-05-24 17:29:56 +0000
@@ -1829,6 +1829,53 @@
}
+/**
+ Registers reference on items on which the subqueries depends
+
+ @param last pointer to last st_select_lex struct, before
+ which all st_select_lex have to be marked as
+ dependent
+ @param dependency reference on the item on which all this
+ subqueries depends
+
+*/
+
+void st_select_lex::register_dependency_item(st_select_lex *last,
+ Item **dependency)
+{
+ SELECT_LEX *s= this;
+ DBUG_ENTER("st_select_lex::register_dependency_item");
+ DBUG_ASSERT(this != last);
+ DBUG_ASSERT(*dependency);
+ dependency= (*dependency)->unref(dependency);
+ do
+ {
+ /* check duplicates */
+ List_iterator_fast<Item*> li(s->master_unit()->item->depends_on);
+ Item **dep;
+ while ((dep= li++))
+ {
+ if ((*dep)->eq(*dependency, FALSE))
+ {
+ DBUG_PRINT("info", ("dependency %s already present",
+ ((*dependency)->name ?
+ (*dependency)->name :
+ "<no name>")));
+ DBUG_VOID_RETURN;
+ }
+ }
+
+ s->master_unit()->item->depends_on.push_back(dependency);
+ DBUG_PRINT("info", ("depends_on: Select: %d added: %s",
+ s->select_number,
+ ((*dependency)->name ?
+ (*dependency)->name :
+ "<no name>")));
+ } while ((s= s->outer_select()) != last && s != 0);
+ DBUG_VOID_RETURN;
+}
+
+
/*
st_select_lex_node::mark_as_dependent mark all st_select_lex struct from
this to 'last' as dependent
@@ -1843,7 +1890,7 @@
bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency)
{
-
+ DBUG_ENTER("st_select_lex::mark_as_dependent");
DBUG_ASSERT(this != last);
/*
@@ -1872,11 +1919,11 @@
Item_subselect *subquery_expr= s->master_unit()->item;
if (subquery_expr && subquery_expr->mark_as_dependent(thd, last,
dependency))
- return TRUE;
+ DBUG_RETURN(TRUE);
} while ((s= s->outer_select()) != last && s != 0);
is_correlated= TRUE;
this->master_unit()->item->is_correlated= TRUE;
- return FALSE;
+ DBUG_RETURN(FALSE);
}
bool st_select_lex_node::set_braces(bool value) { return 1; }
=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h 2010-03-20 12:01:47 +0000
+++ b/sql/sql_lex.h 2010-05-24 17:29:56 +0000
@@ -748,6 +748,7 @@
}
bool mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency);
+ void register_dependency_item(st_select_lex *last, Item **dependency);
bool set_braces(bool value);
bool inc_in_sum_expr();
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-05-10 13:46:08 +0000
+++ b/sql/sql_select.cc 2010-05-24 17:29:56 +0000
@@ -151,7 +151,6 @@
static int join_read_system(JOIN_TAB *tab);
static int join_read_const(JOIN_TAB *tab);
static int join_read_key(JOIN_TAB *tab);
-static int join_read_key2(JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref);
static void join_read_key_unlock_row(st_join_table *tab);
static int join_read_always_key(JOIN_TAB *tab);
static int join_read_last_key(JOIN_TAB *tab);
@@ -5209,7 +5208,7 @@
'join->best_positions' contains a complete optimal extension of the
current partial QEP.
*/
- DBUG_EXECUTE("opt", print_plan(join, join->tables,
+ DBUG_EXECUTE("opt", print_plan(join, n_tables,
record_count, read_time, read_time,
"optimal"););
DBUG_RETURN(FALSE);
@@ -7625,6 +7624,40 @@
/**
+ Creates and fills JOIN_TAB for index look up in temporary table
+
+ @param table The table where to look up
+
+ @return JOIN_TAB object or NULL in case of error
+*/
+
+JOIN_TAB *create_index_lookup_join_tab(TABLE *table)
+{
+ JOIN_TAB *tab;
+ DBUG_ENTER("create_index_lookup_join_tab");
+
+ if (!((tab= new JOIN_TAB)))
+ DBUG_RETURN(NULL);
+ tab->read_record.table= table;
+ tab->read_record.file=table->file;
+ /*tab->read_record.unlock_row= rr_unlock_row;*/
+ tab->next_select=0;
+ tab->sorted= 1;
+
+ table->status= STATUS_NO_RECORD;
+ tab->read_first_record= join_read_key;
+ /*tab->read_record.unlock_row= join_read_key_unlock_row;*/
+ tab->read_record.read_record= join_no_more_records;
+ if (table->covering_keys.is_set(tab->ref.key) &&
+ !table->no_keyread)
+ {
+ table->key_read=1;
+ table->file->extra(HA_EXTRA_KEYREAD);
+ }
+ DBUG_RETURN(tab);
+}
+
+/**
Give error if we some tables are done with a full join.
This is used by multi_table_update and multi_table_delete when running
@@ -10778,6 +10811,7 @@
case Item::REF_ITEM:
case Item::NULL_ITEM:
case Item::VARBIN_ITEM:
+ case Item::CACHE_ITEM:
if (make_copy_field)
{
DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -11552,7 +11586,8 @@
¶m->recinfo, select_options))
goto err;
}
- if (open_tmp_table(table))
+ DBUG_PRINT("info", ("skip_create_table: %d", (int)param->skip_create_table));
+ if (!param->skip_create_table && open_tmp_table(table))
goto err;
thd->mem_root= mem_root_save;
@@ -11700,16 +11735,17 @@
bool open_tmp_table(TABLE *table)
{
int error;
+ DBUG_ENTER("open_tmp_table");
if ((error= table->file->ha_open(table, table->s->table_name.str, O_RDWR,
HA_OPEN_TMP_TABLE |
HA_OPEN_INTERNAL_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
table->db_stat=0;
- return(1);
+ DBUG_RETURN(1);
}
(void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
- return(0);
+ DBUG_RETURN(0);
}
@@ -12540,7 +12576,8 @@
else
{
/* Do index lookup in the materialized table */
- if ((res= join_read_key2(join_tab, sjm->table, sjm->tab_ref)) == 1)
+ if ((res= join_read_key2(join_tab->join->thd, join_tab,
+ sjm->table, sjm->tab_ref)) == 1)
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
if (res || !sjm->in_equality->val_int())
DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
@@ -13323,61 +13360,61 @@
static int
join_read_key(JOIN_TAB *tab)
{
- return join_read_key2(tab, tab->table, &tab->ref);
+ return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
}
-/*
+/*
eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
not from the join_tab. See join_read_key for detailed synopsis.
*/
-static int
-join_read_key2(JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
+int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
{
int error;
+ DBUG_ENTER("join_read_key2");
if (!table->file->inited)
{
table->file->ha_index_init(table_ref->key, tab->sorted);
}
/* TODO: Why don't we do "Late NULLs Filtering" here? */
- if (cmp_buffer_with_ref(tab->join->thd, table, table_ref) ||
+ if (cmp_buffer_with_ref(thd, table, table_ref) ||
(table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
{
if (table_ref->key_err)
{
table->status=STATUS_NOT_FOUND;
- return -1;
+ DBUG_RETURN(-1);
}
/*
Moving away from the current record. Unlock the row
in the handler if it did not match the partial WHERE.
*/
- if (tab->ref.has_record && tab->ref.use_count == 0)
+ if (table_ref->has_record && table_ref->use_count == 0)
{
tab->read_record.file->unlock_row();
- tab->ref.has_record= FALSE;
+ table_ref->has_record= FALSE;
}
error=table->file->ha_index_read_map(table->record[0],
table_ref->key_buff,
make_prev_keypart_map(table_ref->key_parts),
HA_READ_KEY_EXACT);
if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
- return report_error(table, error);
+ DBUG_RETURN(report_error(table, error));
if (! error)
{
- tab->ref.has_record= TRUE;
- tab->ref.use_count= 1;
+ table_ref->has_record= TRUE;
+ table_ref->use_count= 1;
}
}
else if (table->status == 0)
{
- DBUG_ASSERT(tab->ref.has_record);
- tab->ref.use_count++;
+ DBUG_ASSERT(table_ref->has_record);
+ table_ref->use_count++;
}
table->null_row=0;
- return table->status ? -1 : 0;
+ DBUG_RETURN(table->status ? -1 : 0);
}
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-03-20 12:01:47 +0000
+++ b/sql/table.cc 2010-05-24 17:29:56 +0000
@@ -20,6 +20,7 @@
#include "sql_trigger.h"
#include <m_ctype.h>
#include "my_md5.h"
+#include "my_bit.h"
/* INFORMATION_SCHEMA name */
LEX_STRING INFORMATION_SCHEMA_NAME= {C_STRING_WITH_LEN("information_schema")};
@@ -5096,6 +5097,115 @@
file->column_bitmaps_signal();
}
+
+/**
+ @brief
+ Allocate space for keys
+
+ @param key_count number of keys to allocate.
+
+ @details
+ Allocate space enough to fit 'key_count' keys for this table.
+
+ @return FALSE space was successfully allocated.
+ @return TRUE an error occur.
+*/
+
+bool TABLE::alloc_keys(uint key_count)
+{
+ DBUG_ASSERT(!s->keys);
+ key_info= s->key_info= (KEY*) my_malloc(sizeof(KEY)*key_count, MYF(0));
+ max_keys= key_count;
+ return !(key_info);
+}
+
+
+/**
+ @brief Adds one key to a temporary table.
+
+ @param key_parts bitmap of fields that take a part in the key.
+ @param key_name name of the key
+
+ @details
+ Creates a key for this table from fields which corresponds the bits set to 1
+ in the 'key_parts' bitmap. The 'key_name' name is given to the newly created
+ key.
+
+ @return <0 an error occur.
+ @return >=0 number of newly added key.
+*/
+
+int TABLE::add_tmp_key(ulonglong key_parts, const char *key_name)
+{
+ DBUG_ASSERT(s->keys< max_keys);
+
+ KEY* keyinfo;
+ Field **reg_field;
+ uint i;
+ bool key_start= TRUE;
+ uint key_part_count= my_count_bits(key_parts);
+ KEY_PART_INFO* key_part_info=
+ (KEY_PART_INFO*) my_malloc(sizeof(KEY_PART_INFO)* key_part_count, MYF(0));
+ if (!key_part_info)
+ return -1;
+ keyinfo= key_info + s->keys;
+ keyinfo->key_part=key_part_info;
+ keyinfo->usable_key_parts=keyinfo->key_parts= key_part_count;
+ keyinfo->key_length=0;
+ keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+ keyinfo->name= (char *)key_name;
+ keyinfo->flags= HA_GENERATED_KEY;
+ keyinfo->rec_per_key= (ulong*)my_malloc(sizeof(ulong)*key_part_count, MYF(0));
+ if (!keyinfo->rec_per_key)
+ return -1;
+ bzero(keyinfo->rec_per_key, sizeof(ulong)*key_part_count);
+ for (i= 0, reg_field=field ;
+ *reg_field;
+ i++, reg_field++)
+ {
+ if (!(key_parts & (1 << i)))
+ continue;
+ if (key_start)
+ (*reg_field)->key_start.set_bit(s->keys);
+ key_start= FALSE;
+ (*reg_field)->part_of_key.set_bit(s->keys);
+ (*reg_field)->flags|= PART_KEY_FLAG;
+ key_part_info->null_bit= (*reg_field)->null_bit;
+ key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
+ (uchar*) record[0]);
+ key_part_info->field= *reg_field;
+ key_part_info->offset= (*reg_field)->offset(record[0]);
+ key_part_info->length= (uint16) (*reg_field)->pack_length();
+ keyinfo->key_length+= key_part_info->length;
+ /* TODO:
+ The below method of computing the key format length of the
+ key part is a copy/paste from opt_range.cc, and table.cc.
+ This should be factored out, e.g. as a method of Field.
+ In addition it is not clear if any of the Field::*_length
+ methods is supposed to compute the same length. If so, it
+ might be reused.
+ */
+ key_part_info->store_length= key_part_info->length;
+
+ if ((*reg_field)->real_maybe_null())
+ key_part_info->store_length+= HA_KEY_NULL_LENGTH;
+ if ((*reg_field)->type() == MYSQL_TYPE_BLOB ||
+ (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR)
+ key_part_info->store_length+= HA_KEY_BLOB_LENGTH;
+
+ key_part_info->type= (uint8) (*reg_field)->key_type();
+ key_part_info->key_type =
+ ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT ||
+ (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
+ (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
+ 0 : FIELDFLAG_BINARY;
+ key_part_info++;
+ }
+ set_if_bigger(s->max_key_length, keyinfo->key_length);
+ return ++s->keys - 1;
+}
+
+
/**
@brief Check if this is part of a MERGE table with attached children.
=== modified file 'sql/table.h'
--- a/sql/table.h 2010-03-20 12:01:47 +0000
+++ b/sql/table.h 2010-05-24 17:29:56 +0000
@@ -781,6 +781,7 @@
uint temp_pool_slot; /* Used by intern temp tables */
uint status; /* What's in record[0] */
uint db_stat; /* mode of file as in handler.h */
+ uint max_keys; /* Size of allocated key_info array. */
/* number of select if it is derived table */
uint derived_select_number;
int current_lock; /* Type of lock on table */
@@ -914,6 +915,8 @@
inline bool needs_reopen_or_name_lock()
{ return s->version != refresh_version; }
bool is_children_attached(void);
+ bool alloc_keys(uint key_count);
+ int add_tmp_key(ulonglong key_parts, const char *key_name);
};
enum enum_schema_table_state
=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc 2010-03-20 12:01:47 +0000
+++ b/storage/maria/ha_maria.cc 2010-05-24 17:29:56 +0000
@@ -995,6 +995,8 @@
{
MARIA_HA *tmp= file;
file= 0;
+ if (!tmp)
+ return 0;
return maria_close(tmp);
}
1
0
Could anyone please explain data representation, both on disk and in
memory? Especially about BLOB fields
What was the reasons to move hot data from OS file cache to MySQL
memory? Is it great improvement of efficiency(to avoid kernel calls) or
necessity for future transaction support?
--
This message was sent from Launchpad by the user
Igor Kozyrev (https://launchpad.net/~ikozyrev36)
using the "Contact this team" link on the Maria developers
team page to each member directly.
For more information see
https://help.launchpad.net/YourAccount/ContactingPeople
1
0
Hello everyone,
I'm working on the documentation for the Segmented Key Cache, one of
the new features in MariaDB 5.2.
The documentation page will have "about", "syntax", and "example"
sections.
I've based the "About" section on the High-Level Description from
the worklog ( http://askmonty.org/worklog/Server-Sprint/?tid=85 ) and
from the description in the mysys/mf_keycache.c file in the source.
Here is my first draft. I've changed the wording a bit to help
it flow better and I've changed "partition" to "segment" in keeping with
the official name.
----------------------------------------------------------------------
== About Segmented Key Cache ==
A segmented key cache is a collection of structures for regular MyISAM
key caches called key cache segments. Segmented key caches mitigate one
of the major problems of the simple key cache: thread contention for the
key cache lock (mutex). With regular key caches, every call of a key cache
interface function must acquire this lock. Threads compete for this
lock even when the file and pages they have acquired shared locks for
are in the key cache buffers.
When working with a segmented key cache, any key cache interface
function that needs only one page has to acquire the key cache lock
only for the segment the page is assigned to. This reduces the chances
that threads will have to compete for the same key cache lock.
Any page from a file can be placed into a buffer of only one segment.
The number of the segment is calculated from the file number and the
position of the page in the file, and it's always the same for the
page. Pages are evenly distributed among segments.
The idea and the original code of the segmented key cache was provided
by Fredrik Nylander from Stardoll.com. The code was extensively
reworked, improved, and eventually merged into MariaDB by Igor Babaev
from Monty Program.
----------------------------------------------------------------------
If there are any factual errors with the above, let me know.
Improvements and suggestions are also welcome.
One thing I'm not happy with is this sentence:
"Threads compete for this lock even when the file and pages they
have acquired shared locks for are in the key cache buffers."
The original sentence from the worklog reads:
So threads compete for this lock even in the case when they have
acquired shared locks for the file and pages they want read from
are in the key cache buffers.
... which is very confusing to me. If anyone has a better or more
accurate rewording, please send it my way.
Also, there's a note in the source code (lines 5018-5023 of
mysys/mf_keycache.c) which states:
Unfortunately if we use a partitioned key cache with N partitions
for B-tree indexes we can't say that the chances becomes N times
less. The fact is that any index lookup operation requires reading
from the root page that, for any index, is always ascribed to the
same partition. To resolve this problem we should have employed
more sophisticated mechanisms of working with root pages.
Do any of you have any opinions on whether or not this should be
mentioned somewhere in the documentation? The only issue I have with
mentioning it is that if I do, the first question that comes to my mind
is: "If it is not N-times less, how much less is it?" I don't have an
answer to that question (and math isn't one of my strengths).
Next up will be the "Syntax" section.
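For reference, I expect the syntax to look roughly like this (a sketch based
on the worklog; the cache name and sizes below are just examples):

  -- create a key cache with 7 segments and assign an index to it
  SET GLOBAL keycache1.key_cache_segments = 7;
  SET GLOBAL keycache1.key_buffer_size = 128*1024;
  CACHE INDEX t1 IN keycache1;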
Thanks.
--
Daniel Bartholomew
Monty Program - http://askmonty.org
Re: [Maria-developers] MySQL 5.1.46 diff of sql/sql_select.cc for review
by Kristian Nielsen 19 May '10
Igor Babaev <igor(a)askmonty.org> writes:
> Here's my review.
Thanks a lot for your help! I checked through all your points (a few detailed
comments below); all your proposed solutions seem reasonable to me.
In summary, we should as you suggest:
- Rework patches for bugs 39022, 48483, and 49324 as you described (1,7,8).
- Revert bug 45640 patch (5) and instead apply your proposed patch.
- Revert patches for bugs 51242 and 52336, and instead apply your proposed
fix (15,18).
- Reject (eg. revert in 5.1-release) patch for bug 39653 (2)
- Do nothing for bugs 40277, 45195, 45989, 49902, 50995, and 51494
(3,4,6,10,14,16), as necessary changes were already pushed to 5.1-release.
- Do nothing for bugs 49829, 50335, 50591, 50843, and 52177 (9,11,12,13,17),
as the patches for those are ok.
You proposed on the call yesterday that you could prepare a patch with these
changes for our merge tree:
lp:~maria-captains/maria/5.1-release
Please do so. I will try to get hold of Monty and discuss with him, but I
think he will agree with making your proposed changes.
If you need a full review for your changes, you will probably need to ask
someone else than me, as I am unfamiliar with this part of the code.
Thanks,
- Kristian.
> 1. Bug #39022 (by Georgi Kodinov)
> ----------------------------------
>
> SYNOPSIS
> Any call of SQL_SELECT::skip_record ignores the fact that
> during evaluation of an expression (item) an error may occur.
>
> CONCLUSION
> The patch cannot be applied as it is, requires some rework.
>
> REASONS:
> The patch is incomplete:
> There are 5 calls of SQL_SELECT::skip_record in total:
> two - in sql_select.cc, and three others in
> filesort.cc, sql_delete.cc, sql_update.cc.
> Only the calls in sql_select.cc are handled in an error aware
> manner. The second call in sql_select is handled incorrectly:
> the error occurred at the evaluation of the select condition
> is caught only if the function returns false.
>
> POSSIBLE SOLUTION:
> change the synopsis of SQL_SELECT::skip_record:
> int SQL_SELECT::skip_record()
> {
> int rc= test(cond && !cond->val_int());
> if (thd->is_error()) rc= -1;
> return rc;
> }
>
> (thd must be added to SQL_SELECT)
>
> then after each call of SQL_SELECT::skip_record
> add error handling for the cases when
> SQL_SELECT::skip_record returns -1:
> int rc= 0;
> if (!select || (rc= select->skip_record()) != 0)
> {
> if (rc < 0)
> {
> /* handle error returned by skip_record() */
> ...
> }
> ...
> }
Agree with your proposed solution.
> 2. Bug #39653 (by Gleb Shchepa)
>
> SYNOPSIS
> InnoDB covering primary index is used when using another
> covering index is more beneficial.
>
> CONCLUSION
> The patch must be rejected.
>
> REASONS:
> The patch is based on the completely wrong idea that any
> covering secondary index is better than a primary covering index
> for scanning in InnoDB.
>
> Here an example demonstrating that it's not so:
> CREATE TABLE t1 (
> a int, b int, c int,
> PRIMARY KEY (a, b),
> KEY idx (a,c)
> );
> Both primary key and the secondary key here are covering for the query
> SELECT a FROM t1 WHERE a BETWEEN 1000 and 5000;
> Apparently scanning by the primary key will be faster here as it does
> not use random seeks.
>
> The patch completely ignores non-InnoDB engines.
>
> POSSIBLE SOLUTION:
> Cost based choice that takes into account that sequential access is
> C times faster than random access.
I am ok with rejecting the patch.
It's hard to tell whether primary key or index will be better to use, in some
cases secondary index may require no random seeks if it is not fragmented
while primary key could be. In any case this seems a dangerous change in a
stable release (user can force index of choice if he/she has more information
about which will be best).
> 3. Bug #40277 (by Davi Arnaut)
> -------------------------------
> see the notes from Monty's review
Ok, Monty already fixed this in 5.1-release. Agree.
> 4. 45195 (by Sergey Glukhov)
> -----------------------------
> SYNOPSIS
> Reading uninitialized bytes from join cache buffer.
> (a Valgrind complaint)
>
> CONCLUSION
> The patch should be accepted as it is
> (Monty has some a comment on this patch though).
Agree, Monty already pushed his fixes to 5.1-release.
> 5. Bug 45640 (by Gleb Shchepa)
> -------------------------------
> SYNOPSIS.
> Building Item_ref objects of a wrong type for outer references used
> in aliased expressions in a select with group by causes wrong
> results. Group by expressions containing references to aliases may
> cause wrong results.
>
> CONCLUSION.
> The patch cannot be accepted as it is, requires a serious re-work.
>
> REASON.
> Although the basic ideas behind the fix appear to be be valid their
> implementation is quite clumsy:
> -an unnecessary parameter is added to the function fix_inner_refs
> -the info about the syntax context of a field referenced is passed
> into Item_field::fix_fields in an unconventional and ugly manner
> -the group expression are traversed for each reference of the list
> of inner references
>
> POSSIBLE SOLUTION
> See the patch at the very end of the post.
Agree with proposed solution, using the patch at end.
> 6. Bug #45989 (by Georgi Kodinov)
> ----------------------------------
> This bug has been already fixed in MariaDB 5.1.44.
> Our fix is correct, the fix by Georgi is not quite correct
> (but not harmful).
Agree, yes the better fix is already pushed.
> 7. Bug #48483 (by Sergey Glukhov)
> ----------------------------------
> (No public access)
>
> SYNOPSIS
> Wrong calculation of table dependencies for join queries with
> outer join operation causes a crash.
>
> CONCLUSION
> The patch can be accepted with one change that matters
> - if (!((prev_table->on_expr->used_tables() & ~RAND_TABLE_BIT) &
> - ~prev_used_tables))
> + if (!((prev_table->on_expr->used_tables() &
> + ~(OUTER_REF_BIT | RAND_TABLE_BIT)) &
> + ~prev_used_tables))
I assume you mean OUTER_REF_TABLE_BIT here. Ok.
> 8. Bug #49324 (by Georgi Kodinov)
> ----------------------------------
> SYNOPSIS
> With InnoDB a GROUP BY / ORDER BY can use an index extended by some
> number of major components of the primary. The value of rec_per_key
> for such extended indexes must be calculated in a special way.
>
> CONCLUSION
> The patch could be accepted after changing the formula that calculates
> the value of rec_per_key for an extended index:
>
> - rec_per_key= used_key_parts &&
> - used_key_parts <= keyinfo->key_parts ?
> - keyinfo->rec_per_key[used_key_parts-1] : 1;
> + int used_index_parts= keyinfo->key_parts;
> + int used_pk_parts= 0;
> + set_if_bigger(used_pk_parts,
> + used_key_parts-used_index_parts);
> + rec_per_key= keyinfo->rec_per_key[used_key_parts-1];
> + if (used_pk_parts)
> + {
> + KEY *pkinfo= tab->table->key_info+table->s->primary_key;
> + rec_per_key*= pkinfo->rec_per_key[used_pk_parts-1];
> + rec_per_key/= pkinfo->rec_per_key[0];
> + }
>
> REASONS
> The formula in the patch does not take into account how many
> components of the primary key are used in the extended index.
Ok (I don't understand the calculation, but not knowing the code I'm willing
to take your word for it).
> 9. Bug #49829 (by Staale Smedseng)
> ----------------------------------
> SYNOPSIS
> Compiler problems (warnings) for a platform
>
> CONCLUSION
> The patch is ok.
Agree (it was kind of nice to see the explanation for these warnings, which I
think I saw before but didn't know what meant).
> 10. Bug #49902 (by Sergey Vojtovich)
> -----------------------------------
> See the comments/suggestions from Monty's review
Yes, this is pushed to 5.1-release (and I agree with Monty's comment).
> 11. Bug #50335 (by Alexey Kopytov)
> -----------------------------------
> (No public access)
>
> SYNOPSIS
> Failure of a wrong assertion
>
> CONCLUSION
> The patch is ok.
Ok.
> 12. Bug #50591 (by Sergey Glukhov)
> -----------------------------------
> SYNOPSIS
> Wrong result for a grouping query over a table with a BIT field
>
> CONCLUSION
> The patch is ok.
Ok.
> 13. Bug #50843 (by Evgeny Potemkin)
> ------------------------------------
> SYNOPSIS
> Performance degradation problem when join cache + filesort
> are used instead of a full index scan by a primary key.
>
> CONCLUSION
> The patch looks ok.
Ok.
> 14. Bug #50995 (by Sergey Glukhov)
> ------------------------------------
> SYNOPSIS
> A badly formed list for conditions causes wrong query results.
>
> CONCLUSION
> The patch looks ok to me.
> See also Monty's recommendation from his review.
Yes. As far as I can see, Monty pushed his changes from his review to
5.1-release.
He did however not update the comments for eliminate_item_equal() as per his
suggestion (maybe he forgot):
"Note that we should update the function comment for eliminate_item_equal()
as this can't return 0. (If it would, then other things would break, just
look at how this function is used)."
> 15. Bug #51242 (by Sergey Glukhov)
> -----------------------------------
> SYNOPSIS
> The conjuncts that become false after substitution of the constant
> tables are ignored.
>
> CONCLUSION
> The fix should be turned down (but not the test case).
>
> REASON
> See the reasons for turning down the fix for bug #52336, which is a
> correction for this patch.
>
> POSSIBLE SOLUTION
> See the solution for bug #52336.
> 16. Bug #51494 (by Sergey Glukhov)
> -----------------------------------
> SYNOPSIS
> Crash with explain of a query with outer join
>
> CONCLUSION
> The patch must be turned down.
>
> REASONS
> The patch triggers bug #53334 - a failure of a base join
> query for InnoDB.
> The bug is actually fixed by the patch for bug #52177
> (see my comment for bug #53334).
Yes, it is already reverted in our tree.
> 17. Bug #52177 (by Sergey Glukhov)
> -----------------------------------
> SYNOPSIS
> Crash with explain for a query with an outer join.
>
> CONCLUSION
> The fix is correct and the patch should be applied.
> The patch also fixes the bug #51494.
Ok.
> 18. Bug #52336 (by Sergey Glukhov)
> -----------------------------------
> SYNOPSIS
> A crash caused by an invalid fix for bug #51242.
>
> CONCLUSION
> The patch should rather be turned down.
>
> REASON.
> The patch does not fix the real cause of the problem:
> a wrong value is passed as a parameter in the call
> Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
> used_tables,
> used_tables);
> The patch actually suggests a work-around that hides the bug.
> This work-around simultaneously adds a new feature
> that catches impossible HAVINGs after constant table substitution.
> Yet impossible WHEREs that appear after this optimization remain
> uncaught. So the feature is introduced half-baked.
>
> POSSIBLE SOLUTION
> - Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
> - used_tables,
> - used_tables);
> + Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having,
> + used_tables,
> + (table_map) 0);
Ok. This possible solution is not yet applied to our tree (we only discussed
it so far).
All,
I've created a tarball of a freshly branched MariaDB 5.2 source tree.
I created the branch with:
bzr branch lp:maria/5.2 mariadb-5.2
I created the tarball with:
tar -czvf mariadb-5.2-repo.tar.gz mariadb-5.2/
I then saw to it that the tarball was uploaded to our mirrors.
If you are having trouble using bzr to branch the complete MariaDB
source tree, using this tarball is an option. For example, see this bug
on Launchpad: https://bugs.launchpad.net/bugs/407834
Links to the tarball (and some brief instructions) are here:
http://askmonty.org/wiki/Getting_the_MariaDB_Source_Code#Source_Tree_Tarball
I've tested the tree by downloading the tarball to my local machine, and
it appears to work just fine. Let me know if you try to use it and it
doesn't work for you.
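For anyone trying this, the workflow would look roughly like the
following (the mirror URL is a placeholder here -- use the actual link
from the wiki page above):

wget http://mirror.example.org/mariadb-5.2-repo.tar.gz
tar -xzvf mariadb-5.2-repo.tar.gz
cd mariadb-5.2
# fetch only the revisions added since the tarball was created
bzr pull lp:maria/5.2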
Thanks.
--
Daniel Bartholomew
Monty Program - http://askmonty.org
[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2791)
by Igor Babaev 18 May '10
#At lp:maria/5.2 based on revid:igor@askmonty.org-20100513065914-yq9y2pbd9zn2fm7w
2791 Igor Babaev 2010-05-18
Fixed bugs in the backport of derived tables (mwl106).
modified:
mysql-test/r/derived_view.result
mysql-test/r/table_elim.result
sql/item_cmpfunc.cc
sql/item_cmpfunc.h
sql/sql_class.h
sql/sql_select.cc
sql/sql_select.h
sql/sql_union.cc
=== modified file 'mysql-test/r/derived_view.result'
--- a/mysql-test/r/derived_view.result 2010-04-29 21:10:39 +0000
+++ b/mysql-test/r/derived_view.result 2010-05-18 17:46:32 +0000
@@ -442,7 +442,7 @@ id select_type table type possible_keys
1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where
1 SIMPLE t1 ALL NULL NULL NULL NULL 11 100.00 Using where; Using join buffer
Warnings:
-Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` join `test`.`t1` where ((`test`.`t1`.`f1` = `test`.`t1`.`f1`) and (`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7) and ((`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7)))
+Note 1003 select `test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11`,`test`.`t1`.`f1` AS `f1`,`test`.`t1`.`f11` AS `f11` from `test`.`t1` join `test`.`t1` where ((`test`.`t1`.`f1` = `test`.`t1`.`f1`) and (`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7) and (`test`.`t1`.`f1` > 2) and (`test`.`t1`.`f1` < 7))
select * from
(select * from
(select * from t1 where f1 < 7 ) tt where f1 > 2 ) x
=== modified file 'mysql-test/r/table_elim.result'
--- a/mysql-test/r/table_elim.result 2010-03-20 12:01:47 +0000
+++ b/mysql-test/r/table_elim.result 2010-05-18 17:46:32 +0000
@@ -117,58 +117,58 @@ t2 where id=f.id);
This should use one table:
explain select id from v1 where id=2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY f const PRIMARY PRIMARY 4 const 1 Using index
+1 SIMPLE f const PRIMARY PRIMARY 4 const 1 Using index
This should use one table:
explain extended select id from v1 where id in (1,2,3,4);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
+1 SIMPLE f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
Warnings:
-Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` where (`f`.`id` in (1,2,3,4))
This should use facts and a1 tables:
explain extended select id from v1 where attr1 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
+1 SIMPLE a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t1` `a1` where ((`f`.`id` = `a1`.`id`) and (`a1`.`attr1` between 12 and 14))
This should use facts, a2 and its subquery:
explain extended select id from v1 where attr2 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using where; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using index
+1 SIMPLE a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using where; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using index
3 DEPENDENT SUBQUERY t2 ref PRIMARY PRIMARY 4 test.a2.id 2 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.a2.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t2` `a2` where ((`f`.`id` = `a2`.`id`) and (`a2`.`attr2` between 12 and 14) and (`a2`.`fromdate` = (select max(`test`.`t2`.`fromdate`) AS `MAX(fromdate)` from `test`.`t2` where (`test`.`t2`.`id` = `a2`.`id`))))
This should use one table:
explain select id from v2 where id=2;
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY f const PRIMARY PRIMARY 4 const 1 Using index
+1 SIMPLE f const PRIMARY PRIMARY 4 const 1 Using index
This should use one table:
explain extended select id from v2 where id in (1,2,3,4);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
+1 SIMPLE f range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index
Warnings:
-Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` where (`f`.`id` in (1,2,3,4))
This should use facts and a1 tables:
explain extended select id from v2 where attr1 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
+1 SIMPLE a1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using index condition; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a1.id 1 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t1` `a1` where ((`f`.`id` = `a1`.`id`) and (`a1`.`attr1` between 12 and 14))
This should use facts, a2 and its subquery:
explain extended select id from v2 where attr2 between 12 and 14;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using MRR
-1 PRIMARY f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using where; Using index
+1 SIMPLE a2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using index condition; Using MRR
+1 SIMPLE f eq_ref PRIMARY PRIMARY 4 test.a2.id 1 100.00 Using where; Using index
3 DEPENDENT SUBQUERY t2 ref PRIMARY PRIMARY 4 test.f.id 2 100.00 Using index
Warnings:
-Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #1
+Note 1276 Field or reference 'test.f.id' of SELECT #3 was resolved in SELECT #2
Note 1003 select `f`.`id` AS `id` from `test`.`t0` `f` join `test`.`t2` `a2` where ((`f`.`id` = `a2`.`id`) and (`a2`.`attr2` between 12 and 14) and (`a2`.`fromdate` = (select max(`test`.`t2`.`fromdate`) AS `MAX(fromdate)` from `test`.`t2` where (`test`.`t2`.`id` = `f`.`id`))))
drop view v1, v2;
drop table t0, t1, t2;
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2010-04-29 21:10:39 +0000
+++ b/sql/item_cmpfunc.cc 2010-05-18 17:46:32 +0000
@@ -4232,8 +4232,20 @@ Item_cond::fix_fields(THD *thd, Item **r
(item= *li.ref())->check_cols(1))
return TRUE; /* purecov: inspected */
used_tables_cache|= item->used_tables();
+#if 0
if (!item->const_item())
const_item_cache= FALSE;
+#else
+ if (item->const_item())
+ and_tables_cache= (table_map) 0;
+ else
+ {
+ table_map tmp_table_map= item->not_null_tables();
+ not_null_tables_cache|= tmp_table_map;
+ and_tables_cache&= tmp_table_map;
+ const_item_cache= FALSE;
+ }
+#endif
with_sum_func= with_sum_func || item->with_sum_func;
with_subselect|= item->with_subselect;
@@ -4253,6 +4265,7 @@ Item_cond::eval_not_null_tables(uchar *o
{
Item *item;
List_iterator<Item> li(list);
+ and_tables_cache= ~(table_map) 0;
while ((item=li++))
{
table_map tmp_table_map;
=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h 2010-04-29 21:10:39 +0000
+++ b/sql/item_cmpfunc.h 2010-05-18 17:46:32 +0000
@@ -1778,19 +1778,6 @@ inline Item *and_conds(Item *a, Item *b)
{
if (!b) return a;
if (!a) return b;
- /* Try to minimize item tree by adding to already present AND functions. */
- if (a->type() == Item::COND_ITEM &&
- ((Item_cond*) a)->functype() == Item_func::COND_AND_FUNC)
- {
- ((Item_cond*)a)->add(b);
- return a;
- }
- else if (b->type() == Item::COND_ITEM &&
- ((Item_cond*) b)->functype() == Item_func::COND_AND_FUNC)
- {
- ((Item_cond*)b)->add(a);
- return b;
- }
return new Item_cond_and(a, b);
}
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-04-29 21:10:39 +0000
+++ b/sql/sql_class.h 2010-05-18 17:46:32 +0000
@@ -2790,11 +2790,6 @@ public:
*/
bool bit_fields_as_long;
- /*
- Whether to create or postpone actual creation of this temporary table.
- TRUE <=> create_tmp_table will create only the TABLE structure.
- */
- bool skip_create_table;
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-05-12 04:09:58 +0000
+++ b/sql/sql_select.cc 2010-05-18 17:46:32 +0000
@@ -9845,8 +9845,9 @@ simplify_joins(JOIN *join, List<TABLE_LI
{
conds= and_conds(conds, table->on_expr);
conds->top_level_item();
- if (!conds->fixed)
- conds->fix_fields(join->thd, &conds);
+ /* conds is always a new item as both cond and on_expr existed */
+ DBUG_ASSERT(!conds->fixed);
+ conds->fix_fields(join->thd, &conds);
}
else
conds= table->on_expr;
@@ -11035,7 +11036,7 @@ TABLE *
create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
- char *table_alias)
+ char *table_alias, bool do_not_open)
{
MEM_ROOT *mem_root_save, own_root;
TABLE *table;
@@ -11728,7 +11729,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
if (thd->is_fatal_error) // If end of memory
goto err; /* purecov: inspected */
share->db_record_offset= 1;
- if (!param->skip_create_table)
+ if (!do_not_open)
{
if (share->db_type() == TMP_ENGINE_HTON)
{
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-04-29 21:10:39 +0000
+++ b/sql/sql_select.h 2010-05-18 17:46:32 +0000
@@ -1984,7 +1984,7 @@ void push_index_cond(JOIN_TAB *tab, uint
TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulonglong select_options, ha_rows rows_limit,
- char* alias);
+ char* alias, bool do_not_open=FALSE);
void free_tmp_table(THD *thd, TABLE *entry);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
=== modified file 'sql/sql_union.cc'
--- a/sql/sql_union.cc 2010-05-12 04:09:58 +0000
+++ b/sql/sql_union.cc 2010-05-18 17:46:32 +0000
@@ -126,12 +126,11 @@ select_union::create_result_table(THD *t
tmp_table_param.init();
tmp_table_param.field_count= column_types->elements;
tmp_table_param.bit_fields_as_long= bit_fields_as_long;
- tmp_table_param.skip_create_table= !create_table;
-
if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types,
(ORDER*) 0, is_union_distinct, 1,
- options, HA_POS_ERROR, (char*) alias)))
+ options, HA_POS_ERROR, (char*) alias,
+ !create_table)))
return TRUE;
if (create_table)
{
Hello everyone,
Summarizing from my previous email: I'm working on the documentation for
the Segmented Key Cache, one of the new features in MariaDB 5.2. My
previous email was about the "About" section of the documentation. This
email is about the "Syntax" section.
For the syntax section, I see there is one new global variable defined
for this feature and a new KEY_CACHES table in the information_schema
database. Are there any other user-visible items which should be
mentioned?
Here is a first draft of the syntax section:
----------------------------------------------------------------------
== Segmented Key Cache Syntax ==
New global variable: key_cache_partitions. It sets the number of
segments in a key cache. Valid values for this variable are whole
numbers between 0 and 64. If the number of partitions is set to a
number greater than 64, it will be truncated to 64 and a warning will
be issued.
A value of '0' means the key cache is a regular (i.e. non-segmented)
key cache. This is the default.
Other global variables used when working with regular key caches also
apply to segmented key caches: key_buffer_size,
key_cache_age_threshold, key_cache_block_size, and
key_cache_division_limit. See the MySQL manual for descriptions of
these variables.
http://dev.mysql.com/doc/refman/5.1/en/server-system-variables.html
Statistics about the key cache can be found by looking at the
KEY_CACHES table in the INFORMATION_SCHEMA database. Columns in this
table are:
* KEY_CACHE_NAME: The name of the key cache
* PARTITIONS: total number of segments
* PARTITION_NUMBER: segment number (set to NULL if a simple key cache)
* FULL_SIZE: memory for cache buffers/auxiliary structures
* BLOCK_SIZE: size of the blocks
* USED_BLOCKS: number of currently used blocks
* UNUSED_BLOCKS: number of currently unused blocks
* DIRTY_BLOCKS: number of currently dirty blocks
* READ_REQUESTS: number of read requests
* READS: number of actual reads from files into buffers
* WRITE_REQUESTS: number of write requests
* WRITES: number of actual writes from buffers into files
----------------------------------------------------------------------
If there are any factual errors with the above, let me know.
Improvements and suggestions are also welcome.
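As a starting point for the examples, here is a minimal sketch of what
usage could look like (the buffer size and the 'default' cache name
are illustrative). In my.cnf, since the variable is not dynamic:

key_buffer_size=256M
key_cache_partitions=7

Then, from a client, the segments can be inspected with:

SELECT key_cache_name, partition_number, used_blocks, unused_blocks
FROM information_schema.key_caches
WHERE key_cache_name = 'default';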
Question for the developers: A key_cache_partitions value of '0' means
the key cache will not be segmented, it will be a regular (or simple)
key cache. A value of '1' means the key cache will be a segmented key
cache with a single segment. Is there any benefit to having a
"single-segment segmented key cache" compared to a regular "simple key
cache" or are they practically the same thing?
The key_cache_partitions variable also needs to be documented on the
Server System Variables page
(http://askmonty.org/wiki/Manual:Server_System_Variables). The entry
will look something like this:
----------------------------------------------------------------------
* <code>key_cache_partitions</code>
** '''Description:''' The number of segments in a key cache.
** '''Commandline:''' <code>--key_cache_partitions=#</code>
** '''Scope:''' Global
** '''Dynamic:''' No
** '''Type:''' number
** '''Valid values:''' <code>0-64</code>
** '''Default value:''' <code>0</code> ''(non-segmented)''
** '''Introduced:''' MariaDB 5.2
----------------------------------------------------------------------
Let me know if there is anything wrong with the above.
My task now is to come up with some examples. I'll probably use the
test cases for inspiration unless someone has an awesome segmented key
cache example that they've been dying to share with me. :)
Thanks.
--
Daniel Bartholomew
Monty Program - http://askmonty.org
All of the buildbots I administrate should now be back online
(adutko-centos5-amd64, adutko-ultrasparc3 and mariadb-brs). Thank you for
your patience.
-Adam
Michael Widenius <michael.widenius(a)gmail.com> writes:
> Daniel, can you create a 'initial' repository of MariaDB 5.2 and make a
> .tar.gz file of it available on our download page.
> We need this ASAP because we get complains from developers that they
> can't use bzr to create download MariaDB source because it takes way
> too long and often fails in the middle of the process.
Since Colin asked on IRC, let me add that (I think) the root of this problem
is this bug:
https://bugs.launchpad.net/bzr/+bug/407834
(so I can confirm it is a real problem!)
- Kristian.
[Maria-developers] Progress (by Knielsen): Use Buildbot to populate apt/yum repositories (117)
by worklog-noreply@askmonty.org 17 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Use Buildbot to populate apt/yum repositories
CREATION DATE..: Wed, 12 May 2010, 07:04
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 117 (http://askmonty.org/worklog/?tid=117)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 12
ESTIMATE.......: 8 (hours remain)
ORIG. ESTIMATE.: 20
PROGRESS NOTES:
-=-=(Knielsen - Mon, 17 May 2010, 08:48)=-=-
Fixed the conflict on lucid with mysql-client-core-5.1.
Figure out and document how to do the signing, discussions with OurDelta.
Worked 8 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 12 May 2010, 21:20)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.21448 2010-05-12 21:20:34.000000000 +0000
+++ /tmp/wklog.117.new.21448 2010-05-12 21:20:34.000000000 +0000
@@ -1,5 +1,5 @@
-As for signing, I think it may be possible/best to do the signing outside of
-buildbot, as a separate process. There are some advantages to this:
+The signing of packages can be done outside of Buildbot, as a separate
+process. There are some advantages to this:
- Security: the private key can be kept less exposed when it is not included
in the buildbot infrastructure.
@@ -9,9 +9,6 @@
- Generally reducing the complexity of the buildbot setup.
-This of course requires that it is possible to sign the packages after the
-actual build.
-
----
Here is how to sign the .rpms.
@@ -42,20 +39,37 @@
----
-For .deb, I *think* we are using secure apt, which does not actually sign the
-packages, rather it signs the "Release" file which is created when the
-repository is set up. So in this case again there is no problem doing the
-signing outside of the build itself (in fact that is the way it must be).
+For .deb, it is not the individual .deb that is signed, it is the
+repository. Here is one way to generate a signed repository, using reprepro.
-Found two tools that can help with building and signing apt repositories:
-reprepro (seems to be the newest, recommended) and apt-ftparchive.
+The ourdelta/bakery signing stuff needs to be copied to ~/.gnupg
-----
+mkdir repo # or whatever
+cd repo
+mkdir conf
+cat >conf/distributions <<END
+Origin: MariaDB
+Label: MariaDB
+Codename: hardy
+Architectures: amd64
+Components: mariadb-ourdelta
+Description: MariaDB test Repository
+SignWith: autosign(a)ourdelta.org
+END
+for i in `find /home/buildbot/debs/ -name '*.deb'` ; do reprepro --basedir=.
+includedeb hardy $i ; done
+
+The corrosponding line for /etc/apt/sources.list:
-ToDO: Figure out how to handle the mysql-client-core issue on lucid. Arjen
-suggested splitting up so we have this package ourselves, or maybe it can be
-handled with replace/provide/conflict dependencies.
+ deb file:///home/buildbot/repo hardy mariadb-ourdelta
+
+This works for multiple distributions, by adding more sections to the
+conf/distributions file.
+
+----
-ToDo: Figure out exactly what files/directory structure needs to be uploaded
-(asked Peter, awaiting reply).
+For the mysql-client-core-5.1 issue, the solution is to split the
+mariadb-client-5.1 (and 5.2) package similarly into
+mariadb-client-core-5.1. The mariadb-client-core-5.1 package then provides:
+mysql-client-core-5.1.
-=-=(Knielsen - Wed, 12 May 2010, 18:25)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.12634 2010-05-12 18:25:58.000000000 +0000
+++ /tmp/wklog.117.new.12634 2010-05-12 18:25:58.000000000 +0000
@@ -12,9 +12,35 @@
This of course requires that it is possible to sign the packages after the
actual build.
-For .rpm this seems to be easy (from reading, didn't try yet):
+----
+
+Here is how to sign the .rpms.
+
+Copy in the ourdelta/bakery signing stuff to ~/.gnupg and ~/.rpmmacros.
+
+Run
+
+ rpm --addsign *.rpm
+
+That's all! This can be tested by creating a local yum repository:
- rpm --addsign <packages>
+ createrepo <dir>
+
+(where <dir> contains the signed .rpms). Then create the file
+/etc/yum.repos.d/localmaria.repo:
+
+[localmaria]
+name=Local MariaDB repo
+baseurl=file:///home/buildbot/rpms
+gpgcheck=1
+enabled=1
+gpgkey=http://master.ourdelta.org/deb/ourdelta.gpg
+
+Now this should work to install MariaDB:
+
+ sudo yum install MariaDB-server
+
+----
For .deb, I *think* we are using secure apt, which does not actually sign the
packages, rather it signs the "Release" file which is created when the
-=-=(Knielsen - Wed, 12 May 2010, 07:14)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.401 2010-05-12 07:14:27.000000000 +0000
+++ /tmp/wklog.117.new.401 2010-05-12 07:14:27.000000000 +0000
@@ -1 +1,35 @@
+As for signing, I think it may be possible/best to do the signing outside of
+buildbot, as a separate process. There are some advantages to this:
+
+ - Security: the private key can be kept less exposed when it is not included
+ in the buildbot infrastructure.
+
+ - It is good to have one step of human intervention before actually signing
+ and releasing packages.
+
+ - Generally reducing the complexity of the buildbot setup.
+
+This of course requires that it is possible to sign the packages after the
+actual build.
+
+For .rpm this seems to be easy (from reading, didn't try yet):
+
+ rpm --addsign <packages>
+
+For .deb, I *think* we are using secure apt, which does not actually sign the
+packages, rather it signs the "Release" file which is created when the
+repository is set up. So in this case again there is no problem doing the
+signing outside of the build itself (in fact that is the way it must be).
+
+Found two tools that can help with building and signing apt repositories:
+reprepro (seems to be the newest, recommended) and apt-ftparchive.
+
+----
+
+ToDO: Figure out how to handle the mysql-client-core issue on lucid. Arjen
+suggested splitting up so we have this package ourselves, or maybe it can be
+handled with replace/provide/conflict dependencies.
+
+ToDo: Figure out exactly what files/directory structure needs to be uploaded
+(asked Peter, awaiting reply).
-=-=(Knielsen - Wed, 12 May 2010, 07:06)=-=-
Upgraded lucid VMs to the official release.
Discussed with Arjen how to handle things.
Did a lot of reading on how apt repositories work.
Worked 4 hours and estimate 16 hours remain (original estimate unchanged).
DESCRIPTION:
Since the package building for MariaDB is now fully automated in Buildbot, it
has been decided to use packages from Buildbot for the OurDelta apt and yum
repositories.
This worklog is about fixing/implementing anything that is missing to achieve
this.
- When doing a real release build, packages/repositories need to be signed,
so that users will not get a warning about unauthenticated packages. This
signing must only be done on official releases, not on daily builds (to
avoid confusing one with the other).
- Packages must be uploaded from the Buildbot host. The OurDelta
infrastructure has a DropBox share that could be used for this, another
option is to simply use rsync.
- Ubuntu 10.04 "lucid" has been released, and we need to support that for
packages, so the Buildbot VM for lucid must be upgraded to have the
official release.
- In Ubuntu 10.04, the official MySQL packages include a new package
mysql-client-core, we currently have a conflict with this on install that
we need to handle somehow.
HIGH-LEVEL SPECIFICATION:
The signing of packages can be done outside of Buildbot, as a separate
process. There are some advantages to this:
- Security: the private key can be kept less exposed when it is not included
in the buildbot infrastructure.
- It is good to have one step of human intervention before actually signing
and releasing packages.
- Generally reducing the complexity of the buildbot setup.
----
Here is how to sign the .rpms.
Copy in the ourdelta/bakery signing stuff to ~/.gnupg and ~/.rpmmacros.
Run
rpm --addsign *.rpm
That's all! This can be tested by creating a local yum repository:
createrepo <dir>
(where <dir> contains the signed .rpms). Then create the file
/etc/yum.repos.d/localmaria.repo:
[localmaria]
name=Local MariaDB repo
baseurl=file:///home/buildbot/rpms
gpgcheck=1
enabled=1
gpgkey=http://master.ourdelta.org/deb/ourdelta.gpg
Now this should work to install MariaDB:
sudo yum install MariaDB-server
----
For .deb, it is not the individual .deb that is signed, it is the
repository. Here is one way to generate a signed repository, using reprepro.
The ourdelta/bakery signing stuff needs to be copied to ~/.gnupg
mkdir repo # or whatever
cd repo
mkdir conf
cat >conf/distributions <<END
Origin: MariaDB
Label: MariaDB
Codename: hardy
Architectures: amd64
Components: mariadb-ourdelta
Description: MariaDB test Repository
SignWith: autosign(a)ourdelta.org
END
for i in `find /home/buildbot/debs/ -name '*.deb'` ; do
  reprepro --basedir=. includedeb hardy $i
done
The corresponding line for /etc/apt/sources.list:
deb file:///home/buildbot/repo hardy mariadb-ourdelta
This works for multiple distributions, by adding more sections to the
conf/distributions file.
----
For the mysql-client-core-5.1 issue, the solution is to split the
mariadb-client-5.1 (and 5.2) package similarly into
mariadb-client-core-5.1. The mariadb-client-core-5.1 package then provides:
mysql-client-core-5.1.
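A sketch of what the corresponding debian/control stanza could look
like (the Depends line and the description are illustrative; the exact
Replaces/Conflicts relationships still need checking against the
Ubuntu mysql-client-core-5.1 package):

Package: mariadb-client-core-5.1
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Provides: mysql-client-core-5.1
Replaces: mysql-client-core-5.1
Conflicts: mysql-client-core-5.1
Description: MariaDB database core client binaries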
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 17 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 30
ESTIMATE.......: 5 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Mon, 17 May 2010, 08:45)=-=-
Merge with latest trunk and run Buildbot tests.
Worked 1 hour and estimate 5 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Status updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Un-Assigned
+In-Progress
------------------------------------------------------------
-=-=(View All Progress Notes, 30 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially inserts of big blobs would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. Has empty
post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In the binary log, the Annotate_rows event follows the (possible) 'BEGIN'
Query event and precedes the first of the Table map events which accompany
the corresponding rows events. (See the example in the "mysqlbinlog output"
section below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. The session value allows annotating only selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events received from the master
in its own binary log (sensible only in combination with the
log-slave-updates option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not make
the slave reproduce these events whenever its binlog-annotate-rows-events
global value is ON? Because, for example, we may want to configure a
slave which should reproduce Annotate_rows events but has the global
binlog-annotate-rows-events = OFF, meaning this to be the default value
for the client threads (see also "How slave treats
replicate-annotate-rows-events option" in the LLD part).
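For illustration, the two options would typically be combined like this
(a minimal sketch using only the option names defined above):

# master my.cnf: write Annotate_rows events to the master's binlog
binlog-annotate-rows-events

# slave my.cnf: reproduce the annotations in the slave's own binlog
log-slave-updates
replicate-annotate-rows-events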
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log does contain them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100 end_log_pos 1714 Query thread_id=1
exec_time=0 error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100 end_log_pos 1812 Query: `DELETE t1, t2 FROM
t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100 end_log_pos 1853 Table_map: `test`.`t1`
mapped to number 16
#091219 12:45:26 server id 100 end_log_pos 1894 Table_map: `test`.`t2`
mapped to number 17
#091219 12:45:26 server id 100 end_log_pos 1938 Delete_rows: table id 16
#091219 12:45:26 server id 100 end_log_pos 1982 Delete_rows: table id 17
flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event numbers conflict with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of the 'post_header_len' array in
the Format description event. (This extension does not affect the
compatibility of the binary log.) Here is how the Format description event
looks with this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for short, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in Log_event object from which all Log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member
which calls the virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonyms in replication terminology).
In our case, data header is empty and data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write Annotate_event not more than once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option is treated just as the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This setting is done during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for the master to send Annotate_rows events to this slave. The
slave (or mysqlbinlog in the remote case), before requesting a binlog
dump via the COM_BINLOG_DUMP command, informs the master whether it
should send these
events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT server
command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
Note. We add this new command and don't use COM_BINLOG_DUMP to avoid possible
conflicts with MySQL/Sun.
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
Log_event::read_log_event(&log, packet, ...);
...
if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
{
my_net_write(net, packet->ptr(), packet->length());
}
...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each received event by "applying" it, i.e. by
calling the Log_event::apply_event() function which in turn calls
the virtual do_apply_event() member specific for each type of the
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set the
current thd query to the one described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" to follow what happens further when the subsequent
Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. I am not sure, but possibly the current values of thd->query and
thd->query_length should be saved before calling set_query() and restored
on the Annotate_rows_log_event object's deletion.
Is that really needed?
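If it is, a sketch of the save/restore could look as follows (the
m_saved_query_* members are hypothetical, introduced here only to
illustrate the idea):

int Annotate_rows_log_event::do_apply_event(...)
{
  /* hypothetical members: remember the query that was active before */
  m_saved_query_txt = thd->query();
  m_saved_query_len = thd->query_length();
  thd->set_query(m_query_txt, m_query_len);
  return 0;
}

Annotate_rows_log_event::~Annotate_rows_log_event()
{
  /* put the saved query back when the annotation object is deleted */
  thd->set_query(m_saved_query_txt, m_saved_query_len);
}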
After calling this do_apply_event() function we may not delete the
Annotate_rows_log_event object immediately (see exec_relay_log_event()
above) because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
The saved Annotate_rows object should be deleted when all the corresponding
Rows events have been processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing a new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers to avoid possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this and suggesting to reserve the
event number.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0

[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 17 May '10
by worklog-noreply@askmonty.org 17 May '10
17 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 30
ESTIMATE.......: 5 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Mon, 17 May 2010, 08:45)=-=-
Merge with latest trunk and run Buildbot tests.
Worked 1 hour and estimate 5 hours remain (original estimate unchanged).
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Status updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Un-Assigned
+In-Progress
------------------------------------------------------------
-=-=(View All Progress Notes, 30 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially insert of big blob's would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. Has empty
post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In binary log, Annotate_rows event follows the (possible) 'BEGIN' Query event
and precedes the first of Table map events which accompany the corresponding
rows events. (See example in the "mysqlbinlog output" section below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. Session values allows to annotate only some selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events recieved from the master
in its own binary log (sensible only in pair with log-slave-updates option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not to make
the slave to reproduce this events when its binlog-annotate-rows-events
global value is ON? Well, because, for example, we may want to configure
the slave which should reproduce Annotate_rows events but has global
binlog-annotate-rows-events = OFF meaning this to be the default value for
the client threads (see also "How slave treats replicate-annotate-rows-events
option" in LLD part).
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log does contain them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100 end_log_pos 1714 Query thread_id=1
exec_time=0 error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100 end_log_pos 1812 Query: `DELETE t1, t2 FROM
t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100 end_log_pos 1853 Table_map: `test`.`t1`
mapped to number 16
#091219 12:45:26 server id 100 end_log_pos 1894 Table_map: `test`.`t2`
mapped to number 17
#091219 12:45:26 server id 100 end_log_pos 1938 Delete_rows: table id 16
#091219 12:45:26 server id 100 end_log_pos 1982 Delete_rows: table id 17
flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event numbers conflict with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of 'post_header_len' array in the
Format description event. (This extension does not affect the compatibility
of the binary log). Here is how Format description event looks like with
this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for short, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in Log_event object from which all Log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member,
which calls the virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonyms in replication terminology).
In our case, the data header is empty and the data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede the corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once, before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write Annotate_event not more than once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option simply becomes the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This is set up during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for the master to send Annotate_rows events to this slave. The
slave (or mysqlbinlog in the remote case), before requesting a binlog dump
via the COM_BINLOG_DUMP command, informs the master whether it should send
these events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT
server command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
Note. We add this new command instead of changing COM_BINLOG_DUMP to avoid
possible conflicts with MySQL/Sun.
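On the slave side, issuing the command could look as follows. This is only a
sketch: placing it inside request_dump() and encoding the option as a single
flag byte are assumptions (simple_command() is the usual client-protocol
helper; opt_replicate_annotate_rows_events and
BINLOG_SEND_ANNOTATE_ROWS_EVENT are the names used elsewhere in this design):
static int request_dump(MYSQL* mysql, Master_info* mi, ...)
{ ...
  uchar options_ext[1];
  options_ext[0]= opt_replicate_annotate_rows_events ?
                  BINLOG_SEND_ANNOTATE_ROWS_EVENT : 0;
  /* inform the master before sending the COM_BINLOG_DUMP request below */
  if (simple_command(mysql, COM_BINLOG_DUMP_OPTIONS_EXT, options_ext, 1, 0))
    return 1;
  ...
}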
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
Log_event::read_log_event(&log, packet, ...);
...
if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
{
my_net_write(net, packet->ptr(), packet->length());
}
...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each received event by "applying" it, i.e. by
calling the Log_event::apply_event() function, which in turn calls
the virtual do_apply_event() member specific to each type of
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set the
current thd query to the one described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" to follow what happens further when the subsequent
Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. It is not clear yet whether the current values of thd->query and
thd->query_length should be saved before calling set_query() and then
restored when the Annotate_rows_log_event object is deleted.
Is it really needed?
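If it does turn out to be needed, a minimal sketch could look like this
(m_saved_query_txt and m_saved_query_len are hypothetical members, not part
of the current design):
int Annotate_rows_log_event::do_apply_event(...)
{
  /* hypothetical: remember the previous query so it can be restored
     when this event object is deleted */
  m_saved_query_txt= thd->query();
  m_saved_query_len= thd->query_length();
  thd->set_query(m_query_txt, m_query_len);
  return 0;
}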
After calling this do_apply_event() function we may not delete the
Annotate_rows_log_event object immediately (see exec_relay_log_event()
above), because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
The saved Annotate_rows object should be deleted when all corresponding
Rows events have been processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
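The three Relay_log_info members declared above are trivial; a minimal sketch
(assuming at most one annotation is kept at a time; restoring thd->query on
deletion, if needed per the NOTE in section 7, is omitted here):
void Relay_log_info::set_annotate_event(Annotate_rows_log_event* ev)
{
  free_annotate_event();  /* keep at most one saved annotation */
  m_annotate_event= ev;
}
Annotate_rows_log_event* Relay_log_info::get_annotate_event()
{
  return m_annotate_event;
}
void Relay_log_info::free_annotate_event()
{
  delete m_annotate_event;  /* deleting NULL is a no-op */
  m_annotate_event= NULL;
}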
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing a new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers to avoid possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this, suggesting that the event
number be reserved.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Buildbot MariaDB->MariaDB upgrade testing (118)
by worklog-noreply@askmonty.org 17 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Buildbot MariaDB->MariaDB upgrade testing
CREATION DATE..: Wed, 12 May 2010, 13:48
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 118 (http://askmonty.org/worklog/?tid=118)
VERSION........:
STATUS.........: Complete
PRIORITY.......: 60
WORKED HOURS...: 8
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 8
PROGRESS NOTES:
-=-=(Knielsen - Mon, 17 May 2010, 08:31)=-=-
Fixed a problem with the upgrade test for 5.2. Using a 5.[0-9] wildcard
doesn't work, as it tries to install both mariadb-server-5.1 and
mariadb-server-5.2 at the same time! Running `apt-get upgrade` also does not
work, as it will not test the 5.1->5.2 upgrade. Instead, the correct 5.x
version to test installation of is taken from the bzr branch name.
Found and fixed an upgrade dependency problem when upgrading to 5.2
(/usr/lib/mysql/plugins moved from libmysqlclient-dev in 5.1 to
mariadb-server-5.2 in 5.2).
Worked 3 hours and estimate 0 hours remain (original estimate increased by 3 hours).
-=-=(Knielsen - Fri, 14 May 2010, 06:41)=-=-
Version updated.
--- /tmp/wklog.118.old.18369 2010-05-14 06:41:01.000000000 +0000
+++ /tmp/wklog.118.new.18369 2010-05-14 06:41:01.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+
-=-=(Knielsen - Fri, 14 May 2010, 06:41)=-=-
Status updated.
--- /tmp/wklog.118.old.18369 2010-05-14 06:41:01.000000000 +0000
+++ /tmp/wklog.118.new.18369 2010-05-14 06:41:01.000000000 +0000
@@ -1 +1 @@
-Un-Assigned
+Complete
-=-=(Knielsen - Fri, 14 May 2010, 06:40)=-=-
Installed the new virtual images, using 5.1.42 from OurDelta (for all except
lucid, which is new and so has no 5.1.42 ourdelta package and also needed a
fixed set of packages due to changes in the lucid MySQL packaging).
Added a new upgrade2 step in the Buildbot configuration to test this.
Updated Buildbot wiki documentation.
Worked 5 hours and estimate 0 hours remain (original estimate decreased by 3 hours).
DESCRIPTION:
Create an additional test step for Buildbot to check that upgrading from one
version of MariaDB to another works ok.
We already have testing of upgrade from distro-official MySQL .debs to
our MariaDB .debs.
What we need is, for each Debian/Ubuntu version, a new KVM virtual image with
MariaDB 5.1.42 (the first GA release) pre-installed, just like the existing
upgrade test uses images with MySQL pre-installed.
Then the Buildbot configuration must be updated to add another upgrade test
step, just like the existing one but using the images with MariaDB pre-installed.
Also, the setup of the new images must be added to the existing documentation:
http://askmonty.org/wiki/BuildBot::package
http://askmonty.org/wiki/BuildBot:vm-setup
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2857: fix includes in libevent to support vpath builds
by noreply@launchpad.net 15 May '10
------------------------------------------------------------
revno: 2857
committer: Sergei Golubchik <sergii(a)pisem.net>
branch nick: maria-5.1
timestamp: Sat 2010-05-15 14:17:33 +0200
message:
fix includes in libevent to support vpath builds
modified:
extra/libevent/Makefile.am
--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1
Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription
Re: [Maria-developers] [Merge] lp:~paul-mccullagh/maria/pbxt-1.0.11 into lp:maria
by Kristian Nielsen 15 May '10
Paul McCullagh <paul.mccullagh(a)primebase.org> writes:
> All tests in the PBXT suite run through on Mac and Linux, except for one error under Linux, which is a bit weird (see below).
> ------------------
>
> pbxt.select_safe [ fail ]
> Test ended at 2010-05-06 17:19:13
>
> CURRENT_TEST: pbxt.select_safe
> mysqltest: At line 19: query 'select 1 from t1,t1 as t2,t1 as t3' failed: 1104: The SELECT would examine more than MAX_JOIN_SIZE rows; check your WHERE and use SET SQL_BIG_SELECTS=1 or SET SQL_MAX_JOIN_SIZE=# if the SELECT is okay
>
> The result from queries just before the failure was:
> drop table if exists t1;
> SET SQL_SAFE_UPDATES=1,SQL_SELECT_LIMIT=4, SQL_MAX_JOIN_SIZE=9;
> create table t1 (a int auto_increment primary key, b char(20));
> insert into t1 values(1,"test");
> SELECT SQL_BUFFER_RESULT * from t1;
> a b
> 1 test
> update t1 set b="a" whe
Ok, I looked into this.
So this test does a three-way Cartesian self-join on a table with 2 rows
(2^3 = 8 rows in total), and it sets SQL_MAX_JOIN_SIZE=9. Sometimes
(apparently timing dependent) the test fails because the optimiser estimates
more than 9 rows in the join: with an estimate of 3 records per table, the
join estimate becomes 3^3 = 27 rows, which exceeds SQL_MAX_JOIN_SIZE=9.
What I found is that when the test case fails, the function ha_pbxt::info()
returns records==3 for the table. When the test case succeeds, it returns
records==2. So it's pretty clear that the server will throw an error when
records==3 and SQL_MAX_JOIN_SIZE=9.
What remains to consider is why the returned records value differs between
test runs.
The testcase actually first inserts a row, then deletes it, then inserts two
more rows. I'm speculating that PBXT has some background cleanup thread or
similar that causes a race between freeing up space from the first row and
allocating space for the two new rows?
So Paul, do you think returning records = 2 or 3 at random from
ha_pbxt::info() is expected or not?
If expected, one way to fix the problem is to create a new table before
running the test:
create table t2 like t1;
insert into t2 select * from t1;
analyze table t2; # PBXT: required to get the correct COUNT(*)
select 1 from t2 as t1,t2,t2 as t3;
Then there is no possibility for the insert+delete to take up a slot and cause
a failure (I was not able to repeat the failure with this change).
Alternatively, we can just increase the SQL_MAX_JOIN_SIZE value.
Or alternatively, you may decide that the test is meaningless for PBXT due to
imprecise statistics, and just remove it from select_safe.test.
Just let me know what you prefer, and I'll change it as needed.
- Kristian.
[Maria-developers] Patch to fix compiler Warnings and build failure in pbxt
by Michael Widenius 14 May '10
Hi!
I have now merged and pushed the xtstat patch into MariaDB
5.1-release.
Here is a patch that fixes some compiler warnings and a build failure
when building in another directory.
Paul, hope you can fix these also on your side. (The patch is already
in 5.1-release).
Regards,
Monty
=== modified file 'storage/pbxt/bin/Makefile.am'
--- storage/pbxt/bin/Makefile.am 2010-05-11 13:45:45 +0000
+++ storage/pbxt/bin/Makefile.am 2010-05-14 11:53:01 +0000
@@ -5,10 +5,10 @@ INCLUDES = -I$(top_srcdir)/include -I$(t
-I$(top_srcdir)/storage/innobase/include \
-I$(top_srcdir)/sql \
-I$(srcdir) \
- -I../src
+ -I$(srcdir)/../src
bin_PROGRAMS = xtstat
xtstat_SOURCES = xtstat_xt.cc ../src/strutil_xt.cc
-xtstat_LDADD = $(top_srcdir)/libmysql/libmysqlclient.la
+xtstat_LDADD = $(top_builddir)/libmysql/libmysqlclient.la
=== modified file 'storage/pbxt/bin/xtstat_xt.cc'
--- storage/pbxt/bin/xtstat_xt.cc 2010-05-11 13:45:45 +0000
+++ storage/pbxt/bin/xtstat_xt.cc 2010-05-14 10:59:41 +0000
@@ -93,7 +93,7 @@ struct Options {
"Connection protocol to use: default/tcp/socket/pipe/memory", "default", MYSQL_PROTOCOL_DEFAULT, false },
{ OPT_DISPLAY, 0, "display", OPT_HAS_VALUE,
"Columns to display: use short names separated by |, partial match allowed", "time-msec,commt,row-ins,rec,ind,ilog,xlog,data,to,dirty", 0, false },
- { OPT_NONE, 0, NULL, 0, NULL, 0, false }
+ { OPT_NONE, 0, NULL, 0, NULL, NULL, 0, false }
};
#ifdef XT_WIN
=== modified file 'storage/pbxt/src/datalog_xt.cc'
--- storage/pbxt/src/datalog_xt.cc 2010-05-05 10:59:57 +0000
+++ storage/pbxt/src/datalog_xt.cc 2010-05-14 10:55:32 +0000
@@ -1249,7 +1249,7 @@ xtBool XTDataLogBuffer::dlb_write_thru_l
*/
dlb_data_log->dlf_log_eof += size;
#ifdef DEBUG
- if (log_offset + size > dlb_max_write_offset)
+ if ((ulonglong) (log_offset + size) > (ulonglong) dlb_max_write_offset)
dlb_max_write_offset = log_offset + size;
#endif
dlb_flush_required = TRUE;
@@ -1291,7 +1291,7 @@ xtBool XTDataLogBuffer::dlb_append_log(x
if (!xt_pwrite_file(dlb_data_log->dlf_log_file, log_offset, size, data, &thread->st_statistics.st_data, thread))
return FAILED;
#ifdef DEBUG
- if (log_offset + size > dlb_max_write_offset)
+ if ((ulonglong) (log_offset + size) > (ulonglong) dlb_max_write_offset)
dlb_max_write_offset = log_offset + size;
#endif
dlb_flush_required = TRUE;
@@ -1734,8 +1734,8 @@ static xtBool dl_collect_garbage(XTThrea
xtLogOffset src_log_offset;
xtLogID curr_log_id;
xtLogOffset curr_log_offset;
- xtLogID dest_log_id;
- xtLogOffset dest_log_offset;
+ xtLogID dest_log_id= 0;
+ xtLogOffset dest_log_offset= 0;
off_t garbage_count = 0;
memset(&cs, 0, sizeof(XTCompactorStateRec));
=== modified file 'storage/pbxt/src/ha_pbxt.cc'
--- storage/pbxt/src/ha_pbxt.cc 2010-05-12 14:27:18 +0000
+++ storage/pbxt/src/ha_pbxt.cc 2010-05-14 10:54:13 +0000
@@ -1609,7 +1609,7 @@ static int pbxt_prepare(handlerton *hton
return err;
}
-static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, char *thread_name, int *err)
+static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, const char *thread_name, int *err)
{
THD *thd;
XTThreadPtr self = NULL;
=== modified file 'storage/pbxt/src/table_xt.cc'
--- storage/pbxt/src/table_xt.cc 2010-05-06 12:42:28 +0000
+++ storage/pbxt/src/table_xt.cc 2010-05-14 10:57:14 +0000
@@ -1793,10 +1793,12 @@ xtPublic void xt_check_table(XTThreadPtr
XTTableHPtr tab = ot->ot_table;
xtRecordID prec_id;
XTTabRecExtDPtr rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
+#ifdef CHECK_TABLE_READ_DATA_LOG
XTactExtRecEntryDRec ext_rec;
size_t log_size;
xtLogID log_id;
xtLogOffset log_offset;
+#endif
xtRecordID rec_id;
xtRecordID prev_rec_id;
xtXactID xn_id;
[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2856: Add a -nobuild argument to the script. Useful for building the zip file release with Express Edit...
by noreply@launchpad.net 14 May '10
------------------------------------------------------------
revno: 2856
committer: Bo Thorsen <bo(a)askmonty.org>
branch nick: trunk-work
timestamp: Fri 2010-05-14 14:12:23 +0200
message:
Add a -nobuild argument to the script. Useful for building the zip file release with Express Edition which doesn't have the devenv command
modified:
win/make_mariadb_win_dist
--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1
Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription
On 14-05-2010 10:01, Henrik Ingo wrote:
> (Adding maria-developers)
>
> On Wednesday 12 May 2010 16:10:36 Bo Thorsen wrote:
>> On 12-05-2010 15:04, Henrik Ingo wrote:
>>> On Wednesday 12 May 2010 13:44:18 Michael Widenius wrote:
>>>> Hi!
>>>>
>>>>>>>>> "Bo" == Bo Thorsen<bo(a)askmonty.org> writes:
>>>>
>>>> Bo> Hi Daniel (and devs),
>>>> Bo> I have uploaded a windows 32 bit build zip file to
>>>> Bo>
>>>> /srv/www/vhosts/main/download/mariadb-5.1.44b/windows/mariadb-noinstall-
>>>> 5.1 .44b-win32.zip.
>>>>
>>>> Bo> How do I get this mentioned on the download page? And will the
>>>> mirrors Bo> pick the file up automatically?
>>>>
>>>> Bo> I'm not sure if we should add a warning about this zip file. It's
>>>> the Bo> first one we have done in a while, something could have gone
>>>> wrong with it.
>>>>
>>>> It would be good if we could add a warning like:
>>>>
>>>> We are actively working on a better Windows installation. This is our
>>>> first try in this direction; It *should* work, but it if doesn't
>>>> please report a bug about it so that we can fix it!
>>>
>>> Actually, the windows builds should be called beta for some time yet.
>>> Please also change the zip filename to indicate that (and make sure it
>>> says so on the download page).
>>
>> That's a change in what we usually do, since beta is only for the
>> software, not the packaging. It would be odd to have both a 5.1.44b and
>> a 5.1.44b-beta live at the site. People could be confused about what it
>> is. It's actually already live without the beta in the filename.
>
> It's not about the packaging but the platform. If you compile our sources for
> OS X, the sources are stable, but we never gave a guarantee that it will work
> at all for OS X. Windows is the same, we are only now paying attention to it.
> It's unreasonable to expect it will magically work just because we already do
> stable releases for Linux and Solaris.
I agree there is a risk that some of the code we have added between
5.1.39 and now can be broken on Windows.
>> And the scripts haven't changed since we last released this, so I don't
>> think it's a high risk.
>
> Ok, this is an argument I can buy. We've had one beta out, if no problems were
> reported, we could move to stable.
>
> Even so, my personal preference is to call the windows release stable only
> once we have a automated (buildbot based) system that outputs binaries without
> manual steps - I consider that more reliable as a process.
>
> To do the manually built zip file fills a need our users have, but isn't yet
> the end goal.
The steps I do manually are exactly what the buildbot system will do. I
just uncomment the lines that do visual studio build and run the scripts.
At one of the first runs I did, I modified the script to do this:
if [ "x_$1" != "x_-nobuild" ]; then
win/configure-mariadb.sh
cmake -G "Visual Studio 9 2008"
devenv.com MySQL.sln /build RelWithDebInfo
devenv.com MySQL.sln /build Debug
fi
The only modification here from the checked-in script is the if statement.
When I run "sh win/make_mariadb_win_dist -nobuild", I just have to run
the build lines manually before the script.
Perhaps I should check this 'if' into lp:maria?
Bo.
[Maria-developers] Updated (by Knielsen): Windows installer for MariaDB (55)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Windows installer for MariaDB
CREATION DATE..: Wed, 14 Oct 2009, 00:07
SUPERVISOR.....: Monty
IMPLEMENTOR....: Bothorsen
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 55 (http://askmonty.org/worklog/?tid=55)
VERSION........: Server-5.1
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:45)=-=-
Version updated.
--- /tmp/wklog.55.old.18466 2010-05-14 06:45:38.000000000 +0000
+++ /tmp/wklog.55.new.18466 2010-05-14 06:45:38.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+Server-5.1
-=-=(Knielsen - Fri, 14 May 2010, 06:45)=-=-
Category updated.
--- /tmp/wklog.55.old.18457 2010-05-14 06:45:28.000000000 +0000
+++ /tmp/wklog.55.new.18457 2010-05-14 06:45:28.000000000 +0000
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
-=-=(Knielsen - Fri, 14 May 2010, 06:45)=-=-
Status updated.
--- /tmp/wklog.55.old.18457 2010-05-14 06:45:28.000000000 +0000
+++ /tmp/wklog.55.new.18457 2010-05-14 06:45:28.000000000 +0000
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Psergey - Sat, 17 Oct 2009, 00:03)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.19999 2009-10-17 00:03:11.000000000 +0300
+++ /tmp/wklog.55.new.19999 2009-10-17 00:03:11.000000000 +0300
@@ -79,5 +79,9 @@
3. Other notes
--------------
* MariaDB's logo is the seal. It can be found here:
-http://askmonty.org/wiki/index.php/MariaDB_Logo
+ http://askmonty.org/wiki/index.php/MariaDB_Logo
+* We should make both 32-bit installer and 64-bit installer (the
+ latter will be possible when we have 64-bit windows binaries)
+* At this point we don't see a need to force a reboot after the installation.
+* The installer should be Vista and Windows7-proof.
-=-=(Psergey - Thu, 15 Oct 2009, 23:46)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.23061 2009-10-15 23:46:18.000000000 +0300
+++ /tmp/wklog.55.new.23061 2009-10-15 23:46:18.000000000 +0300
@@ -7,28 +7,25 @@
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
-- Copies files on installation
-- Registers mysqld a service
-- Prompts the user for "essential" configuration options. Preliminary list
- of "essential" options:
+- Presents the user with GPL licence
+- Prompts the user for "essential" configuration options:
* Install directory
- Data directory (see email from Peter Laursen on maria-developers@ dated
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * TODO come up with the final list. The criteria for inclusion are:
- 1. ask for things that are essential to have a working setup as soon as
- the installation is complete
- 2. ask for things without answers for which the newbies can get into
- trouble.
-
+ * [possibly] sql_mode setting.
- Checks if the target installation directory, TCP port, or named pipe are
- busy. If they are, it asks to remove the previous installation first
- and aborts. (that is: upgrades are not supported in step#1)
+ busy. If they are, offers to either change these parameters or abort the
+ installation (that is: no support for any kind of upgrades at this point)
+- Copies installation files to appropriate destination
+- Registers mysqld a service
- Generates appropriate my.cnf file
-- Sets up SQL user with specified password
+- Sets up SQL root user with the specified password
- Adds start menu items
- to start the server manually (with --console)
+ (note: starting server manually requires write access to datadir, which
+ not all users will have. what to do?)
- to start mysql client
- to edit the my.cnf file.
- Registers MariaDB to start as a service with the specified parameters.
@@ -53,7 +50,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Installer should include configuration wizard that's similar to what MySQL
installer does.
- TODO come up with options
+ TODO come up with a list of options to set.
1.4 Items not on the wishlist
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-=-=(Psergey - Thu, 15 Oct 2009, 23:40)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.22795 2009-10-15 23:40:36.000000000 +0300
+++ /tmp/wklog.55.new.22795 2009-10-15 23:40:36.000000000 +0300
@@ -7,7 +7,8 @@
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
-- Shows GPL Licence
+- Copies files on installation
+- Registers mysqld a service
- Prompts the user for "essential" configuration options. Preliminary list
of "essential" options:
* Install directory
@@ -15,27 +16,28 @@
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * [perhaps] sql_mode setting.
+ * TODO come up with the final list. The criteria for inclusion are:
+ 1. ask for things that are essential to have a working setup as soon as
+ the installation is complete
+ 2. ask for things without answers for which the newbies can get into
+ trouble.
-- Copies files to destination directories
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
-- Generates appropriate my.cnf file (from a template)
-- Sets up SQL root user with specified password
+- Generates appropriate my.cnf file
+- Sets up SQL user with specified password
- Adds start menu items
- to start the server manually (with --console)
- (note: will this work from any user, and on any OS? if not, this might
- be omitted)
- to start mysql client
- - to edit the my.cnf file
-- Registers MariaDB to start as a service with the specified parameters
-- Registers MariaDB as installed software, sets up uninstaller
+ - to edit the my.cnf file.
+- Registers MariaDB to start as a service with the specified parameters.
+- Registers MariaDB as installed software, sets up uninstaller.
(TODO: should the uninstaller the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
the user and requests to file it as a bug (How far we should go here depends
- on how complex and error-prone the installation procedure will end up being).
+ on how complex and error-prone the final installation procedure will be).
1.2 Step 2: Upgrades from MySQL or MariaDB
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-=-=(Psergey - Thu, 15 Oct 2009, 23:40)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.22774 2009-10-15 23:40:16.000000000 +0300
+++ /tmp/wklog.55.new.22774 2009-10-15 23:40:16.000000000 +0300
@@ -15,13 +15,9 @@
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * TODO come up with the final list. The criteria for inclusion are:
- 1. ask for things that are essential to have a working setup as soon as
- the installation is complete
- 2. ask for things without answers for which the newbies can get into
- trouble.
+ * [perhaps] sql_mode setting.
-- Copies files to destination directory
+- Copies files to destination directories
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
@@ -29,6 +25,8 @@
- Sets up SQL root user with specified password
- Adds start menu items
- to start the server manually (with --console)
+ (note: will this work from any user, and on any OS? if not, this might
+ be omitted)
- to start mysql client
- to edit the my.cnf file
- Registers MariaDB to start as a service with the specified parameters
-=-=(Psergey - Thu, 15 Oct 2009, 23:38)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.22618 2009-10-15 23:38:06.000000000 +0300
+++ /tmp/wklog.55.new.22618 2009-10-15 23:38:06.000000000 +0300
@@ -7,8 +7,7 @@
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
-- Copies files on installation
-- Registers mysqld a service
+- Shows GPL Licence
- Prompts the user for "essential" configuration options. Preliminary list
of "essential" options:
* Install directory
@@ -22,22 +21,23 @@
2. ask for things without answers for which the newbies can get into
trouble.
+- Copies files to destination directory
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
-- Generates appropriate my.cnf file
-- Sets up SQL user with specified password
+- Generates appropriate my.cnf file (from a template)
+- Sets up SQL root user with specified password
- Adds start menu items
- to start the server manually (with --console)
- to start mysql client
- - to edit the my.cnf file.
-- Registers MariaDB to start as a service with the specified parameters.
-- Registers MariaDB as installed software, sets up uninstaller.
+ - to edit the my.cnf file
+- Registers MariaDB to start as a service with the specified parameters
+- Registers MariaDB as installed software, sets up uninstaller
(TODO: should the uninstaller the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
the user and requests to file it as a bug (How far we should go here depends
- on how complex and error-prone the final installation procedure will be).
+ on how complex and error-prone the installation procedure will end up being).
1.2 Step 2: Upgrades from MySQL or MariaDB
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-=-=(Psergey - Thu, 15 Oct 2009, 16:34)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.3919 2009-10-15 16:34:22.000000000 +0300
+++ /tmp/wklog.55.new.3919 2009-10-15 16:34:22.000000000 +0300
@@ -81,6 +81,6 @@
3. Other notes
--------------
-* MySQL's logo is the seal. It can be found here:
+* MariaDB's logo is the seal. It can be found here:
http://askmonty.org/wiki/index.php/MariaDB_Logo
-=-=(Bothorsen - Thu, 15 Oct 2009, 15:40)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.1396 2009-10-15 15:40:03.000000000 +0300
+++ /tmp/wklog.55.new.1396 2009-10-15 15:40:03.000000000 +0300
@@ -81,6 +81,6 @@
3. Other notes
--------------
-* MySQL's logo is the seal (see e.g. here: https://code.launchpad.net/maria)
- Bo Thorsen has the latest revision of the picture in various formats.
+* MySQL's logo is the seal. It can be found here:
+http://askmonty.org/wiki/index.php/MariaDB_Logo
------------------------------------------------------------
-=-=(View All Progress Notes, 18 total)=-=-
http://askmonty.org/worklog/index.pl?tid=55&nolimit=1
DESCRIPTION:
We need a Windows Installer package for MariaDB.
HIGH-LEVEL SPECIFICATION:
Not a spec so far but a list of points to consider:
1. Installer wishlist (user POV)
--------------------------------
From the user point of view:
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
- Presents the user with the GPL licence
- Prompts the user for "essential" configuration options:
* Install directory
- Data directory (see email from Peter Laursen on maria-developers@ dated
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
* [possibly] sql_mode setting.
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, offers to either change these parameters or abort the
installation (that is: no support for any kind of upgrades at this point)
- Copies installation files to appropriate destination
- Registers mysqld as a service
- Generates appropriate my.cnf file (a sketch follows this list)
- Sets up SQL root user with the specified password
- Adds start menu items
- to start the server manually (with --console)
(note: starting server manually requires write access to datadir, which
not all users will have. what to do?)
- to start mysql client
- to edit the my.cnf file.
- Registers MariaDB to start as a service with the specified parameters.
- Registers MariaDB as installed software, sets up uninstaller.
(TODO: should the uninstaller remove the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
the user and requests to file it as a bug (How far we should go here depends
on how complex and error-prone the final installation procedure will be).
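A minimal sketch of the kind of my.cnf the installer could generate from the
answers above (all paths and values here are illustrative placeholders, not
decided defaults):

    [mysqld]
    # filled in from the installer prompts above
    datadir=C:/MariaDB/data
    port=3306
    character-set-server=utf8

    [client]
    port=3306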
1.2 Step 2: Upgrades from MySQL or MariaDB
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Same as above, but with handling of the case where MySQL has already been
installed:
- offer to replace MySQL.
- upgrade the data directory (TODO: we should sort out whether anything,
and what, is needed for this).
- uninstall MySQL.
- install MariaDB.
1.3 Step 3: Configuration wizard
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Installer should include configuration wizard that's similar to what MySQL
installer does.
TODO come up with a list of options to set.
1.4 Items not on the wishlist
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is a list of things that one might want an installer to do but that are
out of scope of this WL entry:
* Calibration of my.cnf parameters based on available memory, number of CPUs,
etc.
2. Installer wishlist (developer POV)
-------------------------------------
* Some "installshield-like" tool that's easy to use (suggestion by Webyog:
NSIS)
* Installation procedure source should reside in the MariaDB source repository
* The installation procedure source file should preferably be in a
human-readable text format.
* It should be possible to automate creation of the installer package, in a way
that can be run from buildbot (e.g. the installer package build process
should print messages to its stdout); a sketch of such a step follows this
list.
* Any suggestions on how one can automatically test the installation package?
(for example, we'll want to start the installer, install, check that
installation succeeded, then start the server, run some commands, then
uninstall. Any ways to achieve that?)
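As a starting point, here is a minimal sketch of such a buildbot-runnable
packaging-and-test step, assuming NSIS is chosen as the tool and that an
installer script win/mariadb.nsi exists in the tree (the script name and the
installer file name are assumptions, nothing is decided yet); all progress is
printed to stdout so buildbot can capture it:

    #!/bin/sh
    # Hypothetical packaging step; mariadb.nsi and the output file name
    # are placeholders, not checked-in files.
    set -e
    echo "Compiling installer with NSIS..."
    makensis win/mariadb.nsi
    echo "Smoke test: silent install..."
    # NSIS-generated installers accept /S for silent (unattended) mode.
    ./mariadb-installer.exe /S
    echo "Installer step finished OK."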
3. Other notes
--------------
* MariaDB's logo is the seal. It can be found here:
http://askmonty.org/wiki/index.php/MariaDB_Logo
* We should make both a 32-bit and a 64-bit installer (the latter will be
possible once we have 64-bit Windows binaries)
* At this point we don't see a need to force a reboot after the installation.
* The installer should be Vista- and Windows 7-proof.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Updated (by Knielsen): Windows installer for MariaDB (55)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Windows installer for MariaDB
CREATION DATE..: Wed, 14 Oct 2009, 00:07
SUPERVISOR.....: Monty
IMPLEMENTOR....: Bothorsen
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 55 (http://askmonty.org/worklog/?tid=55)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:45)=-=-
Category updated.
--- /tmp/wklog.55.old.18457 2010-05-14 06:45:28.000000000 +0000
+++ /tmp/wklog.55.new.18457 2010-05-14 06:45:28.000000000 +0000
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
-=-=(Knielsen - Fri, 14 May 2010, 06:45)=-=-
Status updated.
--- /tmp/wklog.55.old.18457 2010-05-14 06:45:28.000000000 +0000
+++ /tmp/wklog.55.new.18457 2010-05-14 06:45:28.000000000 +0000
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Psergey - Sat, 17 Oct 2009, 00:03)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.19999 2009-10-17 00:03:11.000000000 +0300
+++ /tmp/wklog.55.new.19999 2009-10-17 00:03:11.000000000 +0300
@@ -79,5 +79,9 @@
3. Other notes
--------------
* MariaDB's logo is the seal. It can be found here:
-http://askmonty.org/wiki/index.php/MariaDB_Logo
+ http://askmonty.org/wiki/index.php/MariaDB_Logo
+* We should make both 32-bit installer and 64-bit installer (the
+ latter will be possible when we have 64-bit windows binaries)
+* At this point we don't see a need to force a reboot after the installation.
+* The installer should be Vista and Windows7-proof.
-=-=(Psergey - Thu, 15 Oct 2009, 23:46)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.23061 2009-10-15 23:46:18.000000000 +0300
+++ /tmp/wklog.55.new.23061 2009-10-15 23:46:18.000000000 +0300
@@ -7,28 +7,25 @@
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
-- Copies files on installation
-- Registers mysqld a service
-- Prompts the user for "essential" configuration options. Preliminary list
- of "essential" options:
+- Presents the user with GPL licence
+- Prompts the user for "essential" configuration options:
* Install directory
- Data directory (see email from Peter Laursen on maria-developers@ dated
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * TODO come up with the final list. The criteria for inclusion are:
- 1. ask for things that are essential to have a working setup as soon as
- the installation is complete
- 2. ask for things without answers for which the newbies can get into
- trouble.
-
+ * [possibly] sql_mode setting.
- Checks if the target installation directory, TCP port, or named pipe are
- busy. If they are, it asks to remove the previous installation first
- and aborts. (that is: upgrades are not supported in step#1)
+ busy. If they are, offers to either change these parameters or abort the
+ installation (that is: no support for any kind of upgrades at this point)
+- Copies installation files to appropriate destination
+- Registers mysqld a service
- Generates appropriate my.cnf file
-- Sets up SQL user with specified password
+- Sets up SQL root user with the specified password
- Adds start menu items
- to start the server manually (with --console)
+ (note: starting server manually requires write access to datadir, which
+ not all users will have. what to do?)
- to start mysql client
- to edit the my.cnf file.
- Registers MariaDB to start as a service with the specified parameters.
@@ -53,7 +50,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Installer should include configuration wizard that's similar to what MySQL
installer does.
- TODO come up with options
+ TODO come up with a list of options to set.
1.4 Items not on the wishlist
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-=-=(Psergey - Thu, 15 Oct 2009, 23:40)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.22795 2009-10-15 23:40:36.000000000 +0300
+++ /tmp/wklog.55.new.22795 2009-10-15 23:40:36.000000000 +0300
@@ -7,7 +7,8 @@
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
-- Shows GPL Licence
+- Copies files on installation
+- Registers mysqld a service
- Prompts the user for "essential" configuration options. Preliminary list
of "essential" options:
* Install directory
@@ -15,27 +16,28 @@
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * [perhaps] sql_mode setting.
+ * TODO come up with the final list. The criteria for inclusion are:
+ 1. ask for things that are essential to have a working setup as soon as
+ the installation is complete
+ 2. ask for things without answers for which the newbies can get into
+ trouble.
-- Copies files to destination directories
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
-- Generates appropriate my.cnf file (from a template)
-- Sets up SQL root user with specified password
+- Generates appropriate my.cnf file
+- Sets up SQL user with specified password
- Adds start menu items
- to start the server manually (with --console)
- (note: will this work from any user, and on any OS? if not, this might
- be omitted)
- to start mysql client
- - to edit the my.cnf file
-- Registers MariaDB to start as a service with the specified parameters
-- Registers MariaDB as installed software, sets up uninstaller
+ - to edit the my.cnf file.
+- Registers MariaDB to start as a service with the specified parameters.
+- Registers MariaDB as installed software, sets up uninstaller.
(TODO: should the uninstaller the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
the user and requests to file it as a bug (How far we should go here depends
- on how complex and error-prone the installation procedure will end up being).
+ on how complex and error-prone the final installation procedure will be).
1.2 Step 2: Upgrades from MySQL or MariaDB
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-=-=(Psergey - Thu, 15 Oct 2009, 23:40)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.22774 2009-10-15 23:40:16.000000000 +0300
+++ /tmp/wklog.55.new.22774 2009-10-15 23:40:16.000000000 +0300
@@ -15,13 +15,9 @@
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * TODO come up with the final list. The criteria for inclusion are:
- 1. ask for things that are essential to have a working setup as soon as
- the installation is complete
- 2. ask for things without answers for which the newbies can get into
- trouble.
+ * [perhaps] sql_mode setting.
-- Copies files to destination directory
+- Copies files to destination directories
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
@@ -29,6 +25,8 @@
- Sets up SQL root user with specified password
- Adds start menu items
- to start the server manually (with --console)
+ (note: will this work from any user, and on any OS? if not, this might
+ be omitted)
- to start mysql client
- to edit the my.cnf file
- Registers MariaDB to start as a service with the specified parameters
-=-=(Psergey - Thu, 15 Oct 2009, 23:38)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.22618 2009-10-15 23:38:06.000000000 +0300
+++ /tmp/wklog.55.new.22618 2009-10-15 23:38:06.000000000 +0300
@@ -7,8 +7,7 @@
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
-- Copies files on installation
-- Registers mysqld a service
+- Shows GPL Licence
- Prompts the user for "essential" configuration options. Preliminary list
of "essential" options:
* Install directory
@@ -22,22 +21,23 @@
2. ask for things without answers for which the newbies can get into
trouble.
+- Copies files to destination directory
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
-- Generates appropriate my.cnf file
-- Sets up SQL user with specified password
+- Generates appropriate my.cnf file (from a template)
+- Sets up SQL root user with specified password
- Adds start menu items
- to start the server manually (with --console)
- to start mysql client
- - to edit the my.cnf file.
-- Registers MariaDB to start as a service with the specified parameters.
-- Registers MariaDB as installed software, sets up uninstaller.
+ - to edit the my.cnf file
+- Registers MariaDB to start as a service with the specified parameters
+- Registers MariaDB as installed software, sets up uninstaller
(TODO: should the uninstaller the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
the user and requests to file it as a bug (How far we should go here depends
- on how complex and error-prone the final installation procedure will be).
+ on how complex and error-prone the installation procedure will end up being).
1.2 Step 2: Upgrades from MySQL or MariaDB
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-=-=(Psergey - Thu, 15 Oct 2009, 16:34)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.3919 2009-10-15 16:34:22.000000000 +0300
+++ /tmp/wklog.55.new.3919 2009-10-15 16:34:22.000000000 +0300
@@ -81,6 +81,6 @@
3. Other notes
--------------
-* MySQL's logo is the seal. It can be found here:
+* MariaDB's logo is the seal. It can be found here:
http://askmonty.org/wiki/index.php/MariaDB_Logo
-=-=(Bothorsen - Thu, 15 Oct 2009, 15:40)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.1396 2009-10-15 15:40:03.000000000 +0300
+++ /tmp/wklog.55.new.1396 2009-10-15 15:40:03.000000000 +0300
@@ -81,6 +81,6 @@
3. Other notes
--------------
-* MySQL's logo is the seal (see e.g. here: https://code.launchpad.net/maria)
- Bo Thorsen has the latest revision of the picture in various formats.
+* MySQL's logo is the seal. It can be found here:
+http://askmonty.org/wiki/index.php/MariaDB_Logo
-=-=(Psergey - Thu, 15 Oct 2009, 15:22)=-=-
High-Level Specification modified.
--- /tmp/wklog.55.old.466 2009-10-15 15:22:05.000000000 +0300
+++ /tmp/wklog.55.new.466 2009-10-15 15:22:05.000000000 +0300
@@ -16,7 +16,7 @@
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
- * <TODO come up with the final list. The criteria for inclusion are:
+ * TODO come up with the final list. The criteria for inclusion are:
1. ask for things that are essential to have a working setup as soon as
the installation is complete
2. ask for things without answers for which the newbies can get into
@@ -25,11 +25,14 @@
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, it asks to remove the previous installation first
and aborts. (that is: upgrades are not supported in step#1)
+- Generates appropriate my.cnf file
+- Sets up SQL user with specified password
- Adds start menu items
- to start the server manually (with --console)
- to start mysql client
- to edit the my.cnf file.
-- Registers MariaDB as installed, sets up uninstaller.
+- Registers MariaDB to start as a service with the specified parameters.
+- Registers MariaDB as installed software, sets up uninstaller.
(TODO: should the uninstaller the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
@@ -54,9 +57,10 @@
1.4 Items not on the wishlist
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This is a list of things that one might want an installer to do but that are out
-of scope of this WL entry.
-* Calibration of my.cnf parameters based on available memory, number of CPUs, etc.
+This is a list of things that one might want an installer to do but that are
+out of scope of this WL entry:
+* Calibration of my.cnf parameters based on available memory, number of CPUs,
+ etc.
2. Installer wishlist (developer POV)
------------------------------------------------------------
-=-=(View All Progress Notes, 17 total)=-=-
http://askmonty.org/worklog/index.pl?tid=55&nolimit=1
DESCRIPTION:
We need a Windows Installer package for MariaDB.
HIGH-LEVEL SPECIFICATION:
Not a spec so far but a list of points to consider:
1. Installer wishlist (user POV)
--------------------------------
From the user point of view:
1.1 Step 1: simple installer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An installer package that
- Presents the user with the GPL licence
- Prompts the user for "essential" configuration options:
* Install directory
- Data directory (see email from Peter Laursen on maria-developers@ dated
14-10-2009 about data directory, Vista and UAC)
* root password
* default character set
* [possibly] sql_mode setting.
- Checks if the target installation directory, TCP port, or named pipe are
busy. If they are, offers to either change these parameters or abort the
installation (that is: no support for any kind of upgrades at this point)
- Copies installation files to appropriate destination
- Registers mysqld as a service
- Generates appropriate my.cnf file
- Sets up SQL root user with the specified password
- Adds start menu items
- to start the server manually (with --console)
(note: starting server manually requires write access to datadir, which
not all users will have. what to do?)
- to start mysql client
- to edit the my.cnf file.
- Registers MariaDB to start as a service with the specified parameters.
- Registers MariaDB as installed software, sets up uninstaller.
(TODO: should the uninstaller remove the datadir or leave it? (or ask the user?))
- Creates installation log, and in case of any failures presents the log to
the user and requests to file it as a bug (How far we should go here depends
on how complex and error-prone the final installation procedure will be).
1.2 Step 2: Upgrades from MySQL or MariaDB
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Same as above, but with handling of the case where MySQL has already been
installed:
- offer to replace MySQL.
- upgrade the data directory (TODO: we should sort out whether anything,
and what, is needed for this).
- uninstall MySQL.
- install MariaDB.
1.3 Step 3: Configuration wizard
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Installer should include configuration wizard that's similar to what MySQL
installer does.
TODO come up with a list of options to set.
1.4 Items not on the wishlist
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This is a list of things that one might want an installer to do but that are
out of scope of this WL entry:
* Calibration of my.cnf parameters based on available memory, number of CPUs,
etc.
2. Installer wishlist (developer POV)
-------------------------------------
* Some "installshield-like" tool that's easy to use (suggestion by Webyog:
NSIS)
* Installation procedure source should reside in MariaDB source repository
* The installation procedure source file should preferably be in a
human-readable text format.
* It should be possible to automate creation of the installer package, in a way
that can be run from buildbot (e.g. the installer package build process
should print messages to its stdout)
* Any suggestions on how one can automatically test the installation package?
(for example, we'll want to start the installer, install, check that
installation succeeded, then start the server, run some commands, then
uninstall. Any ways to achieve that?)
3. Other notes
--------------
* MariaDB's logo is the seal. It can be found here:
http://askmonty.org/wiki/index.php/MariaDB_Logo
* We should make both a 32-bit installer and a 64-bit installer (the
latter will be possible when we have 64-bit Windows binaries)
* At this point we don't see a need to force a reboot after the installation.
* The installer should be Vista and Windows7-proof.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Updated (by Knielsen): Implement mysql-test output parser for Buildbot (22)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Implement mysql-test output parser for Buildbot
CREATION DATE..: Thu, 21 May 2009, 22:19
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 22 (http://askmonty.org/worklog/?tid=22)
VERSION........:
STATUS.........: Complete
PRIORITY.......: 60
WORKED HOURS...: 30
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 30
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:44)=-=-
Status updated.
--- /tmp/wklog.22.old.18407 2010-05-14 06:44:28.000000000 +0000
+++ /tmp/wklog.22.new.18407 2010-05-14 06:44:28.000000000 +0000
@@ -1 +1 @@
-Assigned
+Complete
-=-=(Knielsen - Fri, 14 May 2010, 06:44)=-=-
This was done a long time ago, but I lost the records of how much time was spent on it.
Worked 30 hours and estimate 0 hours remain (original estimate unchanged).
-=-=(Guest - Fri, 26 Jun 2009, 11:34)=-=-
Status updated.
--- /tmp/wklog.22.old.29004 2009-06-26 11:34:41.000000000 +0300
+++ /tmp/wklog.22.new.29004 2009-06-26 11:34:41.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Knielsen - Thu, 21 May 2009, 22:33)=-=-
High Level Description modified.
--- /tmp/wklog.22.old.27679 2009-05-21 22:33:35.000000000 +0300
+++ /tmp/wklog.22.new.27679 2009-05-21 22:33:35.000000000 +0300
@@ -9,3 +9,6 @@
http://djmitche.github.com/buildbot/docs/0.7.10/#Writing-New-BuildSteps
+Later, once we get the infrastructure to write Buildbot results into a MySQL
+database, we want to extend this to also insert into the database all test
+failures and the mysqltest failure output (for cross-reference search).
DESCRIPTION:
Like in Pushbuild at MySQL AB, we want to have buildbot parse the output of
mysql-test-run for test errors so we can display on the front page the names
of any tests that failed.
The parser can also count the number of tests completed so far, so Buildbot
can provide more accurate completion estimates (a sketch of such a parser
follows below).
Buildbot already has support for plugging in such modules. See eg.
http://djmitche.github.com/buildbot/docs/0.7.10/#Writing-New-BuildSteps
Later, once we get the infrastructure to write Buildbot results into a MySQL
database, we want to extend this to also insert into the database all test
failures and the mysqltest failure output (for cross-reference search).
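A minimal sketch of such a parser, written against the Buildbot 0.7-era
LogLineObserver API; the regular expression for mysql-test-run's result lines
(e.g. "main.select ... [ pass ]") is an assumption and would need checking
against real mtr output:

  import re
  from buildbot.process.buildstep import LogLineObserver

  class MtrLineObserver(LogLineObserver):
      """Counts finished tests and collects the names of failing ones."""
      # Assumed shape of an mtr result line: "suite.test ... [ pass ]".
      lineRe = re.compile(r"^([\w\-]+\.[\w\-]+)\b.*\[ (pass|fail|skipped) \]")

      def __init__(self):
          LogLineObserver.__init__(self)
          self.testsDone = 0
          self.failedTests = []

      def outLineReceived(self, line):
          m = self.lineRe.search(line)
          if not m:
              return
          self.testsDone += 1
          if m.group(2) == "fail":
              self.failedTests.append(m.group(1))
          # Feeding the count back lets Buildbot estimate completion.
          self.step.setProgress("tests", self.testsDone)

The observer would be attached to the test step with
addLogObserver("stdio", MtrLineObserver()), and the collected failedTests
list used to build the failure summary shown on the front page.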
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Implement mysql-test output parser for Buildbot (22)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Implement mysql-test output parser for Buildbot
CREATION DATE..: Thu, 21 May 2009, 22:19
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 22 (http://askmonty.org/worklog/?tid=22)
VERSION........:
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 30
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 30
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:44)=-=-
This was done a long time ago, but I lost the records of how much time was spent on it.
Worked 30 hours and estimate 0 hours remain (original estimate unchanged).
-=-=(Guest - Fri, 26 Jun 2009, 11:34)=-=-
Status updated.
--- /tmp/wklog.22.old.29004 2009-06-26 11:34:41.000000000 +0300
+++ /tmp/wklog.22.new.29004 2009-06-26 11:34:41.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Knielsen - Thu, 21 May 2009, 22:33)=-=-
High Level Description modified.
--- /tmp/wklog.22.old.27679 2009-05-21 22:33:35.000000000 +0300
+++ /tmp/wklog.22.new.27679 2009-05-21 22:33:35.000000000 +0300
@@ -9,3 +9,6 @@
http://djmitche.github.com/buildbot/docs/0.7.10/#Writing-New-BuildSteps
+Later, once we get the infrastructure to write Buildbot results into a MySQL
+database, we want to extend this to also insert into the database all test
+failures and the mysqltest failure output (for cross-reference search).
DESCRIPTION:
Like in Pushbuild at MySQL AB, we want to have buildbot parse the output of
mysql-test-run for test errors so we can display on the front page the names
of any tests that failed.
The parser can also count the number of tests completed so far, so Buildbot
can provide more accurate completion estimates.
Buildbot already has support for plugging in such modules. See eg.
http://djmitche.github.com/buildbot/docs/0.7.10/#Writing-New-BuildSteps
Later, once we get the infrastructure to write Buildbot results into a MySQL
database, we want to extend this to also insert into the database all test
failures and the mysqltest failure output (for cross-reference search).
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Updated (by Knielsen): Buildbot upgrade test from MariaDB 5.1.42->newest (108)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Buildbot upgrade test from MariaDB 5.1.42->newest
CREATION DATE..: Fri, 19 Mar 2010, 07:48
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 108 (http://askmonty.org/worklog/?tid=108)
VERSION........:
STATUS.........: Cancelled
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 25 (hours remain)
ORIG. ESTIMATE.: 25
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:43)=-=-
Version updated.
--- /tmp/wklog.108.old.18388 2010-05-14 06:43:08.000000000 +0000
+++ /tmp/wklog.108.new.18388 2010-05-14 06:43:08.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+
-=-=(Knielsen - Fri, 14 May 2010, 06:43)=-=-
Status updated.
--- /tmp/wklog.108.old.18388 2010-05-14 06:43:08.000000000 +0000
+++ /tmp/wklog.108.new.18388 2010-05-14 06:43:08.000000000 +0000
@@ -1 +1 @@
-Un-Assigned
+Cancelled
-=-=(Knielsen - Fri, 14 May 2010, 06:42)=-=-
This task was done in MWL#118 (I forgot about this one, so I added a new, duplicate task).
Reported zero hours worked. Estimate unchanged.
-=-=(Knielsen - Fri, 19 Mar 2010, 07:52)=-=-
Low Level Design modified.
--- /tmp/wklog.108.old.3290 2010-03-19 07:52:59.000000000 +0000
+++ /tmp/wklog.108.new.3290 2010-03-19 07:52:59.000000000 +0000
@@ -1 +1,11 @@
+Tasks needed to do this:
+
+ - For all the .deb kvm builders, set up a new virtual machine image. Based on
+ -serial or -install, pre-install the appropriate MariaDB 5.1.42 package
+ with some data. Similar to how the -upgrade images are set up with MySQL
+ pre-installed.
+
+ - Add a test step in the Buildbot configuration that runs another upgrade
+ test, similar to the existing upgrade test, but with the images with
+ MariaDB preinstalled rather than MySQL.
DESCRIPTION:
Buildbot currently does an automatic upgrade test for .debs, from the official
MySQL 5.0/5.1 packages to the newest MariaDB package.
In addition to this, we need an upgrade test from an older MariaDB release.
Probably 5.1.42, the first stable release, is the one to use.
This is particularly important for the 5.2/5.3 trees, to test that upgrading
from 5.1 to 5.2/5.3 works.
LOW-LEVEL DESIGN:
Tasks needed to do this:
- For all the .deb kvm builders, set up a new virtual machine image. Based on
-serial or -install, pre-install the appropriate MariaDB 5.1.42 package
with some data. Similar to how the -upgrade images are set up with MySQL
pre-installed.
- Add a test step in the Buildbot configuration that runs another upgrade
test, similar to the existing upgrade test, but with the images with
MariaDB preinstalled rather than MySQL.
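A sketch of what the second item could look like in the Buildbot master
configuration; the step name, the runvm-style wrapper command, and the image
file name are hypothetical placeholders, not the real builder setup:

  from buildbot.process import factory
  from buildbot.steps.shell import ShellCommand

  f = factory.BuildFactory()
  # ... the existing build and packaging steps go here ...
  f.addStep(ShellCommand(
      name="upgrade-from-5.1.42",
      description=["testing", "upgrade", "from", "5.1.42"],
      # Hypothetical wrapper: boot the image that has MariaDB 5.1.42
      # pre-installed, install the freshly built .debs, run basic checks.
      command=["runvm", "--image=vm-hardy-amd64-maria-5.1.42.qcow2",
               "sudo dpkg -i mariadb-*.deb && mysql --version"],
  ))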
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Buildbot upgrade test from MariaDB 5.1.42->newest (108)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Buildbot upgrade test from MariaDB 5.1.42->newest
CREATION DATE..: Fri, 19 Mar 2010, 07:48
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 108 (http://askmonty.org/worklog/?tid=108)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 25 (hours remain)
ORIG. ESTIMATE.: 25
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:42)=-=-
This task was done in MWL#118 (I forgot about this one, so I added a new, duplicate task).
Reported zero hours worked. Estimate unchanged.
-=-=(Knielsen - Fri, 19 Mar 2010, 07:52)=-=-
Low Level Design modified.
--- /tmp/wklog.108.old.3290 2010-03-19 07:52:59.000000000 +0000
+++ /tmp/wklog.108.new.3290 2010-03-19 07:52:59.000000000 +0000
@@ -1 +1,11 @@
+Tasks needed to do this:
+
+ - For all the .deb kvm builders, set up a new virtual machine image. Based on
+ -serial or -install, pre-install the appropriate MariaDB 5.1.42 package
+ with some data. Similar to how the -upgrade images are set up with MySQL
+ pre-installed.
+
+ - Add a test step in the Buildbot configuration that runs another upgrade
+ test, similar to the existing upgrade test, but with the images with
+ MariaDB preinstalled rather than MySQL.
DESCRIPTION:
Buildbot currently does an automatic upgrade test for .debs, from the official
MySQL 5.0/5.1 packages to the newest MariaDB package.
In addition to this, we need an upgrade test from an older MariaDB release.
Probably 5.1.42, the first stable release, is the one to use.
This is particularly important for the 5.2/5.3 trees, to test that upgrading
from 5.1 to 5.2/5.3 works.
LOW-LEVEL DESIGN:
Tasks needed to do this:
- For all the .deb kvm builders, set up a new virtual machine image. Based on
-serial or -install, pre-install the appropriate MariaDB 5.1.42 package
with some data. Similar to how the -upgrade images are set up with MySQL
pre-installed.
- Add a test step in the Buildbot configuration that runs another upgrade
test, similar to the existing upgrade test, but with the images with
MariaDB preinstalled rather than MySQL.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Updated (by Knielsen): Buildbot MariaDB->MariaDB upgrade testing (118)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Buildbot MariaDB->MariaDB upgrade testing
CREATION DATE..: Wed, 12 May 2010, 13:48
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 118 (http://askmonty.org/worklog/?tid=118)
VERSION........:
STATUS.........: Complete
PRIORITY.......: 60
WORKED HOURS...: 5
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 8
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:41)=-=-
Version updated.
--- /tmp/wklog.118.old.18369 2010-05-14 06:41:01.000000000 +0000
+++ /tmp/wklog.118.new.18369 2010-05-14 06:41:01.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+
-=-=(Knielsen - Fri, 14 May 2010, 06:41)=-=-
Status updated.
--- /tmp/wklog.118.old.18369 2010-05-14 06:41:01.000000000 +0000
+++ /tmp/wklog.118.new.18369 2010-05-14 06:41:01.000000000 +0000
@@ -1 +1 @@
-Un-Assigned
+Complete
-=-=(Knielsen - Fri, 14 May 2010, 06:40)=-=-
Installed the new virtual images, using 5.1.42 from OurDelta (for all except
lucid, which is new and so has no 5.1.42 ourdelta package and also needed a
fixed set of packages due to changes in the lucid MySQL packaging).
Added a new upgrade2 step in the Buildbot configuration to test this.
Updated Buildbot wiki documentation.
Worked 5 hours and estimate 0 hours remain (original estimate decreased by 3 hours).
DESCRIPTION:
Create an additional test step for Buildbot to check that upgrading from one
version of MariaDB to another works ok.
We already have testing of upgrade from distro-official MySQL .debs to
our MariaDB .debs.
What we need is, for each Debian/Ubuntu version, a new KVM virtual image with
MariaDB 5.1.42 (the first GA release) pre-installed, just like the existing
upgrade test uses images with MySQL pre-installed.
Then the Buildbot configuration must be updated to add another upgrade test
step, just like the existing one but using the images with MariaDB pre-installed.
Also, the setup of the new images must be added to the existing documentation:
http://askmonty.org/wiki/BuildBot::package
http://askmonty.org/wiki/BuildBot:vm-setup
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Progress (by Knielsen): Buildbot MariaDB->MariaDB upgrade testing (118)
by worklog-noreply@askmonty.org 14 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Buildbot MariaDB->MariaDB upgrade testing
CREATION DATE..: Wed, 12 May 2010, 13:48
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 118 (http://askmonty.org/worklog/?tid=118)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 5
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 8
PROGRESS NOTES:
-=-=(Knielsen - Fri, 14 May 2010, 06:40)=-=-
Installed the new virtual images, using 5.1.42 from OurDelta (for all except
lucid, which is new and so has no 5.1.42 ourdelta package and also needed a
fixed set of packages due to changes in the lucid MySQL packaging).
Added a new upgrade2 step in the Buildbot configuration to test this.
Updated Buildbot wiki documentation.
Worked 5 hours and estimate 0 hours remain (original estimate decreased by 3 hours).
DESCRIPTION:
Create an additional test step for Buildbot to check that upgrading from one
version of MariaDB to another works ok.
We already have testing of upgrade from distro-official MySQL .debs to
our MariaDB .debs.
What we need is, for each Debian/Ubuntu version, a new KVM virtual image with
MariaDB 5.1.42 (the first GA release) pre-installed, just like the existing
upgrade test uses images with MySQL pre-installed.
Then the Buildbot configuration must be updated to add another upgrade test
step, just like the existing one but using the images with MariaDB pre-installed.
Also, the setup of the new images must be added to the existing documentation:
http://askmonty.org/wiki/BuildBot::package
http://askmonty.org/wiki/BuildBot:vm-setup
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
[Maria-developers] Summary of discussion about MariaDB future and release plans
by Kristian Nielsen 13 May '10
Hi,
With all the European MariaDB people stuck in California, there is an
unexpected opportunity for extra face-to-face meetups. So today I had the
opportunity to enjoy lunch at a nice sushi place in Santa Clara with Henrik,
Timour, Peter, Vadim, and Sanja. Here are a couple of key points from the
discussion:
1. We need to consider carefully how we handle keeping the MariaDB product
stable for production use. Peter had the point that the critical thing is
avoiding behaviour change. This means that if some version of MariaDB works
sufficiently different that the user need to work on their application to
upgrade, it is a blocker to adoption. Different in this context means
different from any previous version of MariaDB, but it _also_ means any
difference from MySQL!
I think I can see that this is actually more important than mere
bugs/regressions. If a bug slips through, we can fix it, and if we can fix it
quickly enough, the user may even be happy with us.
But if the behaviour is qualitatively different (and such difference can not
easily be turned off), that is a real problem for the user.
We discussed a lot around this for the subquery optimisations. This is a
"scary feature", as it has the very real potential to change execution plans,
occasionally for the worse.
As Timour pointed out, there is for each new optimiser feature a server
variable to enable/disable it. So we need to very clearly state in the release
notes, under "upgrading to MariaDB x.y", that there are optimiser changes, and
give the exact list of variable settings that will make the optimiser run in
"MySQL 5.1 mode".
For other features this is simpler. For storage engines, just disable the
engine and it will have no impact. For possible replication plugins, again
don't use the plugin and there should be no difference in behaviour. But
again, we need to very carefully think about this and communicate/document
this.
For other features again we might need to re-think this point, making sure we
do not change behaviour in a non-reversible way.
I think this is a good observation from those with a deep knowledge of the
operational side of databases.
2. On a note related to stability, I think we need to carefully avoid the
mistakes from the MySQL release model. Basically, we need to have regular
releases (6-12 month cycles). This is all-important! Much more important than
any single feature, however big. We must _never_ push a feature into a tree if
it is not ready. Better make an empty release!
So this means it is actually wrong to say that subqueries will be in MariaDB
5.3. That should not be the plan. The subqueries will be in the first release
made after they are ready, which may be 5.3 or may be something else.
I think we very nearly made the same mistake with 5.2. We had a feature list,
and while some of them were "rolling", we also had features for which it was
decided in advance that they _must_ be pushed at a certain date. I strongly believe
this is wrong. We should have made _all_ features rolling.
Unless we are extremely careful with this, sooner or later there will be
sufficient pressure that we will push unfinished stuff into a release, causing
_all_ of that release to be a failure. By waiting with the unfinished feature,
only that feature will "fail", and by being careful with releases, the next
release will be close anyway.
(Yes, "finished" cannot mean bug free. But it can mean "as good as we can
reasonably make it".)
3. The final point I took from the discussion was related to version
numbering. We in the MariaDB team of course know that we rock, and that we
will change the world tomorrow, or at least as soon as planes can take us
back to Europe :-). But the reality is that for now, we are an appendix to
MySQL with MariaDB 5.1 and 5.2.
It thus can be argued that this should be reflected in the version numbering,
to avoid confusion. Peter made the point that he would like to see MariaDB 5.1
and 5.2 versioning be made so that it is clear that there is a base MySQL
version plus some well-defined set of changes. (This is how XtraDB release
numbering works).
So we currently have MariaDB 5.1.44, which is equivalent to MySQL 5.1.44 plus
some set of safe patches. But MariaDB 5.2.0 is actually the same, just with
some additional, also safe patches.
So rather than go to MariaDB 5.2, we could imagine something like this:
MariaDB 5.1.44-1    Current MariaDB 5.1.44
MariaDB 5.1.44-2    Current MariaDB 5.2.0
MariaDB 5.1.44-1.1  In case we need to do a bugfix for 5.1.44-1
MariaDB 5.1.46-1    Next time we merge MySQL 5.1.46 into MariaDB -1
MariaDB 5.1.46-2    Next time we merge MySQL 5.1.46 into MariaDB -2
I need to think more before I fully make up my mind about these points one way
or the other, but I think these are in any case interesting points to
consider.
- Kristian.
[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2790)
by Igor Babaev 13 May '10
#At lp:maria/5.2 based on revid:igor@askmonty.org-20100512040958-yc654eq2zdfo8l8m
2790 Igor Babaev 2010-05-12
Cleanup
modified:
sql/sql_derived.cc
=== modified file 'sql/sql_derived.cc'
--- a/sql/sql_derived.cc 2010-05-12 04:09:58 +0000
+++ b/sql/sql_derived.cc 2010-05-13 06:59:14 +0000
@@ -159,12 +159,7 @@ mysql_handle_single_derived(LEX *lex, TA
uint phase_flag= DT_INIT << phase;
if (phase_flag > phases)
break;
-#if 0
- if (!(phases & phase_flag) ||
- derived->merged_for_insert && phase_flag != DT_REINIT)
-#else
if (!(phases & phase_flag))
-#endif
continue;
/* Skip derived tables to which the phase isn't applicable. */
if (phase_flag != DT_PREPARE &&
@@ -605,19 +600,11 @@ bool mysql_derived_prepare(THD *thd, LEX
bool res= FALSE;
// Skip already prepared views/DT
-#if 0
- if (!unit || unit->prepared || derived->merged_for_insert)
-#else
if (!unit || unit->prepared)
-#endif
DBUG_RETURN(FALSE);
/* It's a target view for an INSERT, create field translation only. */
-#if 0
- if (derived->skip_prepare_derived && !derived->is_multitable())
-#else
if (derived->merged_for_insert)
-#endif
{
res= derived->create_field_translation(thd);
DBUG_RETURN(res);
[Maria-developers] Updated (by Knielsen): Use Buildbot to populate apt/yum repositories (117)
by worklog-noreply@askmonty.org 13 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Use Buildbot to populate apt/yum repositories
CREATION DATE..: Wed, 12 May 2010, 07:04
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 117 (http://askmonty.org/worklog/?tid=117)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 4
ESTIMATE.......: 16 (hours remain)
ORIG. ESTIMATE.: 20
PROGRESS NOTES:
-=-=(Knielsen - Wed, 12 May 2010, 21:20)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.21448 2010-05-12 21:20:34.000000000 +0000
+++ /tmp/wklog.117.new.21448 2010-05-12 21:20:34.000000000 +0000
@@ -1,5 +1,5 @@
-As for signing, I think it may be possible/best to do the signing outside of
-buildbot, as a separate process. There are some advantages to this:
+The signing of packages can be done outside of Buildbot, as a separate
+process. There are some advantages to this:
- Security: the private key can be kept less exposed when it is not included
in the buildbot infrastructure.
@@ -9,9 +9,6 @@
- Generally reducing the complexity of the buildbot setup.
-This of course requires that it is possible to sign the packages after the
-actual build.
-
----
Here is how to sign the .rpms.
@@ -42,20 +39,37 @@
----
-For .deb, I *think* we are using secure apt, which does not actually sign the
-packages, rather it signs the "Release" file which is created when the
-repository is set up. So in this case again there is no problem doing the
-signing outside of the build itself (in fact that is the way it must be).
+For .deb, it is not the individual .deb that is signed, it is the
+repository. Here is one way to generate a signed repository, using reprepro.
-Found two tools that can help with building and signing apt repositories:
-reprepro (seems to be the newest, recommended) and apt-ftparchive.
+The ourdelta/bakery signing stuff needs to be copied to ~/.gnupg
-----
+mkdir repo # or whatever
+cd repo
+mkdir conf
+cat >conf/distributions <<END
+Origin: MariaDB
+Label: MariaDB
+Codename: hardy
+Architectures: amd64
+Components: mariadb-ourdelta
+Description: MariaDB test Repository
+SignWith: autosign(a)ourdelta.org
+END
+for i in `find /home/buildbot/debs/ -name '*.deb'` ; do reprepro --basedir=. includedeb hardy $i ; done
+
+The corresponding line for /etc/apt/sources.list:
-ToDO: Figure out how to handle the mysql-client-core issue on lucid. Arjen
-suggested splitting up so we have this package ourselves, or maybe it can be
-handled with replace/provide/conflict dependencies.
+ deb file:///home/buildbot/repo hardy mariadb-ourdelta
+
+This works for multiple distributions, by adding more sections to the
+conf/distributions file.
+
+----
-ToDo: Figure out exactly what files/directory structure needs to be uploaded
-(asked Peter, awaiting reply).
+For the mysql-client-core-5.1 issue, the solution is to split the
+mariadb-client-5.1 (and 5.2) package similarly into
+mariadb-client-core-5.1. The mariadb-client-core-5.1 package then provides:
+mysql-client-core-5.1.
-=-=(Knielsen - Wed, 12 May 2010, 18:25)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.12634 2010-05-12 18:25:58.000000000 +0000
+++ /tmp/wklog.117.new.12634 2010-05-12 18:25:58.000000000 +0000
@@ -12,9 +12,35 @@
This of course requires that it is possible to sign the packages after the
actual build.
-For .rpm this seems to be easy (from reading, didn't try yet):
+----
+
+Here is how to sign the .rpms.
+
+Copy in the ourdelta/bakery signing stuff to ~/.gnupg and ~/.rpmmacros.
+
+Run
+
+ rpm --addsign *.rpm
+
+That's all! This can be tested by creating a local yum repository:
- rpm --addsign <packages>
+ createrepo <dir>
+
+(where <dir> contains the signed .rpms). Then create the file
+/etc/yum.repos.d/localmaria.repo:
+
+[localmaria]
+name=Local MariaDB repo
+baseurl=file:///home/buildbot/rpms
+gpgcheck=1
+enabled=1
+gpgkey=http://master.ourdelta.org/deb/ourdelta.gpg
+
+Now this should work to install MariaDB:
+
+ sudo yum install MariaDB-server
+
+----
For .deb, I *think* we are using secure apt, which does not actually sign the
packages, rather it signs the "Release" file which is created when the
-=-=(Knielsen - Wed, 12 May 2010, 07:14)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.401 2010-05-12 07:14:27.000000000 +0000
+++ /tmp/wklog.117.new.401 2010-05-12 07:14:27.000000000 +0000
@@ -1 +1,35 @@
+As for signing, I think it may be possible/best to do the signing outside of
+buildbot, as a separate process. There are some advantages to this:
+
+ - Security: the private key can be kept less exposed when it is not included
+ in the buildbot infrastructure.
+
+ - It is good to have one step of human intervention before actually signing
+ and releasing packages.
+
+ - Generally reducing the complexity of the buildbot setup.
+
+This of course requires that it is possible to sign the packages after the
+actual build.
+
+For .rpm this seems to be easy (from reading, didn't try yet):
+
+ rpm --addsign <packages>
+
+For .deb, I *think* we are using secure apt, which does not actually sign the
+packages, rather it signs the "Release" file which is created when the
+repository is set up. So in this case again there is no problem doing the
+signing outside of the build itself (in fact that is the way it must be).
+
+Found two tools that can help with building and signing apt repositories:
+reprepro (seems to be the newest, recommended) and apt-ftparchive.
+
+----
+
+ToDO: Figure out how to handle the mysql-client-core issue on lucid. Arjen
+suggested splitting up so we have this package ourselves, or maybe it can be
+handled with replace/provide/conflict dependencies.
+
+ToDo: Figure out exactly what files/directory structure needs to be uploaded
+(asked Peter, awaiting reply).
-=-=(Knielsen - Wed, 12 May 2010, 07:06)=-=-
Upgraded lucid VMs to the official release.
Discussed with Arjen how to handle things.
Did a lot of reading on how apt repositories work.
Worked 4 hours and estimate 16 hours remain (original estimate unchanged).
DESCRIPTION:
Since the package building for MariaDB is now fully automated in Buildbot, it
has been decided to use packages from Buildbot for the OurDelta apt and yum
repositories.
This worklog is about fixing/implementing anything that is missing to achieve
this.
- When doing a real release build, packages/repositories need to be signed,
so that users will not get a warning about unauthenticated packages. This
signing must only be done on official releases, not on daily builds (to
avoid confusing one with the other).
- Packages must be uploaded from the Buildbot host. The OurDelta
infrastructure has a DropBox share that could be used for this, another
option is to simply use rsync.
- Ubuntu 10.04 "lucid" has been released, and we need to support that for
packages, so the Buildbot VM for lucid must be upgraded to have the
official release.
- In Ubuntu 10.04, the official MySQL packages include a new package
mysql-client-core, we currently have a conflict with this on install that
we need to handle somehow.
HIGH-LEVEL SPECIFICATION:
The signing of packages can be done outside of Buildbot, as a separate
process. There are some advantages to this:
- Security: the private key can be kept less exposed when it is not included
in the buildbot infrastructure.
- It is good to have one step of human intervention before actually signing
and releasing packages.
- Generally reducing the complexity of the buildbot setup.
----
Here is how to sign the .rpms.
Copy in the ourdelta/bakery signing stuff to ~/.gnupg and ~/.rpmmacros.
Run
rpm --addsign *.rpm
That's all! This can be tested by creating a local yum repository:
createrepo <dir>
(where <dir> contains the signed .rpms). Then create the file
/etc/yum.repos.d/localmaria.repo:
[localmaria]
name=Local MariaDB repo
baseurl=file:///home/buildbot/rpms
gpgcheck=1
enabled=1
gpgkey=http://master.ourdelta.org/deb/ourdelta.gpg
Now this should work to install MariaDB:
sudo yum install MariaDB-server
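Before publishing, the signatures can also be checked directly (assuming the
public key has been imported with rpm --import):
rpm --checksig *.rpm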
----
For .deb, it is not the individual .deb that is signed, it is the
repository. Here is one way to generate a signed repository, using reprepro.
The ourdelta/bakery signing stuff needs to be copied to ~/.gnupg.
mkdir repo # or whatever
cd repo
mkdir conf
cat >conf/distributions <<END
Origin: MariaDB
Label: MariaDB
Codename: hardy
Architectures: amd64
Components: mariadb-ourdelta
Description: MariaDB test Repository
SignWith: autosign(a)ourdelta.org
END
for i in `find /home/buildbot/debs/ -name '*.deb'` ; do reprepro --basedir=. includedeb hardy $i ; done
The corresponding line for /etc/apt/sources.list:
deb file:///home/buildbot/repo hardy mariadb-ourdelta
This works for multiple distributions, by adding more sections to the
conf/distributions file.
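For example (a hypothetical second stanza; reprepro separates stanzas in
conf/distributions with a blank line), lucid could be added next to hardy
like this:
Origin: MariaDB
Label: MariaDB
Codename: lucid
Architectures: amd64
Components: mariadb-ourdelta
Description: MariaDB test Repository
SignWith: autosign(a)ourdelta.org
The includedeb step is then run once per codename.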
----
For the mysql-client-core-5.1 issue, the solution is to split the
mariadb-client-5.1 (and 5.2) package the same way, into a separate
mariadb-client-core-5.1 package. The mariadb-client-core-5.1 package then
declares Provides: mysql-client-core-5.1.
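A rough sketch of what the debian/control stanza for the new package could
look like (hypothetical field values; the actual packaging may differ):
Package: mariadb-client-core-5.1
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Provides: mysql-client-core-5.1
Replaces: mysql-client-core-5.1
Conflicts: mysql-client-core-5.1
Description: MariaDB database core client binaries
With Provides/Replaces/Conflicts all pointing at mysql-client-core-5.1,
installing the MariaDB package cleanly takes over from the official one.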
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Updated (by Knielsen): Use Buildbot to populate apt/yum repositories (117)
by worklog-noreply@askmonty.org 12 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Use Buildbot to populate apt/yum repositories
CREATION DATE..: Wed, 12 May 2010, 07:04
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 117 (http://askmonty.org/worklog/?tid=117)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 4
ESTIMATE.......: 16 (hours remain)
ORIG. ESTIMATE.: 20
PROGRESS NOTES:
-=-=(Knielsen - Wed, 12 May 2010, 18:25)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.12634 2010-05-12 18:25:58.000000000 +0000
+++ /tmp/wklog.117.new.12634 2010-05-12 18:25:58.000000000 +0000
@@ -12,9 +12,35 @@
This of course requires that it is possible to sign the packages after the
actual build.
-For .rpm this seems to be easy (from reading, didn't try yet):
+----
+
+Here is how to sign the .rpms.
+
+Copy in the ourdelta/bakery signing stuff to ~/.gnupg and ~/.rpmmacros.
+
+Run
+
+ rpm --addsign *.rpm
+
+That's all! This can be tested by creating a local yum repository:
- rpm --addsign <packages>
+ createrepo <dir>
+
+(where <dir> contains the signed .rpms). Then create the file
+/etc/yum.repos.d/localmaria.repo:
+
+[localmaria]
+name=Local MariaDB repo
+baseurl=file:///home/buildbot/rpms
+gpgcheck=1
+enabled=1
+gpgkey=http://master.ourdelta.org/deb/ourdelta.gpg
+
+Now this should work to install MariaDB:
+
+ sudo yum install MariaDB-server
+
+----
For .deb, I *think* we are using secure apt, which does not actually sign the
packages, rather it signs the "Release" file which is created when the
-=-=(Knielsen - Wed, 12 May 2010, 07:14)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.401 2010-05-12 07:14:27.000000000 +0000
+++ /tmp/wklog.117.new.401 2010-05-12 07:14:27.000000000 +0000
@@ -1 +1,35 @@
+As for signing, I think it may be possible/best to do the signing outside of
+buildbot, as a separate process. There are some advantages to this:
+
+ - Security: the private key can be kept less exposed when it is not included
+ in the buildbot infrastructure.
+
+ - It is good to have one step of human intervention before actually signing
+ and releasing packages.
+
+ - Generally reducing the complexity of the buildbot setup.
+
+This of course requires that it is possible to sign the packages after the
+actual build.
+
+For .rpm this seems to be easy (from reading, didn't try yet):
+
+ rpm --addsign <packages>
+
+For .deb, I *think* we are using secure apt, which does not actually sign the
+packages, rather it signs the "Release" file which is created when the
+repository is set up. So in this case again there is no problem doing the
+signing outside of the build itself (in fact that is the way it must be).
+
+Found two tools that can help with building and signing apt repositories:
+reprepro (seems to be the newest, recommended) and apt-ftparchive.
+
+----
+
+ToDO: Figure out how to handle the mysql-client-core issue on lucid. Arjen
+suggested splitting up so we have this package ourselves, or maybe it can be
+handled with replace/provide/conflict dependencies.
+
+ToDo: Figure out exactly what files/directory structure needs to be uploaded
+(asked Peter, awaiting reply).
-=-=(Knielsen - Wed, 12 May 2010, 07:06)=-=-
Upgraded lucid VMs to the official release.
Discussed with Arjen how to handle things.
Did a lot of reading on how apt repositories work.
Worked 4 hours and estimate 16 hours remain (original estimate unchanged).
DESCRIPTION:
Since the package building for MariaDB is now fully automated in Buildbot, it
has been decided to use packages from Buildbot for the OurDelta apt and yum
repositories.
This worklog is about fixing/implementing anything that is missing to achieve
this.
- When doing a real release build, packages/repositories need to be signed,
so that users will not get a warning about unauthenticated packages. This
signing must only be done on official releases, not on daily builds (to
avoid confusing one with the other).
- Packages must be uploaded from the Buildbot host. The OurDelta
infrastructure has a DropBox share that could be used for this, another
option is to simply use rsync.
- Ubuntu 10.04 "lucid" has been released, and we need to support that for
packages, so the Buildbot VM for lucid must be upgraded to have the
official release.
- In Ubuntu 10.04, the official MySQL packages include a new package
mysql-client-core, we currently have a conflict with this on install that
we need to handle somehow.
HIGH-LEVEL SPECIFICATION:
As for signing, I think it may be possible/best to do the signing outside of
buildbot, as a separate process. There are some advantages to this:
- Security: the private key can be kept less exposed when it is not included
in the buildbot infrastructure.
- It is good to have one step of human intervention before actually signing
and releasing packages.
- Generally reducing the complexity of the buildbot setup.
This of course requires that it is possible to sign the packages after the
actual build.
----
Here is how to sign the .rpms.
Copy in the ourdelta/bakery signing stuff to ~/.gnupg and ~/.rpmmacros.
Run
rpm --addsign *.rpm
That's all! This can be tested by creating a local yum repository:
createrepo <dir>
(where <dir> contains the signed .rpms). Then create the file
/etc/yum.repos.d/localmaria.repo:
[localmaria]
name=Local MariaDB repo
baseurl=file:///home/buildbot/rpms
gpgcheck=1
enabled=1
gpgkey=http://master.ourdelta.org/deb/ourdelta.gpg
Now this should work to install MariaDB:
sudo yum install MariaDB-server
----
For .deb, I *think* we are using secure apt, which does not actually sign the
packages, rather it signs the "Release" file which is created when the
repository is set up. So in this case again there is no problem doing the
signing outside of the build itself (in fact that is the way it must be).
Found two tools that can help with building and signing apt repositories:
reprepro (seems to be the newest, recommended) and apt-ftparchive.
----
ToDo: Figure out how to handle the mysql-client-core issue on lucid. Arjen
suggested splitting up so we have this package ourselves, or maybe it can be
handled with replace/provide/conflict dependencies.
ToDo: Figure out exactly what files/directory structure needs to be uploaded
(asked Peter, awaiting reply).
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] New (by Knielsen): Buildbot MariaDB->MariaDB upgrade testing (118)
by worklog-noreply@askmonty.org 12 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Buildbot MariaDB->MariaDB upgrade testing
CREATION DATE..: Wed, 12 May 2010, 13:48
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 118 (http://askmonty.org/worklog/?tid=118)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 8 (hours remain)
ORIG. ESTIMATE.: 8
PROGRESS NOTES:
DESCRIPTION:
Create an additional test step for Buildbot to check that upgrading from one
version of MariaDB to another works ok.
We already have testing of upgrade from distro-official MySQL .debs to
our MariaDB .debs.
What we need is, for each Debian/Ubuntu version, a new KVM virtual image with
MariaDB 5.1.42 (the first GA release) pre-installed, just like the existing
upgrade test uses images with MySQL pre-installed.
Then the Buildbot configuration must be updated to add another upgrade test
step, just like the existing one but using the images with MariaDB pre-installed.
Also, the setup of the new images must be added to the existing documentation:
http://askmonty.org/wiki/BuildBot::package
http://askmonty.org/wiki/BuildBot:vm-setup
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2855: Build perl scripts in the correct directory
by noreply@launchpad.net 12 May '10
------------------------------------------------------------
revno: 2855
committer: Bo Thorsen <bo(a)askmonty.org>
branch nick: trunk-work
timestamp: Wed 2010-05-12 14:33:10 +0200
message:
Build perl scripts in the correct directory
modified:
scripts/CMakeLists.txt
--
lp:maria
https://code.launchpad.net/~maria-captains/maria/5.1
Your team Maria developers is subscribed to branch lp:maria.
To unsubscribe from this branch go to https://code.launchpad.net/~maria-captains/maria/5.1/+edit-subscription

12 May '10
As discussed with Kristian Nielsen on IRC.
Bo.

[Maria-developers] Updated (by Knielsen): Use Buildbot to populate apt/yum repositories (117)
by worklog-noreply@askmonty.org 12 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Use Buildbot to populate apt/yum repositories
CREATION DATE..: Wed, 12 May 2010, 07:04
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 117 (http://askmonty.org/worklog/?tid=117)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 4
ESTIMATE.......: 16 (hours remain)
ORIG. ESTIMATE.: 20
PROGRESS NOTES:
-=-=(Knielsen - Wed, 12 May 2010, 07:14)=-=-
High-Level Specification modified.
--- /tmp/wklog.117.old.401 2010-05-12 07:14:27.000000000 +0000
+++ /tmp/wklog.117.new.401 2010-05-12 07:14:27.000000000 +0000
@@ -1 +1,35 @@
+As for signing, I think it may be possible/best to do the signing outside of
+buildbot, as a separate process. There are some advantages to this:
+
+ - Security: the private key can be kept less exposed when it is not included
+ in the buildbot infrastructure.
+
+ - It is good to have one step of human intervention before actually signing
+ and releasing packages.
+
+ - Generally reducing the complexity of the buildbot setup.
+
+This of course requires that it is possible to sign the packages after the
+actual build.
+
+For .rpm this seems to be easy (from reading, didn't try yet):
+
+ rpm --addsign <packages>
+
+For .deb, I *think* we are using secure apt, which does not actually sign the
+packages, rather it signs the "Release" file which is created when the
+repository is set up. So in this case again there is no problem doing the
+signing outside of the build itself (in fact that is the way it must be).
+
+Found two tools that can help with building and signing apt repositories:
+reprepro (seems to be the newest, recommended) and apt-ftparchive.
+
+----
+
+ToDO: Figure out how to handle the mysql-client-core issue on lucid. Arjen
+suggested splitting up so we have this package ourselves, or maybe it can be
+handled with replace/provide/conflict dependencies.
+
+ToDo: Figure out exactly what files/directory structure needs to be uploaded
+(asked Peter, awaiting reply).
-=-=(Knielsen - Wed, 12 May 2010, 07:06)=-=-
Upgraded lucid VMs to the official release.
Discussed with Arjen how to handle things.
Did a lot of reading on how apt repositories work.
Worked 4 hours and estimate 16 hours remain (original estimate unchanged).
DESCRIPTION:
Since the package building for MariaDB is now fully automated in Buildbot, it
has been decided to use packages from Buildbot for the OurDelta apt and yum
repositories.
This worklog is about fixing/implementing anything that is missing to achieve
this.
- When doing a real release build, packages/repositories need to be signed,
so that users will not get a warning about unauthenticated packages. This
signing must only be done on official releases, not on daily builds (to
avoid confusing one with the other).
- Packages must be uploaded from the Buildbot host. The OurDelta
infrastructure has a DropBox share that could be used for this, another
option is to simply use rsync.
- Ubuntu 10.04 "lucid" has been released, and we need to support that for
packages, so the Buildbot VM for lucid must be upgraded to have the
official release.
- In Ubuntu 10.04, the official MySQL packages include a new package
mysql-client-core, we currently have a conflict with this on install that
we need to handle somehow.
HIGH-LEVEL SPECIFICATION:
As for signing, I think it may be possible/best to do the signing outside of
buildbot, as a separate process. There are some advantages to this:
- Security: the private key can be kept less exposed when it is not included
in the buildbot infrastructure.
- It is good to have one step of human intervention before actually signing
and releasing packages.
- Generally reducing the complexity of the buildbot setup.
This of course requires that it is possible to sign the packages after the
actual build.
For .rpm this seems to be easy (from reading, didn't try yet):
rpm --addsign <packages>
For .deb, I *think* we are using secure apt, which does not actually sign the
packages, rather it signs the "Release" file which is created when the
repository is set up. So in this case again there is no problem doing the
signing outside of the build itself (in fact that is the way it must be).
Found two tools that can help with building and signing apt repositories:
reprepro (seems to be the newest, recommended) and apt-ftparchive.
----
ToDo: Figure out how to handle the mysql-client-core issue on lucid. Arjen
suggested splitting up so we have this package ourselves, or maybe it can be
handled with replace/provide/conflict dependencies.
ToDo: Figure out exactly what files/directory structure needs to be uploaded
(asked Peter, awaiting reply).
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Progress (by Knielsen): Use Buildbot to populate apt/yum repositories (117)
by worklog-noreply@askmonty.org 12 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Use Buildbot to populate apt/yum repositories
CREATION DATE..: Wed, 12 May 2010, 07:04
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 117 (http://askmonty.org/worklog/?tid=117)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 4
ESTIMATE.......: 16 (hours remain)
ORIG. ESTIMATE.: 20
PROGRESS NOTES:
-=-=(Knielsen - Wed, 12 May 2010, 07:06)=-=-
Upgraded lucid VMs to the official release.
Discussed with Arjen how to handle things.
Did a lot of reading on how apt repositories work.
Worked 4 hours and estimate 16 hours remain (original estimate unchanged).
DESCRIPTION:
Since the package building for MariaDB is now fully automated in Buildbot, it
has been decided to use packages from Buildbot for the OurDelta apt and yum
repositories.
This worklog is about fixing/implementing anything that is missing to achieve
this.
- When doing a real release build, packages/repositories need to be signed,
so that users will not get a warning about unauthenticated packages. This
signing must only be done on official releases, not on daily builds (to
avoid confusing one with the other).
- Packages must be uploaded from the Buildbot host. The OurDelta
infrastructure has a DropBox share that could be used for this, another
option is to simply use rsync.
- Ubuntu 10.04 "lucid" has been released, and we need to support that for
packages, so the Buildbot VM for lucid must be upgraded to have the
official release.
- In Ubuntu 10.04, the official MySQL packages include a new package
mysql-client-core, we currently have a conflict with this on install that
we need to handle somehow.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] New (by Knielsen): Use Buildbot to populate apt/yum repositories (117)
by worklog-noreply@askmonty.org 12 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Use Buildbot to populate apt/yum repositories
CREATION DATE..: Wed, 12 May 2010, 07:04
SUPERVISOR.....: Knielsen
IMPLEMENTOR....: Knielsen
COPIES TO......:
CATEGORY.......: Other
TASK ID........: 117 (http://askmonty.org/worklog/?tid=117)
VERSION........: Server-9.x
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 20 (hours remain)
ORIG. ESTIMATE.: 20
PROGRESS NOTES:
DESCRIPTION:
Since the package building for MariaDB is now fully automated in Buildbot, it
has been decided to use packages from Buildbot for the OurDelta apt and yum
repositories.
This worklog is about fixing/implementing anything that is missing to achieve
this.
- When doing a real release build, packages/repositories need to be signed,
so that users will not get a warning about unauthenticated packages. This
signing must only be done on official releases, not on daily builds (to
avoid confusing one with the other).
- Packages must be uploaded from the Buildbot host. The OurDelta
infrastructure has a DropBox share that could be used for this, another
option is to simply use rsync.
- Ubuntu 10.04 "lucid" has been released, and we need to support that for
packages, so the Buildbot VM for lucid must be upgraded to have the
official release.
- In Ubuntu 10.04, the official MySQL packages include a new package
mysql-client-core, we currently have a conflict with this on install that
we need to handle somehow.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Progress (by Knielsen): Efficient group commit for binary log (116)
by worklog-noreply@askmonty.org 12 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Efficient group commit for binary log
CREATION DATE..: Mon, 26 Apr 2010, 13:28
SUPERVISOR.....: Knielsen
IMPLEMENTOR....:
COPIES TO......: Serg
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 116 (http://askmonty.org/worklog/?tid=116)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 28
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Knielsen - Wed, 12 May 2010, 06:41)=-=-
Started work on a Quilt patch series, refactoring the binlog code to prepare for implementing the
group commit, and working on the design of group commit in parallel.
Found and fixed several problems in error handling when writing to binlog.
Removed redundant table map version locking.
Split binlog writing into two parts in preparation for group commit. When ready to write to the
binlog, threads enter a queue, and the first thread in the queue handles the binlog writing for
everyone. When it obtains the LOCK_log, it first loops over all queued threads, executing the first
part of binlog writing (essentially the write(2) syscall) for each. It then runs the second part
(essentially fsync(2)) only once, and then wakes up the remaining threads in the queue.
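To make the queueing scheme concrete, below is a minimal stand-alone C++ sketch of the
leader/follower idea described above. It is an illustration only, not the actual patch: the names
(GroupCommitLog, write_one(), sync_all()) are made up, and for simplicity a single mutex stands in
for both the queue lock and LOCK_log, so new transactions cannot enqueue while a group is being
synced (the real design avoids that).

#include <condition_variable>
#include <mutex>
#include <string>
#include <vector>

class GroupCommitLog {
  struct Entry { std::string data; bool done; };
  std::mutex mu;                      // stands in for queue lock + LOCK_log
  std::condition_variable cv;
  std::vector<Entry*> queue;          // threads waiting for binlog write

  void write_one(const Entry &e) { (void)e; /* the write(2) part */ }
  void sync_all() { /* the single fsync(2) part */ }

public:
  void commit(const std::string &trx) // called by each committing thread
  {
    Entry me;
    me.data= trx;
    me.done= false;
    std::unique_lock<std::mutex> lk(mu);
    bool leader= queue.empty();       // first thread in the queue leads
    queue.push_back(&me);
    if (!leader)
    {
      cv.wait(lk, [&]{ return me.done; });  // follower: wait for the leader
      return;
    }
    std::vector<Entry*> group;
    group.swap(queue);                // take the whole group at once
    for (Entry *e : group)
      write_one(*e);                  // the write(2) part, once per trx
    sync_all();                       // the fsync(2) part, once per group
    for (Entry *e : group)
      e->done= true;
    cv.notify_all();                  // wake the remaining threads
  }
};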
Still to be done:
Finish the proof-of-concept group commit patch, by 1) implementing the prepare_fast() and
commit_fast() callbacks in handler.cc, 2) moving the binlog thread enqueue from log_xid() to
binlog_prepare_fast(), and 3) moving the fast part of the InnoDB commit to innobase_commit_fast(),
removing the prepare_commit_mutex().
Write up the final design in this worklog.
Evaluate the design to see if we can do better/differently.
Think about possible next steps, such as releasing InnoDB row locks early (in
innobase_prepare_fast()), and doing crash recovery by replaying transactions from the binlog
(removing the need for engine durability and 2 of the 3 fsync() calls in commit).
Worked 28 hours and estimate 0 hours remain (original estimate increased by 28 hours).
-=-=(Serg - Mon, 26 Apr 2010, 14:10)=-=-
Observers changed: Serg
DESCRIPTION:
Currently, in order to ensure that the server can recover after a crash to a
state in which the storage engines and the binary log are consistent with each
other, it is necessary to use XA with durable commits for both the storage
engines (innodb_flush_log_at_trx_commit=1) and the binary log (sync_binlog=1).
This is _very_ expensive, since the server needs to do three fsync() operations
for every commit (engine prepare, binlog write, and engine commit), as there is
no working group commit when the binary log is enabled.
The idea is to
- Implement/fix group commit to work properly with the binary log enabled.
- (Optionally) avoid the need to fsync() in the engine, and instead rely on
replaying any lost transactions from the binary log against the engine
during crash recovery.
For background see these articles:
http://kristiannielsen.livejournal.com/12254.html
http://kristiannielsen.livejournal.com/12408.html
http://kristiannielsen.livejournal.com/12553.html
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2788)
by Igor Babaev 12 May '10
#At lp:maria/5.2 based on revid:psergey@askmonty.org-20100329200940-9ikx6gpww0gtsx00
2788 Igor Babaev 2010-05-11 [merge]
Merge 5.1-release -> 5.3
added:
mysql-test/include/min_null_cond.inc
mysql-test/include/not_binlog_format_row.inc
mysql-test/include/view_alias.inc
mysql-test/r/bug39022.result
mysql-test/r/innodb_bug47621.result
mysql-test/r/log_tables_upgrade.result
mysql-test/r/no_binlog.result
mysql-test/r/partition_debug_sync.result
mysql-test/r/plugin_not_embedded.result
mysql-test/r/view_alias.result
mysql-test/std_data/binlog_savepoint.000001
mysql-test/std_data/bug46565.ARZ
mysql-test/std_data/bug46565.frm
mysql-test/std_data/bug48265.frm
mysql-test/std_data/bug48449.frm
mysql-test/std_data/bug49823.CSM
mysql-test/std_data/bug49823.CSV
mysql-test/std_data/bug49823.frm
mysql-test/suite/binlog/t/binlog_index-master.opt
mysql-test/suite/engines/
mysql-test/suite/engines/README
mysql-test/suite/engines/funcs/
mysql-test/suite/engines/funcs/r/
mysql-test/suite/engines/funcs/r/ai_init_alter_table.result
mysql-test/suite/engines/funcs/r/ai_init_create_table.result
mysql-test/suite/engines/funcs/r/ai_init_insert.result
mysql-test/suite/engines/funcs/r/ai_init_insert_id.result
mysql-test/suite/engines/funcs/r/ai_overflow_error.result
mysql-test/suite/engines/funcs/r/ai_reset_by_truncate.result
mysql-test/suite/engines/funcs/r/ai_sql_auto_is_null.result
mysql-test/suite/engines/funcs/r/an_calendar.result
mysql-test/suite/engines/funcs/r/an_number.result
mysql-test/suite/engines/funcs/r/an_string.result
mysql-test/suite/engines/funcs/r/comment_column.result
mysql-test/suite/engines/funcs/r/comment_column2.result
mysql-test/suite/engines/funcs/r/comment_table.result
mysql-test/suite/engines/funcs/r/crash_manycolumns_number.result
mysql-test/suite/engines/funcs/r/crash_manycolumns_string.result
mysql-test/suite/engines/funcs/r/crash_manyindexes_number.result
mysql-test/suite/engines/funcs/r/crash_manyindexes_string.result
mysql-test/suite/engines/funcs/r/crash_manytables_number.result
mysql-test/suite/engines/funcs/r/crash_manytables_string.result
mysql-test/suite/engines/funcs/r/date_function.result
mysql-test/suite/engines/funcs/r/datetime_function.result
mysql-test/suite/engines/funcs/r/db_alter_character_set.result
mysql-test/suite/engines/funcs/r/db_alter_character_set_collate.result
mysql-test/suite/engines/funcs/r/db_alter_collate_ascii.result
mysql-test/suite/engines/funcs/r/db_alter_collate_utf8.result
mysql-test/suite/engines/funcs/r/db_create_character_set.result
mysql-test/suite/engines/funcs/r/db_create_character_set_collate.result
mysql-test/suite/engines/funcs/r/db_create_drop.result
mysql-test/suite/engines/funcs/r/db_create_error.result
mysql-test/suite/engines/funcs/r/db_create_error_reserved.result
mysql-test/suite/engines/funcs/r/db_create_if_not_exists.result
mysql-test/suite/engines/funcs/r/db_drop_error.result
mysql-test/suite/engines/funcs/r/db_use_error.result
mysql-test/suite/engines/funcs/r/de_autoinc.result
mysql-test/suite/engines/funcs/r/de_calendar_range.result
mysql-test/suite/engines/funcs/r/de_ignore.result
mysql-test/suite/engines/funcs/r/de_limit.result
mysql-test/suite/engines/funcs/r/de_multi_db_table.result
mysql-test/suite/engines/funcs/r/de_multi_db_table_using.result
mysql-test/suite/engines/funcs/r/de_multi_table.result
mysql-test/suite/engines/funcs/r/de_multi_table_using.result
mysql-test/suite/engines/funcs/r/de_number_range.result
mysql-test/suite/engines/funcs/r/de_quick.result
mysql-test/suite/engines/funcs/r/de_string_range.result
mysql-test/suite/engines/funcs/r/de_truncate.result
mysql-test/suite/engines/funcs/r/de_truncate_autoinc.result
mysql-test/suite/engines/funcs/r/fu_aggregate_avg_number.result
mysql-test/suite/engines/funcs/r/fu_aggregate_count_number.result
mysql-test/suite/engines/funcs/r/fu_aggregate_max_number.result
mysql-test/suite/engines/funcs/r/fu_aggregate_max_subquery.result
mysql-test/suite/engines/funcs/r/fu_aggregate_min_number.result
mysql-test/suite/engines/funcs/r/fu_aggregate_sum_number.result
mysql-test/suite/engines/funcs/r/general_no_data.result
mysql-test/suite/engines/funcs/r/general_not_null.result
mysql-test/suite/engines/funcs/r/general_null.result
mysql-test/suite/engines/funcs/r/in_calendar_2_unique_constraints_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_calendar_pk_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_calendar_pk_constraint_error.result
mysql-test/suite/engines/funcs/r/in_calendar_pk_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_calendar_unique_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_calendar_unique_constraint_error.result
mysql-test/suite/engines/funcs/r/in_calendar_unique_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_enum_null.result
mysql-test/suite/engines/funcs/r/in_enum_null_boundary_error.result
mysql-test/suite/engines/funcs/r/in_enum_null_large_error.result
mysql-test/suite/engines/funcs/r/in_insert_select.result
mysql-test/suite/engines/funcs/r/in_insert_select_autoinc.result
mysql-test/suite/engines/funcs/r/in_insert_select_unique_violation.result
mysql-test/suite/engines/funcs/r/in_lob_boundary_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_calendar_pk_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_multicolumn_calendar_pk_constraint_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_calendar_pk_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_multicolumn_calendar_unique_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_multicolumn_calendar_unique_constraint_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_calendar_unique_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_multicolumn_number_pk_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_multicolumn_number_pk_constraint_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_number_pk_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_multicolumn_number_unique_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_multicolumn_number_unique_constraint_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_number_unique_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_multicolumn_string_pk_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_multicolumn_string_pk_constraint_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_string_pk_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_multicolumn_string_unique_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_multicolumn_string_unique_constraint_error.result
mysql-test/suite/engines/funcs/r/in_multicolumn_string_unique_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_number_2_unique_constraints_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_number_boundary_error.result
mysql-test/suite/engines/funcs/r/in_number_decimal_boundary_error.result
mysql-test/suite/engines/funcs/r/in_number_length.result
mysql-test/suite/engines/funcs/r/in_number_null.result
mysql-test/suite/engines/funcs/r/in_number_pk_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_number_pk_constraint_error.result
mysql-test/suite/engines/funcs/r/in_number_pk_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_number_unique_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_number_unique_constraint_error.result
mysql-test/suite/engines/funcs/r/in_number_unique_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_set_null.result
mysql-test/suite/engines/funcs/r/in_set_null_boundary_error.result
mysql-test/suite/engines/funcs/r/in_set_null_large.result
mysql-test/suite/engines/funcs/r/in_string_2_unique_constraints_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_string_boundary_error.result
mysql-test/suite/engines/funcs/r/in_string_not_null.result
mysql-test/suite/engines/funcs/r/in_string_null.result
mysql-test/suite/engines/funcs/r/in_string_pk_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_string_pk_constraint_error.result
mysql-test/suite/engines/funcs/r/in_string_pk_constraint_ignore.result
mysql-test/suite/engines/funcs/r/in_string_set_enum_fail.result
mysql-test/suite/engines/funcs/r/in_string_unique_constraint_duplicate_update.result
mysql-test/suite/engines/funcs/r/in_string_unique_constraint_error.result
mysql-test/suite/engines/funcs/r/in_string_unique_constraint_ignore.result
mysql-test/suite/engines/funcs/r/ix_drop.result
mysql-test/suite/engines/funcs/r/ix_drop_error.result
mysql-test/suite/engines/funcs/r/ix_index_decimals.result
mysql-test/suite/engines/funcs/r/ix_index_lob.result
mysql-test/suite/engines/funcs/r/ix_index_non_string.result
mysql-test/suite/engines/funcs/r/ix_index_string.result
mysql-test/suite/engines/funcs/r/ix_index_string_length.result
mysql-test/suite/engines/funcs/r/ix_unique_decimals.result
mysql-test/suite/engines/funcs/r/ix_unique_lob.result
mysql-test/suite/engines/funcs/r/ix_unique_non_string.result
mysql-test/suite/engines/funcs/r/ix_unique_string.result
mysql-test/suite/engines/funcs/r/ix_unique_string_length.result
mysql-test/suite/engines/funcs/r/ix_using_order.result
mysql-test/suite/engines/funcs/r/jp_comment_column.result
mysql-test/suite/engines/funcs/r/jp_comment_older_compatibility1.result
mysql-test/suite/engines/funcs/r/jp_comment_table.result
mysql-test/suite/engines/funcs/r/ld_all_number_string_calendar_types.result
mysql-test/suite/engines/funcs/r/ld_bit.result
mysql-test/suite/engines/funcs/r/ld_enum_set.result
mysql-test/suite/engines/funcs/r/ld_less_columns.result
mysql-test/suite/engines/funcs/r/ld_more_columns_truncated.result
mysql-test/suite/engines/funcs/r/ld_null.result
mysql-test/suite/engines/funcs/r/ld_quote.result
mysql-test/suite/engines/funcs/r/ld_simple.result
mysql-test/suite/engines/funcs/r/ld_starting.result
mysql-test/suite/engines/funcs/r/ld_unique_error1.result
mysql-test/suite/engines/funcs/r/ld_unique_error1_local.result
mysql-test/suite/engines/funcs/r/ld_unique_error2.result
mysql-test/suite/engines/funcs/r/ld_unique_error2_local.result
mysql-test/suite/engines/funcs/r/ld_unique_error3.result
mysql-test/suite/engines/funcs/r/ld_unique_error3_local.result
mysql-test/suite/engines/funcs/r/ps_number_length.result
mysql-test/suite/engines/funcs/r/ps_number_null.result
mysql-test/suite/engines/funcs/r/ps_string_not_null.result
mysql-test/suite/engines/funcs/r/ps_string_null.result
mysql-test/suite/engines/funcs/r/re_number_range.result
mysql-test/suite/engines/funcs/r/re_number_range_set.result
mysql-test/suite/engines/funcs/r/re_number_select.result
mysql-test/suite/engines/funcs/r/re_string_range.result
mysql-test/suite/engines/funcs/r/re_string_range_set.result
mysql-test/suite/engines/funcs/r/rpl000010.result
mysql-test/suite/engines/funcs/r/rpl000011.result
mysql-test/suite/engines/funcs/r/rpl000013.result
mysql-test/suite/engines/funcs/r/rpl000017.result
mysql-test/suite/engines/funcs/r/rpl_000015.result
mysql-test/suite/engines/funcs/r/rpl_LD_INFILE.result
mysql-test/suite/engines/funcs/r/rpl_REDIRECT.result
mysql-test/suite/engines/funcs/r/rpl_alter.result
mysql-test/suite/engines/funcs/r/rpl_alter_db.result
mysql-test/suite/engines/funcs/r/rpl_bit.result
mysql-test/suite/engines/funcs/r/rpl_bit_npk.result
mysql-test/suite/engines/funcs/r/rpl_change_master.result
mysql-test/suite/engines/funcs/r/rpl_create_database.result
mysql-test/suite/engines/funcs/r/rpl_do_grant.result
mysql-test/suite/engines/funcs/r/rpl_drop.result
mysql-test/suite/engines/funcs/r/rpl_drop_db.result
mysql-test/suite/engines/funcs/r/rpl_dual_pos_advance.result
mysql-test/suite/engines/funcs/r/rpl_empty_master_crash.result
mysql-test/suite/engines/funcs/r/rpl_err_ignoredtable.result
mysql-test/suite/engines/funcs/r/rpl_flushlog_loop.result
mysql-test/suite/engines/funcs/r/rpl_free_items.result
mysql-test/suite/engines/funcs/r/rpl_get_lock.result
mysql-test/suite/engines/funcs/r/rpl_ignore_grant.result
mysql-test/suite/engines/funcs/r/rpl_ignore_revoke.result
mysql-test/suite/engines/funcs/r/rpl_ignore_table_update.result
mysql-test/suite/engines/funcs/r/rpl_init_slave.result
mysql-test/suite/engines/funcs/r/rpl_insert.result
mysql-test/suite/engines/funcs/r/rpl_insert_select.result
mysql-test/suite/engines/funcs/r/rpl_loaddata2.result
mysql-test/suite/engines/funcs/r/rpl_loaddata_m.result
mysql-test/suite/engines/funcs/r/rpl_loaddata_s.result
mysql-test/suite/engines/funcs/r/rpl_loaddatalocal.result
mysql-test/suite/engines/funcs/r/rpl_loadfile.result
mysql-test/suite/engines/funcs/r/rpl_log_pos.result
mysql-test/suite/engines/funcs/r/rpl_many_optimize.result
mysql-test/suite/engines/funcs/r/rpl_master_pos_wait.result
mysql-test/suite/engines/funcs/r/rpl_misc_functions.result
mysql-test/suite/engines/funcs/r/rpl_multi_delete.result
mysql-test/suite/engines/funcs/r/rpl_multi_delete2.result
mysql-test/suite/engines/funcs/r/rpl_multi_update4.result
mysql-test/suite/engines/funcs/r/rpl_ps.result
mysql-test/suite/engines/funcs/r/rpl_rbr_to_sbr.result
mysql-test/suite/engines/funcs/r/rpl_relayspace.result
mysql-test/suite/engines/funcs/r/rpl_replicate_ignore_db.result
mysql-test/suite/engines/funcs/r/rpl_row_NOW.result
mysql-test/suite/engines/funcs/r/rpl_row_USER.result
mysql-test/suite/engines/funcs/r/rpl_row_drop.result
mysql-test/suite/engines/funcs/r/rpl_row_func001.result
mysql-test/suite/engines/funcs/r/rpl_row_inexist_tbl.result
mysql-test/suite/engines/funcs/r/rpl_row_max_relay_size.result
mysql-test/suite/engines/funcs/r/rpl_row_reset_slave.result
mysql-test/suite/engines/funcs/r/rpl_row_sp001.result
mysql-test/suite/engines/funcs/r/rpl_row_sp005.result
mysql-test/suite/engines/funcs/r/rpl_row_sp008.result
mysql-test/suite/engines/funcs/r/rpl_row_sp009.result
mysql-test/suite/engines/funcs/r/rpl_row_sp010.result
mysql-test/suite/engines/funcs/r/rpl_row_sp011.result
mysql-test/suite/engines/funcs/r/rpl_row_sp012.result
mysql-test/suite/engines/funcs/r/rpl_row_stop_middle.result
mysql-test/suite/engines/funcs/r/rpl_row_trig001.result
mysql-test/suite/engines/funcs/r/rpl_row_trig002.result
mysql-test/suite/engines/funcs/r/rpl_row_trig003.result
mysql-test/suite/engines/funcs/r/rpl_row_until.result
mysql-test/suite/engines/funcs/r/rpl_row_view01.result
mysql-test/suite/engines/funcs/r/rpl_server_id1.result
mysql-test/suite/engines/funcs/r/rpl_server_id2.result
mysql-test/suite/engines/funcs/r/rpl_session_var.result
mysql-test/suite/engines/funcs/r/rpl_sf.result
mysql-test/suite/engines/funcs/r/rpl_skip_error.result
mysql-test/suite/engines/funcs/r/rpl_slave_status.result
mysql-test/suite/engines/funcs/r/rpl_sp.result
mysql-test/suite/engines/funcs/r/rpl_sp004.result
mysql-test/suite/engines/funcs/r/rpl_sp_effects.result
mysql-test/suite/engines/funcs/r/rpl_start_stop_slave.result
mysql-test/suite/engines/funcs/r/rpl_stm_max_relay_size.result
mysql-test/suite/engines/funcs/r/rpl_stm_mystery22.result
mysql-test/suite/engines/funcs/r/rpl_stm_no_op.result
mysql-test/suite/engines/funcs/r/rpl_stm_reset_slave.result
mysql-test/suite/engines/funcs/r/rpl_switch_stm_row_mixed.result
mysql-test/suite/engines/funcs/r/rpl_temp_table.result
mysql-test/suite/engines/funcs/r/rpl_temporary.result
mysql-test/suite/engines/funcs/r/rpl_trigger.result
mysql-test/suite/engines/funcs/r/rpl_trunc_temp.result
mysql-test/suite/engines/funcs/r/rpl_user_variables.result
mysql-test/suite/engines/funcs/r/rpl_variables.result
mysql-test/suite/engines/funcs/r/rpl_view.result
mysql-test/suite/engines/funcs/r/se_join_cross.result
mysql-test/suite/engines/funcs/r/se_join_default.result
mysql-test/suite/engines/funcs/r/se_join_inner.result
mysql-test/suite/engines/funcs/r/se_join_left.result
mysql-test/suite/engines/funcs/r/se_join_left_outer.result
mysql-test/suite/engines/funcs/r/se_join_natural_left.result
mysql-test/suite/engines/funcs/r/se_join_natural_left_outer.result
mysql-test/suite/engines/funcs/r/se_join_natural_right.result
mysql-test/suite/engines/funcs/r/se_join_natural_right_outer.result
mysql-test/suite/engines/funcs/r/se_join_right.result
mysql-test/suite/engines/funcs/r/se_join_right_outer.result
mysql-test/suite/engines/funcs/r/se_join_straight.result
mysql-test/suite/engines/funcs/r/se_rowid.result
mysql-test/suite/engines/funcs/r/se_string_distinct.result
mysql-test/suite/engines/funcs/r/se_string_from.result
mysql-test/suite/engines/funcs/r/se_string_groupby.result
mysql-test/suite/engines/funcs/r/se_string_having.result
mysql-test/suite/engines/funcs/r/se_string_limit.result
mysql-test/suite/engines/funcs/r/se_string_orderby.result
mysql-test/suite/engines/funcs/r/se_string_union.result
mysql-test/suite/engines/funcs/r/se_string_where.result
mysql-test/suite/engines/funcs/r/se_string_where_and.result
mysql-test/suite/engines/funcs/r/se_string_where_or.result
mysql-test/suite/engines/funcs/r/sf_alter.result
mysql-test/suite/engines/funcs/r/sf_cursor.result
mysql-test/suite/engines/funcs/r/sf_simple1.result
mysql-test/suite/engines/funcs/r/sp_alter.result
mysql-test/suite/engines/funcs/r/sp_cursor.result
mysql-test/suite/engines/funcs/r/sp_simple1.result
mysql-test/suite/engines/funcs/r/sq_all.result
mysql-test/suite/engines/funcs/r/sq_any.result
mysql-test/suite/engines/funcs/r/sq_corr.result
mysql-test/suite/engines/funcs/r/sq_error.result
mysql-test/suite/engines/funcs/r/sq_exists.result
mysql-test/suite/engines/funcs/r/sq_from.result
mysql-test/suite/engines/funcs/r/sq_in.result
mysql-test/suite/engines/funcs/r/sq_row.result
mysql-test/suite/engines/funcs/r/sq_scalar.result
mysql-test/suite/engines/funcs/r/sq_some.result
mysql-test/suite/engines/funcs/r/ta_2part_column_to_pk.result
mysql-test/suite/engines/funcs/r/ta_2part_diff_string_to_pk.result
mysql-test/suite/engines/funcs/r/ta_2part_diff_to_pk.result
mysql-test/suite/engines/funcs/r/ta_2part_string_to_pk.result
mysql-test/suite/engines/funcs/r/ta_3part_column_to_pk.result
mysql-test/suite/engines/funcs/r/ta_3part_string_to_pk.result
mysql-test/suite/engines/funcs/r/ta_add_column.result
mysql-test/suite/engines/funcs/r/ta_add_column2.result
mysql-test/suite/engines/funcs/r/ta_add_column_first.result
mysql-test/suite/engines/funcs/r/ta_add_column_first2.result
mysql-test/suite/engines/funcs/r/ta_add_column_middle.result
mysql-test/suite/engines/funcs/r/ta_add_column_middle2.result
mysql-test/suite/engines/funcs/r/ta_add_string.result
mysql-test/suite/engines/funcs/r/ta_add_string2.result
mysql-test/suite/engines/funcs/r/ta_add_string_first.result
mysql-test/suite/engines/funcs/r/ta_add_string_first2.result
mysql-test/suite/engines/funcs/r/ta_add_string_middle.result
mysql-test/suite/engines/funcs/r/ta_add_string_middle2.result
mysql-test/suite/engines/funcs/r/ta_add_string_unique_index.result
mysql-test/suite/engines/funcs/r/ta_add_unique_index.result
mysql-test/suite/engines/funcs/r/ta_column_from_unsigned.result
mysql-test/suite/engines/funcs/r/ta_column_from_zerofill.result
mysql-test/suite/engines/funcs/r/ta_column_to_index.result
mysql-test/suite/engines/funcs/r/ta_column_to_not_null.result
mysql-test/suite/engines/funcs/r/ta_column_to_null.result
mysql-test/suite/engines/funcs/r/ta_column_to_pk.result
mysql-test/suite/engines/funcs/r/ta_column_to_unsigned.result
mysql-test/suite/engines/funcs/r/ta_column_to_zerofill.result
mysql-test/suite/engines/funcs/r/ta_drop_column.result
mysql-test/suite/engines/funcs/r/ta_drop_index.result
mysql-test/suite/engines/funcs/r/ta_drop_pk_autoincrement.result
mysql-test/suite/engines/funcs/r/ta_drop_pk_number.result
mysql-test/suite/engines/funcs/r/ta_drop_pk_string.result
mysql-test/suite/engines/funcs/r/ta_drop_string_index.result
mysql-test/suite/engines/funcs/r/ta_orderby.result
mysql-test/suite/engines/funcs/r/ta_rename.result
mysql-test/suite/engines/funcs/r/ta_set_drop_default.result
mysql-test/suite/engines/funcs/r/ta_string_drop_column.result
mysql-test/suite/engines/funcs/r/ta_string_to_index.result
mysql-test/suite/engines/funcs/r/ta_string_to_not_null.result
mysql-test/suite/engines/funcs/r/ta_string_to_null.result
mysql-test/suite/engines/funcs/r/ta_string_to_pk.result
mysql-test/suite/engines/funcs/r/tc_column_autoincrement.result
mysql-test/suite/engines/funcs/r/tc_column_comment.result
mysql-test/suite/engines/funcs/r/tc_column_comment_string.result
mysql-test/suite/engines/funcs/r/tc_column_default_decimal.result
mysql-test/suite/engines/funcs/r/tc_column_default_number.result
mysql-test/suite/engines/funcs/r/tc_column_default_string.result
mysql-test/suite/engines/funcs/r/tc_column_enum.result
mysql-test/suite/engines/funcs/r/tc_column_enum_long.result
mysql-test/suite/engines/funcs/r/tc_column_key.result
mysql-test/suite/engines/funcs/r/tc_column_key_length.result
mysql-test/suite/engines/funcs/r/tc_column_length.result
mysql-test/suite/engines/funcs/r/tc_column_length_decimals.result
mysql-test/suite/engines/funcs/r/tc_column_length_zero.result
mysql-test/suite/engines/funcs/r/tc_column_not_null.result
mysql-test/suite/engines/funcs/r/tc_column_null.result
mysql-test/suite/engines/funcs/r/tc_column_primary_key_number.result
mysql-test/suite/engines/funcs/r/tc_column_primary_key_string.result
mysql-test/suite/engines/funcs/r/tc_column_serial.result
mysql-test/suite/engines/funcs/r/tc_column_set.result
mysql-test/suite/engines/funcs/r/tc_column_set_long.result
mysql-test/suite/engines/funcs/r/tc_column_unique_key.result
mysql-test/suite/engines/funcs/r/tc_column_unique_key_string.result
mysql-test/suite/engines/funcs/r/tc_column_unsigned.result
mysql-test/suite/engines/funcs/r/tc_column_zerofill.result
mysql-test/suite/engines/funcs/r/tc_drop_table.result
mysql-test/suite/engines/funcs/r/tc_multicolumn_different.result
mysql-test/suite/engines/funcs/r/tc_multicolumn_same.result
mysql-test/suite/engines/funcs/r/tc_multicolumn_same_string.result
mysql-test/suite/engines/funcs/r/tc_partition_analyze.result
mysql-test/suite/engines/funcs/r/tc_partition_change_from_range_to_hash_key.result
mysql-test/suite/engines/funcs/r/tc_partition_check.result
mysql-test/suite/engines/funcs/r/tc_partition_hash.result
mysql-test/suite/engines/funcs/r/tc_partition_hash_date_function.result
mysql-test/suite/engines/funcs/r/tc_partition_key.result
mysql-test/suite/engines/funcs/r/tc_partition_linear_key.result
mysql-test/suite/engines/funcs/r/tc_partition_list_directory.result
mysql-test/suite/engines/funcs/r/tc_partition_list_error.result
mysql-test/suite/engines/funcs/r/tc_partition_optimize.result
mysql-test/suite/engines/funcs/r/tc_partition_rebuild.result
mysql-test/suite/engines/funcs/r/tc_partition_remove.result
mysql-test/suite/engines/funcs/r/tc_partition_reorg_divide.result
mysql-test/suite/engines/funcs/r/tc_partition_reorg_hash_key.result
mysql-test/suite/engines/funcs/r/tc_partition_reorg_merge.result
mysql-test/suite/engines/funcs/r/tc_partition_repair.result
mysql-test/suite/engines/funcs/r/tc_partition_sub1.result
mysql-test/suite/engines/funcs/r/tc_partition_sub2.result
mysql-test/suite/engines/funcs/r/tc_partition_value.result
mysql-test/suite/engines/funcs/r/tc_partition_value_error.result
mysql-test/suite/engines/funcs/r/tc_partition_value_specific.result
mysql-test/suite/engines/funcs/r/tc_rename.result
mysql-test/suite/engines/funcs/r/tc_rename_across_database.result
mysql-test/suite/engines/funcs/r/tc_rename_error.result
mysql-test/suite/engines/funcs/r/tc_structure_comment.result
mysql-test/suite/engines/funcs/r/tc_structure_create_like.result
mysql-test/suite/engines/funcs/r/tc_structure_create_like_string.result
mysql-test/suite/engines/funcs/r/tc_structure_create_select.result
mysql-test/suite/engines/funcs/r/tc_structure_create_select_string.result
mysql-test/suite/engines/funcs/r/tc_structure_string_comment.result
mysql-test/suite/engines/funcs/r/tc_temporary_column.result
mysql-test/suite/engines/funcs/r/tc_temporary_column_length.result
mysql-test/suite/engines/funcs/r/time_function.result
mysql-test/suite/engines/funcs/r/tr_all_type_triggers.result
mysql-test/suite/engines/funcs/r/tr_delete.result
mysql-test/suite/engines/funcs/r/tr_delete_new_error.result
mysql-test/suite/engines/funcs/r/tr_insert.result
mysql-test/suite/engines/funcs/r/tr_insert_after_error.result
mysql-test/suite/engines/funcs/r/tr_insert_old_error.result
mysql-test/suite/engines/funcs/r/tr_update.result
mysql-test/suite/engines/funcs/r/tr_update_after_error.result
mysql-test/suite/engines/funcs/r/up_calendar_range.result
mysql-test/suite/engines/funcs/r/up_ignore.result
mysql-test/suite/engines/funcs/r/up_limit.result
mysql-test/suite/engines/funcs/r/up_multi_db_table.result
mysql-test/suite/engines/funcs/r/up_multi_table.result
mysql-test/suite/engines/funcs/r/up_nullcheck.result
mysql-test/suite/engines/funcs/r/up_number_range.result
mysql-test/suite/engines/funcs/r/up_string_range.result
mysql-test/suite/engines/funcs/t/
mysql-test/suite/engines/funcs/t/ai_init_alter_table.test
mysql-test/suite/engines/funcs/t/ai_init_create_table.test
mysql-test/suite/engines/funcs/t/ai_init_insert.test
mysql-test/suite/engines/funcs/t/ai_init_insert_id.test
mysql-test/suite/engines/funcs/t/ai_overflow_error.test
mysql-test/suite/engines/funcs/t/ai_reset_by_truncate.test
mysql-test/suite/engines/funcs/t/ai_sql_auto_is_null.test
mysql-test/suite/engines/funcs/t/an_calendar.test
mysql-test/suite/engines/funcs/t/an_number.test
mysql-test/suite/engines/funcs/t/an_string.test
mysql-test/suite/engines/funcs/t/comment_column.test
mysql-test/suite/engines/funcs/t/comment_column2.test
mysql-test/suite/engines/funcs/t/comment_table.test
mysql-test/suite/engines/funcs/t/crash_manycolumns_number.test
mysql-test/suite/engines/funcs/t/crash_manycolumns_string.test
mysql-test/suite/engines/funcs/t/crash_manyindexes_number.test
mysql-test/suite/engines/funcs/t/crash_manyindexes_string.test
mysql-test/suite/engines/funcs/t/crash_manytables_number.test
mysql-test/suite/engines/funcs/t/crash_manytables_string.test
mysql-test/suite/engines/funcs/t/data1.inc
mysql-test/suite/engines/funcs/t/data2.inc
mysql-test/suite/engines/funcs/t/date_function.test
mysql-test/suite/engines/funcs/t/datetime_function.test
mysql-test/suite/engines/funcs/t/db_alter_character_set.test
mysql-test/suite/engines/funcs/t/db_alter_character_set_collate.test
mysql-test/suite/engines/funcs/t/db_alter_collate_ascii.test
mysql-test/suite/engines/funcs/t/db_alter_collate_utf8.test
mysql-test/suite/engines/funcs/t/db_create_character_set.test
mysql-test/suite/engines/funcs/t/db_create_character_set_collate.test
mysql-test/suite/engines/funcs/t/db_create_drop.test
mysql-test/suite/engines/funcs/t/db_create_error.test
mysql-test/suite/engines/funcs/t/db_create_error_reserved.test
mysql-test/suite/engines/funcs/t/db_create_if_not_exists.test
mysql-test/suite/engines/funcs/t/db_drop_error.test
mysql-test/suite/engines/funcs/t/db_use_error.test
mysql-test/suite/engines/funcs/t/de_autoinc.test
mysql-test/suite/engines/funcs/t/de_calendar_range.test
mysql-test/suite/engines/funcs/t/de_ignore.test
mysql-test/suite/engines/funcs/t/de_limit.test
mysql-test/suite/engines/funcs/t/de_multi_db_table.test
mysql-test/suite/engines/funcs/t/de_multi_db_table_using.test
mysql-test/suite/engines/funcs/t/de_multi_table.test
mysql-test/suite/engines/funcs/t/de_multi_table_using.test
mysql-test/suite/engines/funcs/t/de_number_range.test
mysql-test/suite/engines/funcs/t/de_quick.test
mysql-test/suite/engines/funcs/t/de_string_range.test
mysql-test/suite/engines/funcs/t/de_truncate.test
mysql-test/suite/engines/funcs/t/de_truncate_autoinc.test
mysql-test/suite/engines/funcs/t/disabled.def
mysql-test/suite/engines/funcs/t/fu_aggregate_avg_number.test
mysql-test/suite/engines/funcs/t/fu_aggregate_count_number.test
mysql-test/suite/engines/funcs/t/fu_aggregate_max_number.test
mysql-test/suite/engines/funcs/t/fu_aggregate_max_subquery.test
mysql-test/suite/engines/funcs/t/fu_aggregate_min_number.test
mysql-test/suite/engines/funcs/t/fu_aggregate_sum_number.test
mysql-test/suite/engines/funcs/t/general_no_data.test
mysql-test/suite/engines/funcs/t/general_not_null.test
mysql-test/suite/engines/funcs/t/general_null.test
mysql-test/suite/engines/funcs/t/in_calendar_2_unique_constraints_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_calendar_pk_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_calendar_pk_constraint_error.test
mysql-test/suite/engines/funcs/t/in_calendar_pk_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_calendar_unique_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_calendar_unique_constraint_error.test
mysql-test/suite/engines/funcs/t/in_calendar_unique_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_enum_null.test
mysql-test/suite/engines/funcs/t/in_enum_null_boundary_error.test
mysql-test/suite/engines/funcs/t/in_enum_null_large_error.test
mysql-test/suite/engines/funcs/t/in_insert_select.test
mysql-test/suite/engines/funcs/t/in_insert_select_autoinc.test
mysql-test/suite/engines/funcs/t/in_insert_select_unique_violation.test
mysql-test/suite/engines/funcs/t/in_lob_boundary_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_calendar_pk_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_multicolumn_calendar_pk_constraint_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_calendar_pk_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_multicolumn_calendar_unique_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_multicolumn_calendar_unique_constraint_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_calendar_unique_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_multicolumn_number_pk_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_multicolumn_number_pk_constraint_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_number_pk_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_multicolumn_number_unique_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_multicolumn_number_unique_constraint_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_number_unique_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_multicolumn_string_pk_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_multicolumn_string_pk_constraint_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_string_pk_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_multicolumn_string_unique_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_multicolumn_string_unique_constraint_error.test
mysql-test/suite/engines/funcs/t/in_multicolumn_string_unique_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_number_2_unique_constraints_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_number_boundary_error.test
mysql-test/suite/engines/funcs/t/in_number_decimal_boundary_error.test
mysql-test/suite/engines/funcs/t/in_number_length.test
mysql-test/suite/engines/funcs/t/in_number_null.test
mysql-test/suite/engines/funcs/t/in_number_pk_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_number_pk_constraint_error.test
mysql-test/suite/engines/funcs/t/in_number_pk_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_number_unique_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_number_unique_constraint_error.test
mysql-test/suite/engines/funcs/t/in_number_unique_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_set_null.test
mysql-test/suite/engines/funcs/t/in_set_null_boundary_error.test
mysql-test/suite/engines/funcs/t/in_set_null_large.test
mysql-test/suite/engines/funcs/t/in_string_2_unique_constraints_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_string_boundary_error.test
mysql-test/suite/engines/funcs/t/in_string_not_null.test
mysql-test/suite/engines/funcs/t/in_string_null.test
mysql-test/suite/engines/funcs/t/in_string_pk_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_string_pk_constraint_error.test
mysql-test/suite/engines/funcs/t/in_string_pk_constraint_ignore.test
mysql-test/suite/engines/funcs/t/in_string_set_enum_fail.test
mysql-test/suite/engines/funcs/t/in_string_unique_constraint_duplicate_update.test
mysql-test/suite/engines/funcs/t/in_string_unique_constraint_error.test
mysql-test/suite/engines/funcs/t/in_string_unique_constraint_ignore.test
mysql-test/suite/engines/funcs/t/ix_drop.test
mysql-test/suite/engines/funcs/t/ix_drop_error.test
mysql-test/suite/engines/funcs/t/ix_index_decimals.test
mysql-test/suite/engines/funcs/t/ix_index_lob.test
mysql-test/suite/engines/funcs/t/ix_index_non_string.test
mysql-test/suite/engines/funcs/t/ix_index_string.test
mysql-test/suite/engines/funcs/t/ix_index_string_length.test
mysql-test/suite/engines/funcs/t/ix_unique_decimals.test
mysql-test/suite/engines/funcs/t/ix_unique_lob.test
mysql-test/suite/engines/funcs/t/ix_unique_non_string.test
mysql-test/suite/engines/funcs/t/ix_unique_string.test
mysql-test/suite/engines/funcs/t/ix_unique_string_length.test
mysql-test/suite/engines/funcs/t/ix_using_order.test
mysql-test/suite/engines/funcs/t/jp_comment_column.test
mysql-test/suite/engines/funcs/t/jp_comment_older_compatibility1.test
mysql-test/suite/engines/funcs/t/jp_comment_table.test
mysql-test/suite/engines/funcs/t/ld_all_number_string_calendar_types.test
mysql-test/suite/engines/funcs/t/ld_bit.test
mysql-test/suite/engines/funcs/t/ld_enum_set.test
mysql-test/suite/engines/funcs/t/ld_less_columns.test
mysql-test/suite/engines/funcs/t/ld_more_columns_truncated.test
mysql-test/suite/engines/funcs/t/ld_null.test
mysql-test/suite/engines/funcs/t/ld_quote.test
mysql-test/suite/engines/funcs/t/ld_simple.test
mysql-test/suite/engines/funcs/t/ld_starting.test
mysql-test/suite/engines/funcs/t/ld_unique_error1.test
mysql-test/suite/engines/funcs/t/ld_unique_error1_local.test
mysql-test/suite/engines/funcs/t/ld_unique_error2.test
mysql-test/suite/engines/funcs/t/ld_unique_error2_local.test
mysql-test/suite/engines/funcs/t/ld_unique_error3.test
mysql-test/suite/engines/funcs/t/ld_unique_error3_local.test
mysql-test/suite/engines/funcs/t/load_bit.inc
mysql-test/suite/engines/funcs/t/load_enum_set.inc
mysql-test/suite/engines/funcs/t/load_less_columns.inc
mysql-test/suite/engines/funcs/t/load_more_columns.inc
mysql-test/suite/engines/funcs/t/load_null.inc
mysql-test/suite/engines/funcs/t/load_null2.inc
mysql-test/suite/engines/funcs/t/load_quote.inc
mysql-test/suite/engines/funcs/t/load_simple.inc
mysql-test/suite/engines/funcs/t/load_starting.inc
mysql-test/suite/engines/funcs/t/load_unique_error1.inc
mysql-test/suite/engines/funcs/t/load_unique_error2.inc
mysql-test/suite/engines/funcs/t/load_unique_error3.inc
mysql-test/suite/engines/funcs/t/ps_number_length.test
mysql-test/suite/engines/funcs/t/ps_number_null.test
mysql-test/suite/engines/funcs/t/ps_string_not_null.test
mysql-test/suite/engines/funcs/t/ps_string_null.test
mysql-test/suite/engines/funcs/t/re_number_range.test
mysql-test/suite/engines/funcs/t/re_number_range_set.test
mysql-test/suite/engines/funcs/t/re_number_select.test
mysql-test/suite/engines/funcs/t/re_string_range.test
mysql-test/suite/engines/funcs/t/re_string_range_set.test
mysql-test/suite/engines/funcs/t/rpl000010-slave.opt
mysql-test/suite/engines/funcs/t/rpl000010.test
mysql-test/suite/engines/funcs/t/rpl000011.test
mysql-test/suite/engines/funcs/t/rpl000013.test
mysql-test/suite/engines/funcs/t/rpl000017-slave.opt
mysql-test/suite/engines/funcs/t/rpl000017.test
mysql-test/suite/engines/funcs/t/rpl_000015.test
mysql-test/suite/engines/funcs/t/rpl_LD_INFILE.test
mysql-test/suite/engines/funcs/t/rpl_REDIRECT.test
mysql-test/suite/engines/funcs/t/rpl_alter.test
mysql-test/suite/engines/funcs/t/rpl_alter_db.test
mysql-test/suite/engines/funcs/t/rpl_bit.test
mysql-test/suite/engines/funcs/t/rpl_bit_npk.test
mysql-test/suite/engines/funcs/t/rpl_change_master.test
mysql-test/suite/engines/funcs/t/rpl_create_database-master.opt
mysql-test/suite/engines/funcs/t/rpl_create_database-slave.opt
mysql-test/suite/engines/funcs/t/rpl_create_database.test
mysql-test/suite/engines/funcs/t/rpl_do_grant.test
mysql-test/suite/engines/funcs/t/rpl_drop.test
mysql-test/suite/engines/funcs/t/rpl_drop_db.test
mysql-test/suite/engines/funcs/t/rpl_dual_pos_advance-master.opt
mysql-test/suite/engines/funcs/t/rpl_dual_pos_advance.test
mysql-test/suite/engines/funcs/t/rpl_empty_master_crash-master.opt
mysql-test/suite/engines/funcs/t/rpl_empty_master_crash.test
mysql-test/suite/engines/funcs/t/rpl_err_ignoredtable-slave.opt
mysql-test/suite/engines/funcs/t/rpl_err_ignoredtable.test
mysql-test/suite/engines/funcs/t/rpl_flushlog_loop.test
mysql-test/suite/engines/funcs/t/rpl_free_items-slave.opt
mysql-test/suite/engines/funcs/t/rpl_free_items.test
mysql-test/suite/engines/funcs/t/rpl_get_lock.test
mysql-test/suite/engines/funcs/t/rpl_ignore_grant-slave.opt
mysql-test/suite/engines/funcs/t/rpl_ignore_grant.test
mysql-test/suite/engines/funcs/t/rpl_ignore_revoke-slave.opt
mysql-test/suite/engines/funcs/t/rpl_ignore_revoke.test
mysql-test/suite/engines/funcs/t/rpl_ignore_table_update-slave.opt
mysql-test/suite/engines/funcs/t/rpl_ignore_table_update.test
mysql-test/suite/engines/funcs/t/rpl_init_slave-slave.opt
mysql-test/suite/engines/funcs/t/rpl_init_slave.test
mysql-test/suite/engines/funcs/t/rpl_insert.test
mysql-test/suite/engines/funcs/t/rpl_insert_select.test
mysql-test/suite/engines/funcs/t/rpl_loaddata2.test
mysql-test/suite/engines/funcs/t/rpl_loaddata_m-master.opt
mysql-test/suite/engines/funcs/t/rpl_loaddata_m.test
mysql-test/suite/engines/funcs/t/rpl_loaddata_s-slave.opt
mysql-test/suite/engines/funcs/t/rpl_loaddata_s.test
mysql-test/suite/engines/funcs/t/rpl_loaddatalocal.test
mysql-test/suite/engines/funcs/t/rpl_loadfile.test
mysql-test/suite/engines/funcs/t/rpl_log_pos.test
mysql-test/suite/engines/funcs/t/rpl_many_optimize.test
mysql-test/suite/engines/funcs/t/rpl_master_pos_wait.test
mysql-test/suite/engines/funcs/t/rpl_misc_functions.test
mysql-test/suite/engines/funcs/t/rpl_multi_delete-slave.opt
mysql-test/suite/engines/funcs/t/rpl_multi_delete.test
mysql-test/suite/engines/funcs/t/rpl_multi_delete2-slave.opt
mysql-test/suite/engines/funcs/t/rpl_multi_delete2.test
mysql-test/suite/engines/funcs/t/rpl_multi_update4-slave.opt
mysql-test/suite/engines/funcs/t/rpl_multi_update4.test
mysql-test/suite/engines/funcs/t/rpl_ps.test
mysql-test/suite/engines/funcs/t/rpl_rbr_to_sbr.test
mysql-test/suite/engines/funcs/t/rpl_relayspace-slave.opt
mysql-test/suite/engines/funcs/t/rpl_relayspace.test
mysql-test/suite/engines/funcs/t/rpl_replicate_ignore_db-slave.opt
mysql-test/suite/engines/funcs/t/rpl_replicate_ignore_db.test
mysql-test/suite/engines/funcs/t/rpl_row_NOW.test
mysql-test/suite/engines/funcs/t/rpl_row_USER.test
mysql-test/suite/engines/funcs/t/rpl_row_drop.test
mysql-test/suite/engines/funcs/t/rpl_row_func001.test
mysql-test/suite/engines/funcs/t/rpl_row_inexist_tbl-slave.opt
mysql-test/suite/engines/funcs/t/rpl_row_inexist_tbl.test
mysql-test/suite/engines/funcs/t/rpl_row_max_relay_size.test
mysql-test/suite/engines/funcs/t/rpl_row_reset_slave.test
mysql-test/suite/engines/funcs/t/rpl_row_sp001.test
mysql-test/suite/engines/funcs/t/rpl_row_sp005.test
mysql-test/suite/engines/funcs/t/rpl_row_sp008.test
mysql-test/suite/engines/funcs/t/rpl_row_sp009.test
mysql-test/suite/engines/funcs/t/rpl_row_sp010.test
mysql-test/suite/engines/funcs/t/rpl_row_sp011.test
mysql-test/suite/engines/funcs/t/rpl_row_sp012.test
mysql-test/suite/engines/funcs/t/rpl_row_stop_middle.test
mysql-test/suite/engines/funcs/t/rpl_row_trig001.test
mysql-test/suite/engines/funcs/t/rpl_row_trig002.test
mysql-test/suite/engines/funcs/t/rpl_row_trig003.test
mysql-test/suite/engines/funcs/t/rpl_row_until.test
mysql-test/suite/engines/funcs/t/rpl_row_view01.test
mysql-test/suite/engines/funcs/t/rpl_server_id1.test
mysql-test/suite/engines/funcs/t/rpl_server_id2-slave.opt
mysql-test/suite/engines/funcs/t/rpl_server_id2.test
mysql-test/suite/engines/funcs/t/rpl_session_var.test
mysql-test/suite/engines/funcs/t/rpl_sf.test
mysql-test/suite/engines/funcs/t/rpl_skip_error-slave.opt
mysql-test/suite/engines/funcs/t/rpl_skip_error.test
mysql-test/suite/engines/funcs/t/rpl_slave_status.test
mysql-test/suite/engines/funcs/t/rpl_sp-master.opt
mysql-test/suite/engines/funcs/t/rpl_sp-slave.opt
mysql-test/suite/engines/funcs/t/rpl_sp.test
mysql-test/suite/engines/funcs/t/rpl_sp004.test
mysql-test/suite/engines/funcs/t/rpl_sp_effects-master.opt
mysql-test/suite/engines/funcs/t/rpl_sp_effects-slave.opt
mysql-test/suite/engines/funcs/t/rpl_sp_effects.test
mysql-test/suite/engines/funcs/t/rpl_start_stop_slave.test
mysql-test/suite/engines/funcs/t/rpl_stm_max_relay_size.test
mysql-test/suite/engines/funcs/t/rpl_stm_mystery22.test
mysql-test/suite/engines/funcs/t/rpl_stm_no_op.test
mysql-test/suite/engines/funcs/t/rpl_stm_reset_slave.test
mysql-test/suite/engines/funcs/t/rpl_switch_stm_row_mixed.test
mysql-test/suite/engines/funcs/t/rpl_temp_table.test
mysql-test/suite/engines/funcs/t/rpl_temporary.test
mysql-test/suite/engines/funcs/t/rpl_trigger.test
mysql-test/suite/engines/funcs/t/rpl_trunc_temp.test
mysql-test/suite/engines/funcs/t/rpl_user_variables.test
mysql-test/suite/engines/funcs/t/rpl_variables-master.opt
mysql-test/suite/engines/funcs/t/rpl_variables.test
mysql-test/suite/engines/funcs/t/rpl_view-slave.opt
mysql-test/suite/engines/funcs/t/rpl_view.test
mysql-test/suite/engines/funcs/t/se_join_cross.test
mysql-test/suite/engines/funcs/t/se_join_default.test
mysql-test/suite/engines/funcs/t/se_join_inner.test
mysql-test/suite/engines/funcs/t/se_join_left.test
mysql-test/suite/engines/funcs/t/se_join_left_outer.test
mysql-test/suite/engines/funcs/t/se_join_natural_left.test
mysql-test/suite/engines/funcs/t/se_join_natural_left_outer.test
mysql-test/suite/engines/funcs/t/se_join_natural_right.test
mysql-test/suite/engines/funcs/t/se_join_natural_right_outer.test
mysql-test/suite/engines/funcs/t/se_join_right.test
mysql-test/suite/engines/funcs/t/se_join_right_outer.test
mysql-test/suite/engines/funcs/t/se_join_straight.test
mysql-test/suite/engines/funcs/t/se_rowid.test
mysql-test/suite/engines/funcs/t/se_string_distinct.test
mysql-test/suite/engines/funcs/t/se_string_from.test
mysql-test/suite/engines/funcs/t/se_string_groupby.test
mysql-test/suite/engines/funcs/t/se_string_having.test
mysql-test/suite/engines/funcs/t/se_string_limit.test
mysql-test/suite/engines/funcs/t/se_string_orderby.test
mysql-test/suite/engines/funcs/t/se_string_union.test
mysql-test/suite/engines/funcs/t/se_string_where.test
mysql-test/suite/engines/funcs/t/se_string_where_and.test
mysql-test/suite/engines/funcs/t/se_string_where_or.test
mysql-test/suite/engines/funcs/t/sf_alter.test
mysql-test/suite/engines/funcs/t/sf_cursor.test
mysql-test/suite/engines/funcs/t/sf_simple1.test
mysql-test/suite/engines/funcs/t/sp_alter.test
mysql-test/suite/engines/funcs/t/sp_cursor.test
mysql-test/suite/engines/funcs/t/sp_simple1.test
mysql-test/suite/engines/funcs/t/sq_all.test
mysql-test/suite/engines/funcs/t/sq_any.test
mysql-test/suite/engines/funcs/t/sq_corr.test
mysql-test/suite/engines/funcs/t/sq_error.test
mysql-test/suite/engines/funcs/t/sq_exists.test
mysql-test/suite/engines/funcs/t/sq_from.test
mysql-test/suite/engines/funcs/t/sq_in.test
mysql-test/suite/engines/funcs/t/sq_row.test
mysql-test/suite/engines/funcs/t/sq_scalar.test
mysql-test/suite/engines/funcs/t/sq_some.test
mysql-test/suite/engines/funcs/t/ta_2part_column_to_pk.test
mysql-test/suite/engines/funcs/t/ta_2part_diff_string_to_pk.test
mysql-test/suite/engines/funcs/t/ta_2part_diff_to_pk.test
mysql-test/suite/engines/funcs/t/ta_2part_string_to_pk.test
mysql-test/suite/engines/funcs/t/ta_3part_column_to_pk.test
mysql-test/suite/engines/funcs/t/ta_3part_string_to_pk.test
mysql-test/suite/engines/funcs/t/ta_add_column.test
mysql-test/suite/engines/funcs/t/ta_add_column2.test
mysql-test/suite/engines/funcs/t/ta_add_column_first.test
mysql-test/suite/engines/funcs/t/ta_add_column_first2.test
mysql-test/suite/engines/funcs/t/ta_add_column_middle.test
mysql-test/suite/engines/funcs/t/ta_add_column_middle2.test
mysql-test/suite/engines/funcs/t/ta_add_string.test
mysql-test/suite/engines/funcs/t/ta_add_string2.test
mysql-test/suite/engines/funcs/t/ta_add_string_first.test
mysql-test/suite/engines/funcs/t/ta_add_string_first2.test
mysql-test/suite/engines/funcs/t/ta_add_string_middle.test
mysql-test/suite/engines/funcs/t/ta_add_string_middle2.test
mysql-test/suite/engines/funcs/t/ta_add_string_unique_index.test
mysql-test/suite/engines/funcs/t/ta_add_unique_index.test
mysql-test/suite/engines/funcs/t/ta_column_from_unsigned.test
mysql-test/suite/engines/funcs/t/ta_column_from_zerofill.test
mysql-test/suite/engines/funcs/t/ta_column_to_index.test
mysql-test/suite/engines/funcs/t/ta_column_to_not_null.test
mysql-test/suite/engines/funcs/t/ta_column_to_null.test
mysql-test/suite/engines/funcs/t/ta_column_to_pk.test
mysql-test/suite/engines/funcs/t/ta_column_to_unsigned.test
mysql-test/suite/engines/funcs/t/ta_column_to_zerofill.test
mysql-test/suite/engines/funcs/t/ta_drop_column.test
mysql-test/suite/engines/funcs/t/ta_drop_index.test
mysql-test/suite/engines/funcs/t/ta_drop_pk_autoincrement.test
mysql-test/suite/engines/funcs/t/ta_drop_pk_number.test
mysql-test/suite/engines/funcs/t/ta_drop_pk_string.test
mysql-test/suite/engines/funcs/t/ta_drop_string_index.test
mysql-test/suite/engines/funcs/t/ta_orderby.test
mysql-test/suite/engines/funcs/t/ta_rename.test
mysql-test/suite/engines/funcs/t/ta_set_drop_default.test
mysql-test/suite/engines/funcs/t/ta_string_drop_column.test
mysql-test/suite/engines/funcs/t/ta_string_to_index.test
mysql-test/suite/engines/funcs/t/ta_string_to_not_null.test
mysql-test/suite/engines/funcs/t/ta_string_to_null.test
mysql-test/suite/engines/funcs/t/ta_string_to_pk.test
mysql-test/suite/engines/funcs/t/tc_column_autoincrement.test
mysql-test/suite/engines/funcs/t/tc_column_comment.test
mysql-test/suite/engines/funcs/t/tc_column_comment_string.test
mysql-test/suite/engines/funcs/t/tc_column_default_decimal.test
mysql-test/suite/engines/funcs/t/tc_column_default_number.test
mysql-test/suite/engines/funcs/t/tc_column_default_string.test
mysql-test/suite/engines/funcs/t/tc_column_enum.test
mysql-test/suite/engines/funcs/t/tc_column_enum_long.test
mysql-test/suite/engines/funcs/t/tc_column_key.test
mysql-test/suite/engines/funcs/t/tc_column_key_length.test
mysql-test/suite/engines/funcs/t/tc_column_length.test
mysql-test/suite/engines/funcs/t/tc_column_length_decimals.test
mysql-test/suite/engines/funcs/t/tc_column_length_zero.test
mysql-test/suite/engines/funcs/t/tc_column_not_null.test
mysql-test/suite/engines/funcs/t/tc_column_null.test
mysql-test/suite/engines/funcs/t/tc_column_primary_key_number.test
mysql-test/suite/engines/funcs/t/tc_column_primary_key_string.test
mysql-test/suite/engines/funcs/t/tc_column_serial.test
mysql-test/suite/engines/funcs/t/tc_column_set.test
mysql-test/suite/engines/funcs/t/tc_column_set_long.test
mysql-test/suite/engines/funcs/t/tc_column_unique_key.test
mysql-test/suite/engines/funcs/t/tc_column_unique_key_string.test
mysql-test/suite/engines/funcs/t/tc_column_unsigned.test
mysql-test/suite/engines/funcs/t/tc_column_zerofill.test
mysql-test/suite/engines/funcs/t/tc_drop_table.test
mysql-test/suite/engines/funcs/t/tc_multicolumn_different.test
mysql-test/suite/engines/funcs/t/tc_multicolumn_same.test
mysql-test/suite/engines/funcs/t/tc_multicolumn_same_string.test
mysql-test/suite/engines/funcs/t/tc_partition_analyze.test
mysql-test/suite/engines/funcs/t/tc_partition_change_from_range_to_hash_key.test
mysql-test/suite/engines/funcs/t/tc_partition_check.test
mysql-test/suite/engines/funcs/t/tc_partition_hash.test
mysql-test/suite/engines/funcs/t/tc_partition_hash_date_function.test
mysql-test/suite/engines/funcs/t/tc_partition_key.test
mysql-test/suite/engines/funcs/t/tc_partition_linear_key.test
mysql-test/suite/engines/funcs/t/tc_partition_list_directory.test
mysql-test/suite/engines/funcs/t/tc_partition_list_error.test
mysql-test/suite/engines/funcs/t/tc_partition_optimize.test
mysql-test/suite/engines/funcs/t/tc_partition_rebuild.test
mysql-test/suite/engines/funcs/t/tc_partition_remove.test
mysql-test/suite/engines/funcs/t/tc_partition_reorg_divide.test
mysql-test/suite/engines/funcs/t/tc_partition_reorg_hash_key.test
mysql-test/suite/engines/funcs/t/tc_partition_reorg_merge.test
mysql-test/suite/engines/funcs/t/tc_partition_repair.test
mysql-test/suite/engines/funcs/t/tc_partition_sub1.test
mysql-test/suite/engines/funcs/t/tc_partition_sub2.test
mysql-test/suite/engines/funcs/t/tc_partition_value.test
mysql-test/suite/engines/funcs/t/tc_partition_value_error.test
mysql-test/suite/engines/funcs/t/tc_partition_value_specific.test
mysql-test/suite/engines/funcs/t/tc_rename.test
mysql-test/suite/engines/funcs/t/tc_rename_across_database.test
mysql-test/suite/engines/funcs/t/tc_rename_error.test
mysql-test/suite/engines/funcs/t/tc_structure_comment.test
mysql-test/suite/engines/funcs/t/tc_structure_create_like.test
mysql-test/suite/engines/funcs/t/tc_structure_create_like_string.test
mysql-test/suite/engines/funcs/t/tc_structure_create_select.test
mysql-test/suite/engines/funcs/t/tc_structure_create_select_string.test
mysql-test/suite/engines/funcs/t/tc_structure_string_comment.test
mysql-test/suite/engines/funcs/t/tc_temporary_column.test
mysql-test/suite/engines/funcs/t/tc_temporary_column_length.test
mysql-test/suite/engines/funcs/t/time_function.test
mysql-test/suite/engines/funcs/t/tr_all_type_triggers.test
mysql-test/suite/engines/funcs/t/tr_delete.test
mysql-test/suite/engines/funcs/t/tr_delete_new_error.test
mysql-test/suite/engines/funcs/t/tr_insert.test
mysql-test/suite/engines/funcs/t/tr_insert_after_error.test
mysql-test/suite/engines/funcs/t/tr_insert_old_error.test
mysql-test/suite/engines/funcs/t/tr_update.test
mysql-test/suite/engines/funcs/t/tr_update_after_error.test
mysql-test/suite/engines/funcs/t/up_calendar_range.test
mysql-test/suite/engines/funcs/t/up_ignore.test
mysql-test/suite/engines/funcs/t/up_limit.test
mysql-test/suite/engines/funcs/t/up_multi_db_table.test
mysql-test/suite/engines/funcs/t/up_multi_table.test
mysql-test/suite/engines/funcs/t/up_nullcheck.test
mysql-test/suite/engines/funcs/t/up_number_range.test
mysql-test/suite/engines/funcs/t/up_string_range.test
mysql-test/suite/engines/funcs/t/wait_show_pattern.inc
mysql-test/suite/engines/funcs/t/wait_slave_status.inc
mysql-test/suite/engines/iuds/
mysql-test/suite/engines/iuds/r/
mysql-test/suite/engines/iuds/r/delete_decimal.result
mysql-test/suite/engines/iuds/r/delete_time.result
mysql-test/suite/engines/iuds/r/delete_year.result
mysql-test/suite/engines/iuds/r/insert_calendar.result
mysql-test/suite/engines/iuds/r/insert_decimal.result
mysql-test/suite/engines/iuds/r/insert_number.result
mysql-test/suite/engines/iuds/r/insert_time.result
mysql-test/suite/engines/iuds/r/insert_year.result
mysql-test/suite/engines/iuds/r/strings_charsets_update_delete.result
mysql-test/suite/engines/iuds/r/strings_update_delete.result
mysql-test/suite/engines/iuds/r/type_bit_iuds.result
mysql-test/suite/engines/iuds/r/update_decimal.result
mysql-test/suite/engines/iuds/r/update_delete_calendar.result
mysql-test/suite/engines/iuds/r/update_delete_number.result
mysql-test/suite/engines/iuds/r/update_time.result
mysql-test/suite/engines/iuds/r/update_year.result
mysql-test/suite/engines/iuds/t/
mysql-test/suite/engines/iuds/t/delete_decimal.test
mysql-test/suite/engines/iuds/t/delete_time.test
mysql-test/suite/engines/iuds/t/delete_year.test
mysql-test/suite/engines/iuds/t/disabled.def
mysql-test/suite/engines/iuds/t/hindi.txt
mysql-test/suite/engines/iuds/t/insert_calendar.test
mysql-test/suite/engines/iuds/t/insert_decimal.test
mysql-test/suite/engines/iuds/t/insert_number.test
mysql-test/suite/engines/iuds/t/insert_time.test
mysql-test/suite/engines/iuds/t/insert_year.test
mysql-test/suite/engines/iuds/t/sample.txt
mysql-test/suite/engines/iuds/t/strings_charsets_update_delete.test
mysql-test/suite/engines/iuds/t/strings_update_delete.test
mysql-test/suite/engines/iuds/t/type_bit_iuds.test
mysql-test/suite/engines/iuds/t/update_decimal.test
mysql-test/suite/engines/iuds/t/update_delete_calendar.test
mysql-test/suite/engines/iuds/t/update_delete_number.test
mysql-test/suite/engines/iuds/t/update_time.test
mysql-test/suite/engines/iuds/t/update_year.test
mysql-test/suite/engines/rr_trx/
mysql-test/suite/engines/rr_trx/check_consistency.sql
mysql-test/suite/engines/rr_trx/include/
mysql-test/suite/engines/rr_trx/include/check_for_error_rollback.inc
mysql-test/suite/engines/rr_trx/include/check_for_error_rollback_skip.inc
mysql-test/suite/engines/rr_trx/include/check_repeatable_read_all_columns.inc
mysql-test/suite/engines/rr_trx/include/record_query_all_columns.inc
mysql-test/suite/engines/rr_trx/include/rr_init.test
mysql-test/suite/engines/rr_trx/init_innodb.txt
mysql-test/suite/engines/rr_trx/r/
mysql-test/suite/engines/rr_trx/r/init_innodb.result
mysql-test/suite/engines/rr_trx/r/rr_c_count_not_zero.result
mysql-test/suite/engines/rr_trx/r/rr_c_stats.result
mysql-test/suite/engines/rr_trx/r/rr_i_40-44.result
mysql-test/suite/engines/rr_trx/r/rr_id_3.result
mysql-test/suite/engines/rr_trx/r/rr_id_900.result
mysql-test/suite/engines/rr_trx/r/rr_insert_select_2.result
mysql-test/suite/engines/rr_trx/r/rr_iud_rollback-multi-50.result
mysql-test/suite/engines/rr_trx/r/rr_replace_7-8.result
mysql-test/suite/engines/rr_trx/r/rr_s_select-uncommitted.result
mysql-test/suite/engines/rr_trx/r/rr_sc_select-limit-nolimit_4.result
mysql-test/suite/engines/rr_trx/r/rr_sc_select-same_2.result
mysql-test/suite/engines/rr_trx/r/rr_sc_sum_total.result
mysql-test/suite/engines/rr_trx/r/rr_u_10-19.result
mysql-test/suite/engines/rr_trx/r/rr_u_10-19_nolimit.result
mysql-test/suite/engines/rr_trx/r/rr_u_4.result
mysql-test/suite/engines/rr_trx/run.txt
mysql-test/suite/engines/rr_trx/run_stress_tx_rr.pl
mysql-test/suite/engines/rr_trx/t/
mysql-test/suite/engines/rr_trx/t/init_innodb.test
mysql-test/suite/engines/rr_trx/t/rr_c_count_not_zero.test
mysql-test/suite/engines/rr_trx/t/rr_c_stats.test
mysql-test/suite/engines/rr_trx/t/rr_i_40-44.test
mysql-test/suite/engines/rr_trx/t/rr_id_3.test
mysql-test/suite/engines/rr_trx/t/rr_id_900.test
mysql-test/suite/engines/rr_trx/t/rr_insert_select_2.test
mysql-test/suite/engines/rr_trx/t/rr_iud_rollback-multi-50.test
mysql-test/suite/engines/rr_trx/t/rr_replace_7-8.test
mysql-test/suite/engines/rr_trx/t/rr_s_select-uncommitted.test
mysql-test/suite/engines/rr_trx/t/rr_sc_select-limit-nolimit_4.test
mysql-test/suite/engines/rr_trx/t/rr_sc_select-same_2.test
mysql-test/suite/engines/rr_trx/t/rr_sc_sum_total.test
mysql-test/suite/engines/rr_trx/t/rr_u_10-19.test
mysql-test/suite/engines/rr_trx/t/rr_u_10-19_nolimit.test
mysql-test/suite/engines/rr_trx/t/rr_u_4.test
mysql-test/suite/innodb/r/innodb_bug47622.result
mysql-test/suite/innodb/r/innodb_bug51378.result
mysql-test/suite/innodb/t/innodb_bug47622.test
mysql-test/suite/innodb/t/innodb_bug51378.test
mysql-test/suite/pbxt/r/pbxt_xa.result
mysql-test/suite/pbxt/t/multi_statement-master.opt
mysql-test/suite/pbxt/t/pbxt_xa.test
mysql-test/suite/pbxt/t/suite.opt
mysql-test/suite/rpl/r/rpl_show_slave_running.result
mysql-test/suite/rpl/r/rpl_slow_query_log.result
mysql-test/suite/rpl/r/rpl_stm_sql_mode.result
mysql-test/suite/rpl/r/rpl_typeconv_innodb.result
mysql-test/suite/rpl/t/rpl_begin_commit_rollback-master.opt
mysql-test/suite/rpl/t/rpl_show_slave_running.test
mysql-test/suite/rpl/t/rpl_slow_query_log-slave.opt
mysql-test/suite/rpl/t/rpl_slow_query_log.test
mysql-test/suite/rpl/t/rpl_stm_sql_mode.test
mysql-test/suite/rpl/t/rpl_typeconv-slave.opt
mysql-test/suite/rpl/t/rpl_typeconv_innodb.test
mysql-test/t/bug39022.test
mysql-test/t/innodb_bug47621.test
mysql-test/t/log_tables_upgrade.test
mysql-test/t/no_binlog.test
mysql-test/t/partition_debug_sync.test
mysql-test/t/plugin_not_embedded-master.opt
mysql-test/t/plugin_not_embedded.test
mysql-test/t/view_alias.test
storage/innodb_plugin/include/ut0rbt.h
storage/innodb_plugin/ut/ut0rbt.c
storage/xtradb/build/
storage/xtradb/build/debian/
storage/xtradb/build/debian/README.Maintainer
storage/xtradb/build/debian/additions/
storage/xtradb/build/debian/additions/Docs__Images__Makefile.in
storage/xtradb/build/debian/additions/Docs__Makefile.in
storage/xtradb/build/debian/additions/debian-start
storage/xtradb/build/debian/additions/debian-start.inc.sh
storage/xtradb/build/debian/additions/echo_stderr
storage/xtradb/build/debian/additions/innotop/
storage/xtradb/build/debian/additions/innotop/InnoDBParser.pm
storage/xtradb/build/debian/additions/innotop/changelog.innotop
storage/xtradb/build/debian/additions/innotop/innotop
storage/xtradb/build/debian/additions/innotop/innotop.1
storage/xtradb/build/debian/additions/msql2mysql.1
storage/xtradb/build/debian/additions/my.cnf
storage/xtradb/build/debian/additions/my_print_defaults.1
storage/xtradb/build/debian/additions/myisam_ftdump.1
storage/xtradb/build/debian/additions/myisamchk.1
storage/xtradb/build/debian/additions/myisamlog.1
storage/xtradb/build/debian/additions/myisampack.1
storage/xtradb/build/debian/additions/mysql-server.lintian-overrides
storage/xtradb/build/debian/additions/mysql_config.1
storage/xtradb/build/debian/additions/mysql_convert_table_format.1
storage/xtradb/build/debian/additions/mysql_find_rows.1
storage/xtradb/build/debian/additions/mysql_fix_extensions.1
storage/xtradb/build/debian/additions/mysql_install_db.1
storage/xtradb/build/debian/additions/mysql_secure_installation.1
storage/xtradb/build/debian/additions/mysql_setpermission.1
storage/xtradb/build/debian/additions/mysql_tableinfo.1
storage/xtradb/build/debian/additions/mysql_waitpid.1
storage/xtradb/build/debian/additions/mysqlbinlog.1
storage/xtradb/build/debian/additions/mysqlbug.1
storage/xtradb/build/debian/additions/mysqlcheck.1
storage/xtradb/build/debian/additions/mysqld_safe_syslog.cnf
storage/xtradb/build/debian/additions/mysqldumpslow.1
storage/xtradb/build/debian/additions/mysqlimport.1
storage/xtradb/build/debian/additions/mysqlmanager.1
storage/xtradb/build/debian/additions/mysqlreport
storage/xtradb/build/debian/additions/mysqlreport.1
storage/xtradb/build/debian/additions/mysqltest.1
storage/xtradb/build/debian/additions/pack_isam.1
storage/xtradb/build/debian/additions/resolve_stack_dump.1
storage/xtradb/build/debian/additions/resolveip.1
storage/xtradb/build/debian/changelog
storage/xtradb/build/debian/compat
storage/xtradb/build/debian/control
storage/xtradb/build/debian/copyright
storage/xtradb/build/debian/libpercona-xtradb-client-dev.README.Maintainer
storage/xtradb/build/debian/libpercona-xtradb-client-dev.dirs
storage/xtradb/build/debian/libpercona-xtradb-client-dev.docs
storage/xtradb/build/debian/libpercona-xtradb-client-dev.examples
storage/xtradb/build/debian/libpercona-xtradb-client-dev.files
storage/xtradb/build/debian/libpercona-xtradb-client-dev.links
storage/xtradb/build/debian/libpercona-xtradb-client16.dirs
storage/xtradb/build/debian/libpercona-xtradb-client16.docs
storage/xtradb/build/debian/libpercona-xtradb-client16.files
storage/xtradb/build/debian/libpercona-xtradb-client16.postinst
storage/xtradb/build/debian/patches/
storage/xtradb/build/debian/patches/00list
storage/xtradb/build/debian/patches/01_MAKEFILES__Docs_Images_Makefile.in.dpatch
storage/xtradb/build/debian/patches/01_MAKEFILES__Docs_Makefile.in.dpatch
storage/xtradb/build/debian/patches/33_scripts__mysql_create_system_tables__no_test.dpatch
storage/xtradb/build/debian/patches/38_scripts__mysqld_safe.sh__signals.dpatch
storage/xtradb/build/debian/patches/41_scripts__mysql_install_db.sh__no_test.dpatch
storage/xtradb/build/debian/patches/44_scripts__mysql_config__libs.dpatch
storage/xtradb/build/debian/patches/50_mysql-test__db_test.dpatch
storage/xtradb/build/debian/patches/60_percona_support.dpatch
storage/xtradb/build/debian/percona-xtradb-client-5.1.README.Debian
storage/xtradb/build/debian/percona-xtradb-client-5.1.dirs
storage/xtradb/build/debian/percona-xtradb-client-5.1.docs
storage/xtradb/build/debian/percona-xtradb-client-5.1.files
storage/xtradb/build/debian/percona-xtradb-client-5.1.links
storage/xtradb/build/debian/percona-xtradb-client-5.1.lintian-overrides
storage/xtradb/build/debian/percona-xtradb-client-5.1.menu
storage/xtradb/build/debian/percona-xtradb-common.dirs
storage/xtradb/build/debian/percona-xtradb-common.files
storage/xtradb/build/debian/percona-xtradb-common.lintian-overrides
storage/xtradb/build/debian/percona-xtradb-common.postrm
storage/xtradb/build/debian/percona-xtradb-server-5.1.NEWS
storage/xtradb/build/debian/percona-xtradb-server-5.1.README.Debian
storage/xtradb/build/debian/percona-xtradb-server-5.1.config
storage/xtradb/build/debian/percona-xtradb-server-5.1.dirs
storage/xtradb/build/debian/percona-xtradb-server-5.1.docs
storage/xtradb/build/debian/percona-xtradb-server-5.1.files
storage/xtradb/build/debian/percona-xtradb-server-5.1.links
storage/xtradb/build/debian/percona-xtradb-server-5.1.lintian-overrides
storage/xtradb/build/debian/percona-xtradb-server-5.1.logcheck.ignore.paranoid
storage/xtradb/build/debian/percona-xtradb-server-5.1.logcheck.ignore.server
storage/xtradb/build/debian/percona-xtradb-server-5.1.logcheck.ignore.workstation
storage/xtradb/build/debian/percona-xtradb-server-5.1.mysql.init
storage/xtradb/build/debian/percona-xtradb-server-5.1.percona-xtradb-server.logrotate
storage/xtradb/build/debian/percona-xtradb-server-5.1.postinst
storage/xtradb/build/debian/percona-xtradb-server-5.1.postrm
storage/xtradb/build/debian/percona-xtradb-server-5.1.preinst
storage/xtradb/build/debian/percona-xtradb-server-5.1.prerm
storage/xtradb/build/debian/percona-xtradb-server-5.1.templates
storage/xtradb/build/debian/po/
storage/xtradb/build/debian/po/POTFILES.in
storage/xtradb/build/debian/po/ar.po
storage/xtradb/build/debian/po/ca.po
storage/xtradb/build/debian/po/cs.po
storage/xtradb/build/debian/po/da.po
storage/xtradb/build/debian/po/de.po
storage/xtradb/build/debian/po/es.po
storage/xtradb/build/debian/po/eu.po
storage/xtradb/build/debian/po/fr.po
storage/xtradb/build/debian/po/gl.po
storage/xtradb/build/debian/po/it.po
storage/xtradb/build/debian/po/ja.po
storage/xtradb/build/debian/po/nb.po
storage/xtradb/build/debian/po/nl.po
storage/xtradb/build/debian/po/pt.po
storage/xtradb/build/debian/po/pt_BR.po
storage/xtradb/build/debian/po/ro.po
storage/xtradb/build/debian/po/ru.po
storage/xtradb/build/debian/po/sv.po
storage/xtradb/build/debian/po/templates.pot
storage/xtradb/build/debian/po/tr.po
storage/xtradb/build/debian/rules
storage/xtradb/build/debian/source.lintian-overrides
storage/xtradb/build/debian/watch
storage/xtradb/build/percona-sql.spec
renamed:
mysql-test/r/variables+c.result => mysql-test/r/variables_community.result
mysql-test/t/variables+c.test => mysql-test/t/variables_community.test
modified:
BUILD/SETUP.sh
BUILD/compile-solaris-sparc
COPYING
INSTALL-SOURCE
INSTALL-WIN-SOURCE
client/mysql.cc
client/mysql_upgrade.c
client/mysqladmin.cc
client/mysqlbinlog.cc
client/mysqlcheck.c
client/mysqldump.c
client/mysqlimport.c
client/mysqlshow.c
client/mysqlslap.c
client/mysqltest.cc
cmd-line-utils/libedit/filecomplete.c
cmd-line-utils/readline/rlmbutil.h
extra/comp_err.c
extra/libevent/event-internal.h
extra/libevent/kqueue.c
extra/yassl/include/yassl_error.hpp
extra/yassl/src/ssl.cpp
extra/yassl/src/yassl_error.cpp
extra/yassl/taocrypt/src/algebra.cpp
include/my_sys.h
include/mysql/plugin.h
libmysql/libmysql.c
man/comp_err.1
man/innochecksum.1
man/make_win_bin_dist.1
man/msql2mysql.1
man/my_print_defaults.1
man/myisam_ftdump.1
man/myisamchk.1
man/myisamlog.1
man/myisampack.1
man/mysql-stress-test.pl.1
man/mysql-test-run.pl.1
man/mysql.1
man/mysql.server.1
man/mysql_client_test.1
man/mysql_config.1
man/mysql_convert_table_format.1
man/mysql_find_rows.1
man/mysql_fix_extensions.1
man/mysql_fix_privilege_tables.1
man/mysql_install_db.1
man/mysql_secure_installation.1
man/mysql_setpermission.1
man/mysql_tzinfo_to_sql.1
man/mysql_upgrade.1
man/mysql_waitpid.1
man/mysql_zap.1
man/mysqlaccess.1
man/mysqladmin.1
man/mysqlbinlog.1
man/mysqlbug.1
man/mysqlcheck.1
man/mysqld.8
man/mysqld_multi.1
man/mysqld_safe.1
man/mysqldump.1
man/mysqldumpslow.1
man/mysqlhotcopy.1
man/mysqlimport.1
man/mysqlmanager.8
man/mysqlshow.1
man/mysqlslap.1
man/mysqltest.1
man/ndbd.8
man/ndbd_redo_log_reader.1
man/ndbmtd.8
man/perror.1
man/replace.1
man/resolve_stack_dump.1
man/resolveip.1
mysql-test/Makefile.am
mysql-test/collections/default.daily
mysql-test/collections/default.push
mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
mysql-test/extra/rpl_tests/rpl_insert_id_pk.test
mysql-test/extra/rpl_tests/rpl_loaddata.test
mysql-test/extra/rpl_tests/rpl_tmp_table_and_DDL.test
mysql-test/include/default_mysqld.cnf
mysql-test/include/maria_empty_logs.inc
mysql-test/include/mtr_warnings.sql
mysql-test/include/test_fieldsize.inc
mysql-test/lib/My/ConfigFactory.pm
mysql-test/lib/My/SafeProcess.pm
mysql-test/lib/My/SafeProcess/safe_process.cc
mysql-test/lib/My/SafeProcess/safe_process_win.cc
mysql-test/lib/mtr_cases.pm
mysql-test/lib/mtr_gprof.pl
mysql-test/lib/mtr_misc.pl
mysql-test/lib/mtr_report.pm
mysql-test/lib/mtr_stress.pl
mysql-test/lib/v1/mtr_stress.pl
mysql-test/lib/v1/mysql-test-run.pl
mysql-test/mysql-stress-test.pl
mysql-test/mysql-test-run.pl
mysql-test/r/archive.result
mysql-test/r/backup.result
mysql-test/r/bigint.result
mysql-test/r/compare.result
mysql-test/r/csv.result
mysql-test/r/ctype_ldml.result
mysql-test/r/default.result
mysql-test/r/delete.result
mysql-test/r/explain.result
mysql-test/r/fulltext.result
mysql-test/r/func_concat.result
mysql-test/r/func_gconcat.result
mysql-test/r/func_str.result
mysql-test/r/func_time.result
mysql-test/r/gis-rtree.result
mysql-test/r/grant.result
mysql-test/r/group_by.result
mysql-test/r/group_min_max.result
mysql-test/r/handler_myisam.result
mysql-test/r/having.result
mysql-test/r/information_schema.result
mysql-test/r/innodb-autoinc.result
mysql-test/r/innodb_mysql.result
mysql-test/r/join.result
mysql-test/r/join_outer.result
mysql-test/r/join_outer_jcl6.result
mysql-test/r/loaddata.result*
mysql-test/r/log_state.result
mysql-test/r/merge.result
mysql-test/r/metadata.result
mysql-test/r/multi_update.result
mysql-test/r/myisam.result
mysql-test/r/mysqlbinlog.result
mysql-test/r/mysqlbinlog_row_innodb.result
mysql-test/r/mysqltest.result
mysql-test/r/partition.result
mysql-test/r/partition_error.result
mysql-test/r/partition_innodb.result
mysql-test/r/partition_pruning.result
mysql-test/r/partition_range.result
mysql-test/r/ps.result
mysql-test/r/query_cache_with_views.result
mysql-test/r/select.result
mysql-test/r/select_jcl6.result
mysql-test/r/show_check.result
mysql-test/r/skip_name_resolve.result
mysql-test/r/sp-bugs.result
mysql-test/r/sp-error.result
mysql-test/r/sp.result
mysql-test/r/sp_notembedded.result
mysql-test/r/sp_trans.result
mysql-test/r/subselect.result
mysql-test/r/subselect3.result
mysql-test/r/symlink.result
mysql-test/r/table_elim.result
mysql-test/r/trigger.result
mysql-test/r/type_bit.result
mysql-test/r/type_blob.result
mysql-test/r/type_date.result
mysql-test/r/type_timestamp.result
mysql-test/r/type_year.result
mysql-test/r/union.result
mysql-test/r/update.result
mysql-test/r/variables.result
mysql-test/r/view.result
mysql-test/r/view_grant.result
mysql-test/r/warnings.result
mysql-test/r/xa.result
mysql-test/suite/binlog/r/binlog_index.result
mysql-test/suite/binlog/r/binlog_innodb_row.result
mysql-test/suite/binlog/r/binlog_row_mix_innodb_myisam.result
mysql-test/suite/binlog/r/binlog_stm_mix_innodb_myisam.result
mysql-test/suite/binlog/r/binlog_tmp_table.result
mysql-test/suite/binlog/t/binlog_index.test
mysql-test/suite/binlog/t/binlog_innodb_row.test
mysql-test/suite/binlog/t/binlog_tmp_table.test
mysql-test/suite/federated/federated.result
mysql-test/suite/federated/federated.test
mysql-test/suite/funcs_1/datadict/processlist_priv.inc
mysql-test/suite/funcs_1/r/is_columns_is.result
mysql-test/suite/funcs_1/r/is_tables_is.result
mysql-test/suite/innodb/r/innodb-index.result
mysql-test/suite/innodb/r/innodb_bug44571.result
mysql-test/suite/innodb/t/innodb-consistent.test
mysql-test/suite/innodb/t/innodb-index.test
mysql-test/suite/innodb/t/innodb_bug44571.test
mysql-test/suite/maria/t/maria-recovery-bitmap.test
mysql-test/suite/parts/inc/partition_auto_increment.inc
mysql-test/suite/parts/r/partition_auto_increment_archive.result
mysql-test/suite/parts/r/partition_auto_increment_blackhole.result
mysql-test/suite/parts/r/partition_auto_increment_innodb.result
mysql-test/suite/parts/r/partition_auto_increment_maria.result
mysql-test/suite/parts/r/partition_auto_increment_memory.result
mysql-test/suite/parts/r/partition_auto_increment_myisam.result
mysql-test/suite/parts/r/partition_auto_increment_ndb.result
mysql-test/suite/parts/t/rpl_partition.test
mysql-test/suite/pbxt/r/default.result
mysql-test/suite/pbxt/r/func_str.result
mysql-test/suite/pbxt/r/group_min_max.result
mysql-test/suite/pbxt/r/join_nested.result
mysql-test/suite/pbxt/r/multi_statement.result
mysql-test/suite/pbxt/r/mysqlshow.result
mysql-test/suite/pbxt/r/negation_elimination.result
mysql-test/suite/pbxt/r/null.result
mysql-test/suite/pbxt/r/order_by.result
mysql-test/suite/pbxt/r/pbxt_ref_int.result
mysql-test/suite/pbxt/r/range.result
mysql-test/suite/pbxt/r/type_timestamp.result
mysql-test/suite/pbxt/t/status.test
mysql-test/suite/rpl/r/rpl_begin_commit_rollback.result
mysql-test/suite/rpl/r/rpl_do_grant.result
mysql-test/suite/rpl/r/rpl_events.result
mysql-test/suite/rpl/r/rpl_get_master_version_and_clock.result
mysql-test/suite/rpl/r/rpl_innodb_mixed_dml.result
mysql-test/suite/rpl/r/rpl_optimize.result
mysql-test/suite/rpl/r/rpl_row_create_table.result
mysql-test/suite/rpl/r/rpl_sp.result
mysql-test/suite/rpl/t/disabled.def
mysql-test/suite/rpl/t/rpl_begin_commit_rollback.test
mysql-test/suite/rpl/t/rpl_do_grant.test
mysql-test/suite/rpl/t/rpl_events.test
mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
mysql-test/suite/rpl/t/rpl_loaddata_symlink.test
mysql-test/suite/rpl/t/rpl_name_const.test
mysql-test/suite/rpl/t/rpl_optimize.test
mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test
mysql-test/suite/rpl/t/rpl_row_create_table.test
mysql-test/suite/rpl/t/rpl_row_trig003.test
mysql-test/suite/rpl/t/rpl_slave_skip.test
mysql-test/suite/sys_vars/r/log_basic.result
mysql-test/suite/sys_vars/r/log_bin_trust_routine_creators_basic.result
mysql-test/suite/sys_vars/r/myisam_sort_buffer_size_basic_32.result
mysql-test/suite/sys_vars/r/myisam_sort_buffer_size_basic_64.result
mysql-test/suite/sys_vars/r/slow_query_log_func.result
mysql-test/suite/sys_vars/t/innodb_table_locks_func.test
mysql-test/suite/sys_vars/t/slow_query_log_func.test
mysql-test/suite/sys_vars/t/sql_low_priority_updates_func.test
mysql-test/t/archive.test
mysql-test/t/bigint.test
mysql-test/t/bug47671-master.opt
mysql-test/t/csv.test
mysql-test/t/ctype_latin1_de-master.opt
mysql-test/t/ctype_ldml.test
mysql-test/t/ctype_ucs2_def-master.opt
mysql-test/t/delete.test
mysql-test/t/explain.test
mysql-test/t/fulltext.test
mysql-test/t/func_concat.test
mysql-test/t/func_gconcat.test
mysql-test/t/func_str.test
mysql-test/t/func_time.test
mysql-test/t/gis-rtree.test
mysql-test/t/grant.test
mysql-test/t/group_by.test
mysql-test/t/group_min_max.test
mysql-test/t/handler_myisam.test
mysql-test/t/having.test
mysql-test/t/innodb-autoinc.test
mysql-test/t/innodb_bug38231.test
mysql-test/t/innodb_mysql.test
mysql-test/t/join.test
mysql-test/t/join_outer.test
mysql-test/t/loaddata.test
mysql-test/t/merge.test
mysql-test/t/metadata.test
mysql-test/t/multi_update.test
mysql-test/t/myisam.test
mysql-test/t/mysql_upgrade.test
mysql-test/t/mysqlbinlog.test
mysql-test/t/mysqltest.test
mysql-test/t/partition.test
mysql-test/t/partition_error.test
mysql-test/t/partition_innodb.test
mysql-test/t/partition_innodb_semi_consistent.test
mysql-test/t/partition_pruning.test
mysql-test/t/partition_range.test
mysql-test/t/query_cache_with_views.test
mysql-test/t/skip_name_resolve.test
mysql-test/t/sp-bugs.test
mysql-test/t/sp_notembedded.test
mysql-test/t/subselect.test
mysql-test/t/symlink.test
mysql-test/t/trigger.test
mysql-test/t/type_bit.test
mysql-test/t/type_date.test
mysql-test/t/type_year.test
mysql-test/t/udf.test
mysql-test/t/update.test
mysql-test/t/variables.test
mysql-test/t/view.test
mysql-test/t/view_grant.test
mysql-test/t/xa.test
mysys/charset.c
mysys/default.c
mysys/mf_keycache.c
mysys/mf_pack.c
mysys/my_gethostbyname.c
mysys/my_init.c
scripts/fill_help_tables.sql
scripts/mysql_system_tables_fix.sql
scripts/mysqld_multi.sh
server-tools/instance-manager/options.cc
sql-common/client.c
sql/debug_sync.cc
sql/debug_sync.h
sql/events.cc
sql/field.cc
sql/field.h
sql/field_conv.cc
sql/ha_partition.cc
sql/handler.cc
sql/hash_filo.cc
sql/item.cc
sql/item.h
sql/item_cmpfunc.cc
sql/item_cmpfunc.h
sql/item_create.cc
sql/item_create.h
sql/item_func.cc
sql/item_row.cc
sql/item_row.h
sql/item_strfunc.cc
sql/item_strfunc.h
sql/item_subselect.cc
sql/item_sum.cc
sql/item_sum.h
sql/item_timefunc.cc
sql/log.cc
sql/log_event.cc
sql/log_event.h
sql/log_event_old.cc
sql/mf_iocache.cc
sql/mysql_priv.h
sql/mysqld.cc
sql/net_serv.cc
sql/opt_range.cc
sql/opt_sum.cc
sql/partition_info.cc
sql/protocol.cc
sql/repl_failsafe.cc
sql/rpl_utility.cc
sql/rpl_utility.h
sql/set_var.cc
sql/share/errmsg.txt
sql/slave.cc
sql/sp.cc
sql/sp_cache.cc
sql/sp_head.cc
sql/sql_acl.cc
sql/sql_base.cc
sql/sql_class.cc
sql/sql_class.h
sql/sql_delete.cc
sql/sql_insert.cc
sql/sql_lex.cc
sql/sql_lex.h
sql/sql_load.cc
sql/sql_parse.cc
sql/sql_partition.cc
sql/sql_plugin.cc
sql/sql_profile.cc
sql/sql_repl.cc
sql/sql_select.cc
sql/sql_select.h
sql/sql_show.cc
sql/sql_table.cc
sql/sql_trigger.cc
sql/sql_update.cc
sql/sql_view.cc
sql/sql_yacc.yy
sql/table.cc
sql/table.h
storage/archive/ha_archive.cc
storage/csv/ha_tina.cc
storage/example/ha_example.h
storage/federated/ha_federated.cc
storage/federated/ha_federated.h
storage/innobase/buf/buf0buf.c
storage/innobase/buf/buf0rea.c
storage/innobase/handler/ha_innodb.cc
storage/innobase/include/buf0rea.h
storage/innobase/lock/lock0lock.c
storage/innobase/os/os0file.c
storage/innobase/plug.in.disabled
storage/innobase/row/row0sel.c
storage/innobase/trx/trx0sys.c
storage/innodb_plugin/CMakeLists.txt
storage/innodb_plugin/ChangeLog
storage/innodb_plugin/Makefile.am
storage/innodb_plugin/btr/btr0btr.c
storage/innodb_plugin/btr/btr0cur.c
storage/innodb_plugin/btr/btr0pcur.c
storage/innodb_plugin/buf/buf0buddy.c
storage/innodb_plugin/buf/buf0buf.c
storage/innodb_plugin/buf/buf0flu.c
storage/innodb_plugin/buf/buf0lru.c
storage/innodb_plugin/buf/buf0rea.c
storage/innodb_plugin/dict/dict0boot.c
storage/innodb_plugin/dict/dict0crea.c
storage/innodb_plugin/dict/dict0dict.c
storage/innodb_plugin/dict/dict0load.c
storage/innodb_plugin/dict/dict0mem.c
storage/innodb_plugin/fil/fil0fil.c
storage/innodb_plugin/fsp/fsp0fsp.c
storage/innodb_plugin/ha/ha0ha.c
storage/innodb_plugin/ha/hash0hash.c
storage/innodb_plugin/handler/ha_innodb.cc
storage/innodb_plugin/handler/ha_innodb.h
storage/innodb_plugin/handler/handler0alter.cc
storage/innodb_plugin/ibuf/ibuf0ibuf.c
storage/innodb_plugin/include/btr0btr.h
storage/innodb_plugin/include/btr0btr.ic
storage/innodb_plugin/include/btr0cur.h
storage/innodb_plugin/include/btr0pcur.h
storage/innodb_plugin/include/btr0pcur.ic
storage/innodb_plugin/include/buf0buf.h
storage/innodb_plugin/include/buf0buf.ic
storage/innodb_plugin/include/buf0flu.h
storage/innodb_plugin/include/data0type.ic
storage/innodb_plugin/include/dict0boot.h
storage/innodb_plugin/include/dict0mem.h
storage/innodb_plugin/include/fil0fil.h
storage/innodb_plugin/include/hash0hash.h
storage/innodb_plugin/include/hash0hash.ic
storage/innodb_plugin/include/lock0lock.h
storage/innodb_plugin/include/log0log.h
storage/innodb_plugin/include/log0log.ic
storage/innodb_plugin/include/log0recv.h
storage/innodb_plugin/include/mem0dbg.h
storage/innodb_plugin/include/mem0dbg.ic
storage/innodb_plugin/include/mem0mem.h
storage/innodb_plugin/include/mem0mem.ic
storage/innodb_plugin/include/mtr0mtr.ic
storage/innodb_plugin/include/os0file.h
storage/innodb_plugin/include/que0que.h
storage/innodb_plugin/include/que0que.ic
storage/innodb_plugin/include/row0mysql.h
storage/innodb_plugin/include/row0sel.h
storage/innodb_plugin/include/srv0srv.h
storage/innodb_plugin/include/sync0rw.h
storage/innodb_plugin/include/sync0sync.h
storage/innodb_plugin/include/trx0rseg.h
storage/innodb_plugin/include/trx0sys.h
storage/innodb_plugin/include/trx0trx.h
storage/innodb_plugin/include/trx0types.h
storage/innodb_plugin/include/univ.i
storage/innodb_plugin/include/ut0rnd.ic
storage/innodb_plugin/lock/lock0lock.c
storage/innodb_plugin/log/log0log.c
storage/innodb_plugin/log/log0recv.c
storage/innodb_plugin/mem/mem0dbg.c
storage/innodb_plugin/mem/mem0mem.c
storage/innodb_plugin/os/os0file.c
storage/innodb_plugin/page/page0page.c
storage/innodb_plugin/plug.in.disabled
storage/innodb_plugin/rem/rem0rec.c
storage/innodb_plugin/row/row0ins.c
storage/innodb_plugin/row/row0merge.c
storage/innodb_plugin/row/row0mysql.c
storage/innodb_plugin/row/row0row.c
storage/innodb_plugin/row/row0sel.c
storage/innodb_plugin/row/row0umod.c
storage/innodb_plugin/row/row0upd.c
storage/innodb_plugin/srv/srv0srv.c
storage/innodb_plugin/srv/srv0start.c
storage/innodb_plugin/sync/sync0sync.c
storage/innodb_plugin/trx/trx0i_s.c
storage/innodb_plugin/trx/trx0rec.c
storage/innodb_plugin/trx/trx0rseg.c
storage/innodb_plugin/trx/trx0sys.c
storage/innodb_plugin/trx/trx0trx.c
storage/maria/ma_loghandler.c
storage/maria/ma_search.c
storage/maria/maria_def.h
storage/myisam/ft_boolean_search.c
storage/myisam/ft_stopwords.c
storage/myisam/ha_myisam.cc
storage/myisam/mi_check.c
storage/myisam/mi_delete_all.c
storage/myisam/mi_delete_table.c
storage/myisam/mi_dynrec.c
storage/myisam/mi_extra.c
storage/myisam/mi_locking.c
storage/myisam/mi_open.c
storage/myisam/mi_page.c
storage/myisam/mi_rnext.c
storage/myisam/mi_write.c
storage/myisam/myisamdef.h
storage/myisam/rt_index.c
storage/myisam/rt_split.c
storage/myisam/sort.c
storage/myisammrg/ha_myisammrg.cc
storage/myisammrg/myrg_open.c
storage/pbxt/ChangeLog
storage/pbxt/src/backup_xt.cc
storage/pbxt/src/cache_xt.cc
storage/pbxt/src/cache_xt.h
storage/pbxt/src/database_xt.cc
storage/pbxt/src/database_xt.h
storage/pbxt/src/datadic_xt.cc
storage/pbxt/src/datadic_xt.h
storage/pbxt/src/datalog_xt.cc
storage/pbxt/src/filesys_xt.h
storage/pbxt/src/ha_pbxt.cc
storage/pbxt/src/index_xt.cc
storage/pbxt/src/index_xt.h
storage/pbxt/src/lock_xt.cc
storage/pbxt/src/lock_xt.h
storage/pbxt/src/locklist_xt.cc
storage/pbxt/src/myxt_xt.cc
storage/pbxt/src/pbms_enabled.cc
storage/pbxt/src/pthread_xt.cc
storage/pbxt/src/pthread_xt.h
storage/pbxt/src/restart_xt.cc
storage/pbxt/src/restart_xt.h
storage/pbxt/src/strutil_xt.cc
storage/pbxt/src/tabcache_xt.cc
storage/pbxt/src/tabcache_xt.h
storage/pbxt/src/table_xt.cc
storage/pbxt/src/table_xt.h
storage/pbxt/src/thread_xt.cc
storage/pbxt/src/thread_xt.h
storage/pbxt/src/trace_xt.cc
storage/pbxt/src/trace_xt.h
storage/pbxt/src/xaction_xt.cc
storage/pbxt/src/xaction_xt.h
storage/pbxt/src/xactlog_xt.cc
storage/pbxt/src/xactlog_xt.h
storage/pbxt/src/xt_defs.h
storage/xtradb/btr/btr0btr.c
storage/xtradb/btr/btr0cur.c
storage/xtradb/btr/btr0pcur.c
storage/xtradb/btr/btr0sea.c
storage/xtradb/buf/buf0buddy.c
storage/xtradb/buf/buf0buf.c
storage/xtradb/buf/buf0flu.c
storage/xtradb/buf/buf0rea.c
storage/xtradb/dict/dict0dict.c
storage/xtradb/dict/dict0mem.c
storage/xtradb/fil/fil0fil.c
storage/xtradb/fsp/fsp0fsp.c
storage/xtradb/handler/ha_innodb.cc
storage/xtradb/handler/ha_innodb.h
storage/xtradb/handler/i_s.cc
storage/xtradb/handler/i_s.h
storage/xtradb/handler/innodb_patch_info.h
storage/xtradb/include/btr0btr.ic
storage/xtradb/include/buf0buddy.h
storage/xtradb/include/buf0buf.h
storage/xtradb/include/buf0buf.ic
storage/xtradb/include/buf0types.h
storage/xtradb/include/dict0dict.h
storage/xtradb/include/dict0mem.h
storage/xtradb/include/fil0fil.h
storage/xtradb/include/fsp0types.h
storage/xtradb/include/fut0fut.ic
storage/xtradb/include/ha_prototypes.h
storage/xtradb/include/page0cur.h
storage/xtradb/include/page0page.h
storage/xtradb/include/page0page.ic
storage/xtradb/include/page0types.h
storage/xtradb/include/srv0srv.h
storage/xtradb/include/trx0sys.h
storage/xtradb/include/univ.i
storage/xtradb/include/ut0lst.h
storage/xtradb/include/ut0rnd.h
storage/xtradb/include/ut0rnd.ic
storage/xtradb/lock/lock0lock.c
storage/xtradb/log/log0log.c
storage/xtradb/log/log0recv.c
storage/xtradb/mtr/mtr0log.c
storage/xtradb/page/page0cur.c
storage/xtradb/page/page0zip.c
storage/xtradb/row/row0ins.c
storage/xtradb/row/row0merge.c
storage/xtradb/row/row0sel.c
storage/xtradb/srv/srv0srv.c
storage/xtradb/srv/srv0start.c
storage/xtradb/sync/sync0sync.c
storage/xtradb/trx/trx0i_s.c
storage/xtradb/trx/trx0trx.c
strings/ctype-ucs2.c
strings/ctype-utf8.c
support-files/compiler_warnings.supp
support-files/mysql.spec.sh
tests/mysql_client_test.c
unittest/mysys/waiting_threads-t.c
Diff too large for email (1027113 lines, the limit is 1000000).

[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2789)
by Igor Babaev 12 May '10
#At lp:maria/5.2 based on revid:igor@askmonty.org-20100429211039-rp1mza3xjeqd4t1w
2789 Igor Babaev 2010-05-11
Fixed several bugs in the backport code (mwl#106).
modified:
mysql-test/r/innodb_lock_wait_timeout_1.result
mysql-test/r/lock_multi_bug38499.result
mysql-test/r/ps_ddl.result
mysql-test/t/lock_multi_bug38499.test
sql/sql_base.cc
sql/sql_delete.cc
sql/sql_derived.cc
sql/sql_select.cc
sql/sql_union.cc
sql/table.cc
=== modified file 'mysql-test/r/innodb_lock_wait_timeout_1.result'
--- a/mysql-test/r/innodb_lock_wait_timeout_1.result 2009-11-12 11:43:33 +0000
+++ b/mysql-test/r/innodb_lock_wait_timeout_1.result 2010-05-12 04:09:58 +0000
@@ -104,7 +104,7 @@ id 1
select_type PRIMARY
table <derived2>
type ALL
-possible_keys NULL
+possible_keys key0
key NULL
key_len NULL
ref NULL
@@ -308,7 +308,7 @@ id 1
select_type PRIMARY
table <derived2>
type ALL
-possible_keys NULL
+possible_keys key0
key NULL
key_len NULL
ref NULL
=== modified file 'mysql-test/r/lock_multi_bug38499.result'
--- a/mysql-test/r/lock_multi_bug38499.result 2009-08-28 21:49:16 +0000
+++ b/mysql-test/r/lock_multi_bug38499.result 2010-05-12 04:09:58 +0000
@@ -2,7 +2,9 @@ SET @odl_sync_frm = @@global.sync_frm;
SET @@global.sync_frm = OFF;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1( a INT, b INT );
+CREATE TABLE t2( a INT, b INT );
INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
# 1. test regular tables
# 1.1. test altering of columns that multiupdate doesn't use
# 1.1.1. normal mode
@@ -18,5 +20,5 @@ ALTER TABLE t1 ADD COLUMN a INT;
# 2.2. test altering of columns that multiupdate uses
# 2.2.1. normal mode
# 2.2.2. PS mode
-DROP TABLE t1;
+DROP TABLE t1,t2;
SET @@global.sync_frm = @odl_sync_frm;
=== modified file 'mysql-test/r/ps_ddl.result'
--- a/mysql-test/r/ps_ddl.result 2010-01-16 07:44:24 +0000
+++ b/mysql-test/r/ps_ddl.result 2010-05-12 04:09:58 +0000
@@ -1507,12 +1507,12 @@ create view v_27690_1 as select A.a, A.b
execute stmt;
a b a b
1 1 1 1
-2 2 1 1
-1 1 1 1
-2 2 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
+1 1 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
call p_verify_reprepare_count(1);
SUCCESS
@@ -1520,12 +1520,12 @@ SUCCESS
execute stmt;
a b a b
1 1 1 1
-2 2 1 1
-1 1 1 1
-2 2 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
+1 1 1 1
1 1 2 2
+2 2 1 1
2 2 2 2
call p_verify_reprepare_count(0);
SUCCESS
=== modified file 'mysql-test/t/lock_multi_bug38499.test'
--- a/mysql-test/t/lock_multi_bug38499.test 2009-08-28 21:49:16 +0000
+++ b/mysql-test/t/lock_multi_bug38499.test 2010-05-12 04:09:58 +0000
@@ -16,7 +16,9 @@ connect (writer,localhost,root,,);
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1( a INT, b INT );
+CREATE TABLE t2( a INT, b INT );
INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
+INSERT INTO t2 VALUES (1, 1), (2, 2), (3, 3), (4, 4);
--echo # 1. test regular tables
--echo # 1.1. test altering of columns that multiupdate doesn't use
@@ -28,7 +30,7 @@ while ($i) {
--dec $i
--connection writer
- send UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0;
+ send UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0;
--connection locker
ALTER TABLE t1 ADD COLUMN (c INT);
@@ -41,7 +43,7 @@ while ($i) {
--echo # 1.1.2. PS mode
--connection writer
-PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0';
+PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0';
let $i = 100;
while ($i) {
@@ -75,7 +77,7 @@ while ($i) {
UPDATE t1 SET a=b;
--connection writer
---send UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0;
+--send UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0;
--connection locker
--error 0,ER_CANT_DROP_FIELD_OR_KEY
@@ -100,7 +102,7 @@ while ($i) {
UPDATE t1 SET a=b;
--connection writer
- PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t1 t1i) d SET a = 0 WHERE 1=0';
+ PREPARE stmt FROM 'UPDATE t1, (SELECT 1 FROM t2 t1i) d SET a = 0 WHERE 1=0';
--send EXECUTE stmt
--connection locker
@@ -210,7 +212,7 @@ while ($i) {
}
--enable_query_log
--connection default
-DROP TABLE t1;
+DROP TABLE t1,t2;
# Close connections
=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_base.cc 2010-05-12 04:09:58 +0000
@@ -7642,7 +7642,8 @@ bool setup_tables(THD *thd, Name_resolut
TABLE_LIST *first_select_table= (select_insert ?
tables->next_local:
0);
- SELECT_LEX *select_lex= thd->lex->current_select;
+ SELECT_LEX *select_lex= select_insert ? &thd->lex->select_lex :
+ thd->lex->current_select;
if (select_lex->first_cond_optimization)
{
leaves.empty();
=== modified file 'sql/sql_delete.cc'
--- a/sql/sql_delete.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_delete.cc 2010-05-12 04:09:58 +0000
@@ -397,6 +397,12 @@ cleanup:
query_cache_invalidate3(thd, table_list, 1);
}
+ if (thd->lex->current_select->first_cond_optimization)
+ {
+ thd->lex->current_select->save_leaf_tables(thd);
+ thd->lex->current_select->first_cond_optimization= 0;
+ }
+
delete select;
transactional_table= table->file->has_transactions();
=== modified file 'sql/sql_derived.cc'
--- a/sql/sql_derived.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_derived.cc 2010-05-12 04:09:58 +0000
@@ -159,8 +159,12 @@ mysql_handle_single_derived(LEX *lex, TA
uint phase_flag= DT_INIT << phase;
if (phase_flag > phases)
break;
+#if 0
if (!(phases & phase_flag) ||
derived->merged_for_insert && phase_flag != DT_REINIT)
+#else
+ if (!(phases & phase_flag))
+#endif
continue;
/* Skip derived tables to which the phase isn't applicable. */
if (phase_flag != DT_PREPARE &&
@@ -476,11 +480,27 @@ bool mysql_derived_merge_for_insert(THD
derived->table= table;
derived->schema_table=
((TABLE_LIST*)dt_select->table_list.first)->schema_table;
- derived->select_lex->leaf_tables.push_back(tl);
+ if (!derived->merged)
+ {
+ Query_arena *arena, backup;
+ arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test
+ derived->select_lex->leaf_tables.push_back(tl);
+ derived->nested_join= (NESTED_JOIN*) thd->calloc(sizeof(NESTED_JOIN));
+ if (derived->nested_join)
+ {
+ derived->wrap_into_nested_join(tl->select_lex->top_join_list);
+ derived->get_unit()->exclude_level();
+ }
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+ derived->merged= TRUE;
+ if (!derived->nested_join)
+ return TRUE;
+ }
}
else
{
- if (mysql_derived_merge(thd, lex, derived))
+ if (!derived->merged_for_insert && mysql_derived_merge(thd, lex, derived))
return TRUE;
}
derived->merged_for_insert= TRUE;
@@ -585,11 +605,19 @@ bool mysql_derived_prepare(THD *thd, LEX
bool res= FALSE;
// Skip already prepared views/DT
+#if 0
if (!unit || unit->prepared || derived->merged_for_insert)
+#else
+ if (!unit || unit->prepared)
+#endif
DBUG_RETURN(FALSE);
/* It's a target view for an INSERT, create field translation only. */
+#if 0
if (derived->skip_prepare_derived && !derived->is_multitable())
+#else
+ if (derived->merged_for_insert)
+#endif
{
res= derived->create_field_translation(thd);
DBUG_RETURN(res);
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_select.cc 2010-05-12 04:09:58 +0000
@@ -7836,7 +7836,7 @@ void JOIN_TAB::cleanup()
bool JOIN_TAB::preread_init()
{
TABLE_LIST *derived= table->pos_in_table_list;
- if (!derived->is_materialized_derived())
+ if (!derived || !derived->is_materialized_derived())
{
preread_init_done= TRUE;
return FALSE;
@@ -9923,12 +9923,14 @@ simplify_joins(JOIN *join, List<TABLE_LI
{
TABLE_LIST *tbl;
List_iterator<TABLE_LIST> it(nested_join->join_list);
+ List<TABLE_LIST> repl_list;
while ((tbl= it++))
{
tbl->embedding= table->embedding;
tbl->join_list= table->join_list;
+ repl_list.push_back(tbl);
}
- li.replace(nested_join->join_list);
+ li.replace(repl_list);
/* Need to update the name resolution table chain when flattening joins */
fix_name_res= TRUE;
table= *li.ref();
=== modified file 'sql/sql_union.cc'
--- a/sql/sql_union.cc 2010-04-29 21:10:39 +0000
+++ b/sql/sql_union.cc 2010-05-12 04:09:58 +0000
@@ -394,7 +394,7 @@ bool st_select_lex_unit::prepare(THD *th
if (union_result->create_result_table(thd, &types, test(union_distinct),
create_options, "", FALSE, TRUE))
goto err;
- if (!lex_select_save->first_cond_optimization)
+ if (fake_select_lex && !fake_select_lex->first_cond_optimization)
{
save_tablenr= result_table_list.tablenr_exec;
save_map= result_table_list.map_exec;
@@ -403,7 +403,7 @@ bool st_select_lex_unit::prepare(THD *th
result_table_list.db= (char*) "";
result_table_list.table_name= result_table_list.alias= (char*) "union";
result_table_list.table= table= union_result->table;
- if (!lex_select_save->first_cond_optimization)
+ if (fake_select_lex && !fake_select_lex->first_cond_optimization)
{
result_table_list.tablenr_exec= save_tablenr;
result_table_list.map_exec= save_map;
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-04-29 21:10:39 +0000
+++ b/sql/table.cc 2010-05-12 04:09:58 +0000
@@ -5638,7 +5638,7 @@ bool TABLE_LIST::handle_derived(struct s
@return 0 when it's not a derived table/view.
*/
-inline st_select_lex_unit *TABLE_LIST::get_unit()
+st_select_lex_unit *TABLE_LIST::get_unit()
{
return (view ? &view->unit : derived);
}
@@ -5652,7 +5652,7 @@ inline st_select_lex_unit *TABLE_LIST::g
@return 0 when it's not a derived table.
*/
-inline st_select_lex *TABLE_LIST::get_single_select()
+st_select_lex *TABLE_LIST::get_single_select()
{
SELECT_LEX_UNIT *unit= get_unit();
return (unit ? unit->first_select() : 0);

[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (knielsen:2758)
by knielsen@knielsen-hq.org 11 May '10
#At lp:maria/5.2
2758 knielsen(a)knielsen-hq.org 2010-05-11
Fix Windows ^M line ending.
modified:
sql/slave.h
=== modified file 'sql/slave.h'
--- a/sql/slave.h 2010-03-22 07:34:28 +0000
+++ b/sql/slave.h 2010-05-11 13:24:37 +0000
@@ -106,7 +106,7 @@ extern MYSQL_PLUGIN_IMPORT char *relay_l
extern char *opt_relay_logname, *opt_relaylog_index_name;
extern my_bool opt_skip_slave_start, opt_reckless_slave;
extern my_bool opt_log_slave_updates;
-extern my_bool opt_replicate_annotate_rows_events;
+extern my_bool opt_replicate_annotate_rows_events;
extern ulonglong relay_log_space_limit;
/*

[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2854: Automerge MariaDB 5.1.44b into trunk.
by noreply@launchpad.net 11 May '10
Merge authors:
Kristian Nielsen (knielsen)
------------------------------------------------------------
revno: 2854 [merge]
committer: knielsen(a)knielsen-hq.org
branch nick: mariadb-5.1
timestamp: Tue 2010-05-11 13:28:14 +0200
message:
Automerge MariaDB 5.1.44b into trunk.
modified:
configure.in
mysql-test/r/grant.result
mysql-test/t/grant.test
sql/mysql_priv.h
sql/partition_info.cc
sql/sql_parse.cc
sql/sql_table.cc
sql/sql_yacc.yy
sql/table.cc
tests/mysql_client_test.c

[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2853: Removed extra } that caused script to fail with syntax error
by noreply@launchpad.net 10 May '10
------------------------------------------------------------
revno: 2853
committer: Michael Widenius <monty(a)askmonty.org>
branch nick: maria-5.1
timestamp: Mon 2010-05-10 21:23:16 +0300
message:
Removed extra } that caused script to fail with syntax error
modified:
scripts/mysqld_multi.sh
Hi everyone,
I have been looking at getting our binary windows distribution back on
track. One of the options would be to create the zip file like we did
earlier.
Another way would be to create a proper windows installer. For this,
there are several options: NSIS, WIX, etc. I spent a bit of time
investigating the CPack parts of CMake. And during the investigation, I
managed to pretty much write an entire installer. It was quite easy.
The way this works is to add INSTALL instructions in the CMakeLists.txt
files, plus a bit of extra information for building the installer package.
You can see this in the patch I have attached.
With the patch applied, you have to install NSIS
(http://nsis.sourceforge.net) and add it to the path. Build MariaDB in
release, and run "cpack" in the MariaDB tree. It's NSIS-based because
that seems to be the generator CPack works best with.
The question is what direction to continue in. I'd appreciate some
feedback on this, because I'm not certain if it's the right way to go.
It has been pretty easy so far, so I'm pretty happy to continue with it.
IMHO, the most important thing not implemented in this installer yet is
to set up MariaDB as a service.
I'm going to focus on getting Windows running in KVM for our buildbot
system now. And then I'll get back to this later.
Cheers,
Bo Thorsen.

[Maria-developers] [Branch ~maria-captains/maria/5.1] Rev 2852: bugfix from mysql-5.1, apparently lost in a merge
by noreply@launchpad.net 10 May '10
------------------------------------------------------------
revno: 2852
committer: Sergei Golubchik <sergii(a)pisem.net>
branch nick: maria-5.1
timestamp: Mon 2010-05-10 16:23:08 +0200
message:
bugfix from mysql-5.1, apparently lost in a merge
modified:
sql/sql_select.cc

[Maria-developers] Updated (by Serg): innodb statistics in the slow log (115)
by worklog-noreply@askmonty.org 10 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: innodb statistics in the slow log
CREATION DATE..: Sun, 25 Apr 2010, 16:35
SUPERVISOR.....: Sergei
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Server-BackLog
TASK ID........: 115 (http://askmonty.org/worklog/?tid=115)
VERSION........: Server-5.3
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Serg - Mon, 10 May 2010, 14:36)=-=-
Category updated.
--- /tmp/wklog.115.old.7130 2010-05-10 14:36:08.000000000 +0000
+++ /tmp/wklog.115.new.7130 2010-05-10 14:36:08.000000000 +0000
@@ -1 +1 @@
-Server-Sprint
+Server-BackLog
-=-=(Serg - Mon, 10 May 2010, 14:36)=-=-
Version updated.
--- /tmp/wklog.115.old.7130 2010-05-10 14:36:08.000000000 +0000
+++ /tmp/wklog.115.new.7130 2010-05-10 14:36:08.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+Server-5.3
DESCRIPTION:
We need to find some way for InnoDB/XtraDB-specific information to appear in the
slow log: the same effect as in the Percona patches, but with a cleaner
implementation.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Updated (by Serg): insert ignore ha_extra hint (114)
by worklog-noreply@askmonty.org 10 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: insert ignore ha_extra hint
CREATION DATE..: Sun, 25 Apr 2010, 16:32
SUPERVISOR.....: Sergei
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Server-BackLog
TASK ID........: 114 (http://askmonty.org/worklog/?tid=114)
VERSION........: Server-5.3
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Serg - Mon, 10 May 2010, 14:35)=-=-
Category updated.
--- /tmp/wklog.114.old.7124 2010-05-10 14:35:59.000000000 +0000
+++ /tmp/wklog.114.new.7124 2010-05-10 14:35:59.000000000 +0000
@@ -1 +1 @@
-Server-Sprint
+Server-BackLog
-=-=(Serg - Mon, 10 May 2010, 14:35)=-=-
Version updated.
--- /tmp/wklog.114.old.7124 2010-05-10 14:35:59.000000000 +0000
+++ /tmp/wklog.114.new.7124 2010-05-10 14:35:59.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+Server-5.3
DESCRIPTION:
There is a HA_EXTRA_WRITE_CAN_REPLACE hint that tells the engine that the
following ::write_row() calls are part of a REPLACE statement, not an INSERT.
With this knowledge the engine can execute the replace internally, deleting the
conflicting row in the ::write_row() method instead of returning an error.
We need a similar HA_EXTRA_WRITE_CAN_IGNORE hint to allow engines to optimize
INSERT IGNORE in a similar way.
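As a rough illustration (not an actual patch), an engine could consume such
hints through handler::extra() and act on them in ::write_row(). In this
sketch HA_EXTRA_WRITE_CAN_IGNORE is the proposed, not-yet-existing value, and
insert_mode/store_row() are hypothetical helpers; HA_EXTRA_WRITE_CAN_REPLACE,
HA_EXTRA_WRITE_CANNOT_REPLACE and HA_ERR_FOUND_DUPP_KEY are existing symbols:
  /* Sketch only: HA_EXTRA_WRITE_CAN_IGNORE does not exist yet. */
  int ha_example::extra(enum ha_extra_function operation)
  {
    switch (operation) {
    case HA_EXTRA_WRITE_CAN_REPLACE:    /* existing hint for REPLACE */
      insert_mode= MODE_REPLACE;        /* resolve conflicts by internal delete */
      break;
    case HA_EXTRA_WRITE_CAN_IGNORE:     /* proposed hint for INSERT IGNORE */
      insert_mode= MODE_IGNORE;         /* resolve conflicts by skipping the row */
      break;
    case HA_EXTRA_WRITE_CANNOT_REPLACE: /* end of statement, reset */
      insert_mode= MODE_NORMAL;
      break;
    default:
      break;
    }
    return 0;
  }
  int ha_example::write_row(uchar *buf)
  {
    int error= store_row(buf);          /* hypothetical internal insert */
    if (error == HA_ERR_FOUND_DUPP_KEY && insert_mode == MODE_IGNORE)
      return 0;                         /* conflicting row skipped internally,
                                           no error returned to the SQL layer */
    return error;
  }
The benefit is the same as with REPLACE: the engine resolves the conflict in
one internal operation instead of returning HA_ERR_FOUND_DUPP_KEY and letting
the SQL layer handle the IGNORE.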
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Updated (by Serg): support many clustered keys per table (113)
by worklog-noreply@askmonty.org 10 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: support many clustered keys per table
CREATION DATE..: Sun, 25 Apr 2010, 16:23
SUPERVISOR.....: Sergei
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Server-BackLog
TASK ID........: 113 (http://askmonty.org/worklog/?tid=113)
VERSION........: Server-5.3
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Serg - Mon, 10 May 2010, 14:35)=-=-
Category updated.
--- /tmp/wklog.113.old.7118 2010-05-10 14:35:50.000000000 +0000
+++ /tmp/wklog.113.new.7118 2010-05-10 14:35:50.000000000 +0000
@@ -1 +1 @@
-Server-Sprint
+Server-BackLog
-=-=(Serg - Mon, 10 May 2010, 14:35)=-=-
Version updated.
--- /tmp/wklog.113.old.7105 2010-05-10 14:35:25.000000000 +0000
+++ /tmp/wklog.113.new.7105 2010-05-10 14:35:25.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+Server-5.3
DESCRIPTION:
The server currently assumes that there can be only one clustered key per
table, and that only the primary key can be clustered.
This is not true for certain storage engines, so we need to remove this
limitation.
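For reference, the assumption is baked into the handler interface roughly as
sketched below; primary_key_is_clustered() is the existing hook, while the
per-index variant is purely hypothetical, shown only to indicate the shape a
fix might take:
  typedef unsigned int uint;    /* as in the server headers */
  class handler_sketch          /* heavily trimmed, not the real handler */
  {
  public:
    uint primary_key;           /* index number of the primary key */
    /* Existing API: clustering can only be declared for the primary key. */
    virtual bool primary_key_is_clustered() { return false; }
    /* Hypothetical per-index hook a fix would need, so that secondary
       indexes could also be reported as clustered. */
    virtual bool index_is_clustered(uint inx)
    {
      return inx == primary_key && primary_key_is_clustered();
    }
    virtual ~handler_sketch() {}
  };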
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] Updated (by Serg): support many clustered keys per table (113)
by worklog-noreply@askmonty.org 10 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: support many clustered keys per table
CREATION DATE..: Sun, 25 Apr 2010, 16:23
SUPERVISOR.....: Sergei
IMPLEMENTOR....:
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 113 (http://askmonty.org/worklog/?tid=113)
VERSION........: Server-5.3
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Serg - Mon, 10 May 2010, 14:35)=-=-
Version updated.
--- /tmp/wklog.113.old.7105 2010-05-10 14:35:25.000000000 +0000
+++ /tmp/wklog.113.new.7105 2010-05-10 14:35:25.000000000 +0000
@@ -1 +1 @@
-Server-9.x
+Server-5.3
DESCRIPTION:
The server currently assumes that there can be only one clustered key per
table, and that only the primary key can be clustered.
This is not true for certain storage engines, so we need to remove this
limitation.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)

[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2860)
by knielsen@knielsen-hq.org 10 May '10
#At lp:maria
2860 knielsen(a)knielsen-hq.org 2010-05-10 [merge]
Automerge MariaDB 5.1.44b release.
modified:
mysql-test/r/grant.result
mysql-test/t/grant.test
sql/mysql_priv.h
sql/partition_info.cc
sql/sql_parse.cc
sql/sql_table.cc
sql/sql_yacc.yy
sql/table.cc
tests/mysql_client_test.c
=== modified file 'mysql-test/r/grant.result'
--- a/mysql-test/r/grant.result 2010-01-29 10:42:31 +0000
+++ b/mysql-test/r/grant.result 2010-05-09 19:30:06 +0000
@@ -1413,3 +1413,19 @@ DROP USER 'user1';
DROP USER 'user1'@'localhost';
DROP USER 'user2';
DROP DATABASE db1;
+CREATE DATABASE db1;
+CREATE DATABASE db2;
+GRANT SELECT ON db1.* to 'testbug'@localhost;
+USE db2;
+CREATE TABLE t1 (a INT);
+USE test;
+SELECT * FROM `../db2/tb2`;
+ERROR 42S02: Table 'db1.../db2/tb2' doesn't exist
+SELECT * FROM `../db2`.tb2;
+ERROR 42000: SELECT command denied to user 'testbug'@'localhost' for table 'tb2'
+SELECT * FROM `#mysql50#/../db2/tb2`;
+ERROR 42S02: Table 'db1.#mysql50#/../db2/tb2' doesn't exist
+DROP USER 'testbug'@localhost;
+DROP TABLE db2.t1;
+DROP DATABASE db1;
+DROP DATABASE db2;
=== modified file 'mysql-test/t/grant.test'
--- a/mysql-test/t/grant.test 2010-01-29 10:42:31 +0000
+++ b/mysql-test/t/grant.test 2010-05-09 19:30:06 +0000
@@ -1525,5 +1525,30 @@ DROP USER 'user1'@'localhost';
DROP USER 'user2';
DROP DATABASE db1;
+
+#
+# Bug #53371: COM_FIELD_LIST can be abused to bypass table level grants.
+#
+
+CREATE DATABASE db1;
+CREATE DATABASE db2;
+GRANT SELECT ON db1.* to 'testbug'@localhost;
+USE db2;
+CREATE TABLE t1 (a INT);
+USE test;
+connect (con1,localhost,testbug,,db1);
+--error ER_NO_SUCH_TABLE
+SELECT * FROM `../db2/tb2`;
+--error ER_TABLEACCESS_DENIED_ERROR
+SELECT * FROM `../db2`.tb2;
+--error ER_NO_SUCH_TABLE
+SELECT * FROM `#mysql50#/../db2/tb2`;
+connection default;
+disconnect con1;
+DROP USER 'testbug'@localhost;
+DROP TABLE db2.t1;
+DROP DATABASE db1;
+DROP DATABASE db2;
+
# Wait till we reached the initial number of concurrent sessions
--source include/wait_until_count_sessions.inc
=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h 2010-04-28 12:52:24 +0000
+++ b/sql/mysql_priv.h 2010-05-10 07:34:49 +0000
@@ -2289,7 +2289,7 @@ void update_create_info_from_table(HA_CR
int rename_file_ext(const char * from,const char * to,const char * ext);
bool check_db_name(LEX_STRING *db);
bool check_column_name(const char *name);
-bool check_table_name(const char *name, uint length);
+bool check_table_name(const char *name, uint length, bool check_for_path_chars);
char *get_field(MEM_ROOT *mem, Field *field);
bool get_field(MEM_ROOT *mem, Field *field, class String *res);
int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr);
=== modified file 'sql/partition_info.cc'
--- a/sql/partition_info.cc 2009-12-03 11:19:05 +0000
+++ b/sql/partition_info.cc 2010-05-09 19:30:06 +0000
@@ -972,7 +972,7 @@ bool partition_info::check_partition_inf
part_elem->engine_type= default_engine_type;
}
if (check_table_name(part_elem->partition_name,
- strlen(part_elem->partition_name)))
+ strlen(part_elem->partition_name), FALSE))
{
my_error(ER_WRONG_PARTITION_NAME, MYF(0));
goto end;
@@ -990,7 +990,7 @@ bool partition_info::check_partition_inf
{
sub_elem= sub_it++;
if (check_table_name(sub_elem->partition_name,
- strlen(sub_elem->partition_name)))
+ strlen(sub_elem->partition_name), FALSE))
{
my_error(ER_WRONG_PARTITION_NAME, MYF(0));
goto end;
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc 2010-04-30 04:23:39 +0000
+++ b/sql/sql_parse.cc 2010-05-10 07:34:49 +0000
@@ -1334,6 +1334,11 @@ bool dispatch_command(enum enum_server_c
system_charset_info, packet, db_length,
thd->charset(), &dummy_errors);
db_buff[db_length]= '\0';
+ if (check_table_name(db_buff, db_length, FALSE))
+ {
+ my_error(ER_WRONG_TABLE_NAME, MYF(0), db_buff);
+ break;
+ }
table_list.alias= table_list.table_name= db_buff;
if (!(fields= (char *) thd->memdup(wildcard, query_length + 1)))
break;
@@ -6298,7 +6303,7 @@ TABLE_LIST *st_select_lex::add_table_to_
DBUG_RETURN(0); // End of memory
alias_str= alias ? alias->str : table->table.str;
if (!test(table_options & TL_OPTION_ALIAS) &&
- check_table_name(table->table.str, table->table.length))
+ check_table_name(table->table.str, table->table.length, FALSE))
{
my_error(ER_WRONG_TABLE_NAME, MYF(0), table->table.str);
DBUG_RETURN(0);
=== modified file 'sql/sql_table.cc'
--- a/sql/sql_table.cc 2010-04-28 12:52:24 +0000
+++ b/sql/sql_table.cc 2010-05-10 07:34:49 +0000
@@ -435,7 +435,21 @@ uint tablename_to_filename(const char *f
DBUG_PRINT("enter", ("from '%s'", from));
if ((length= check_n_cut_mysql50_prefix(from, to, to_length)))
+ {
+ /*
+ Check if the name supplied is a valid mysql 5.0 name and
+ make the name a zero length string if it's not.
+ Note that just returning zero length is not enough :
+ a lot of places don't check the return value and expect
+ a zero terminated string.
+ */
+ if (check_table_name(to, length, TRUE))
+ {
+ to[0]= 0;
+ length= 0;
+ }
DBUG_RETURN(length);
+ }
length= strconvert(system_charset_info, from,
&my_charset_filename, to, to_length, &errors);
if (check_if_legal_tablename(to) &&
=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy 2010-04-28 12:52:24 +0000
+++ b/sql/sql_yacc.yy 2010-05-10 07:34:49 +0000
@@ -6149,7 +6149,7 @@ alter_list_item:
{
MYSQL_YYABORT;
}
- if (check_table_name($3->table.str,$3->table.length) ||
+ if (check_table_name($3->table.str,$3->table.length, FALSE) ||
($3->db.str && check_db_name(&$3->db)))
{
my_error(ER_WRONG_TABLE_NAME, MYF(0), $3->table.str);
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-04-28 12:52:24 +0000
+++ b/sql/table.cc 2010-05-10 07:34:49 +0000
@@ -494,6 +494,19 @@ inline bool is_system_table_name(const c
}
+/**
+ Check if a string contains path elements
+*/
+
+static inline bool has_disabled_path_chars(const char *str)
+{
+ for (; *str; str++)
+ if (*str == FN_EXTCHAR || *str == '/' || *str == '\\' || *str == '~' || *str == '@')
+ return TRUE;
+ return FALSE;
+}
+
+
/*
Read table definition from a binary / text based .frm file
@@ -548,7 +561,8 @@ int open_table_def(THD *thd, TABLE_SHARE
This kind of tables must have been opened only by the
my_open() above.
*/
- if (strchr(share->table_name.str, '@') ||
+ if (has_disabled_path_chars(share->table_name.str) ||
+ has_disabled_path_chars(share->db.str) ||
!strncmp(share->db.str, MYSQL50_TABLE_NAME_PREFIX,
MYSQL50_TABLE_NAME_PREFIX_LENGTH) ||
!strncmp(share->table_name.str, MYSQL50_TABLE_NAME_PREFIX,
@@ -2718,7 +2732,6 @@ bool check_db_name(LEX_STRING *org_name)
(name_length > NAME_CHAR_LEN)); /* purecov: inspected */
}
-
/*
Allow anything as a table name, as long as it doesn't contain an
' ' at the end
@@ -2726,7 +2739,7 @@ bool check_db_name(LEX_STRING *org_name)
*/
-bool check_table_name(const char *name, uint length)
+bool check_table_name(const char *name, uint length, bool check_for_path_chars)
{
uint name_length= 0; // name length in symbols
const char *end= name+length;
@@ -2753,6 +2766,9 @@ bool check_table_name(const char *name,
continue;
}
}
+ if (check_for_path_chars &&
+ (*name == '/' || *name == '\\' || *name == '~' || *name == FN_EXTCHAR))
+ return 1;
#endif
name++;
name_length++;
=== modified file 'tests/mysql_client_test.c'
--- a/tests/mysql_client_test.c 2010-01-11 13:15:28 +0000
+++ b/tests/mysql_client_test.c 2010-05-09 19:30:06 +0000
@@ -18092,6 +18092,50 @@ static void test_bug44495()
DBUG_VOID_RETURN;
}
+static void test_bug53371()
+{
+ int rc;
+ MYSQL_RES *result;
+
+ myheader("test_bug53371");
+
+ rc= mysql_query(mysql, "DROP TABLE IF EXISTS t1");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP DATABASE IF EXISTS bug53371");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP USER 'testbug'@localhost");
+
+ rc= mysql_query(mysql, "CREATE TABLE t1 (a INT)");
+ myquery(rc);
+ rc= mysql_query(mysql, "CREATE DATABASE bug53371");
+ myquery(rc);
+ rc= mysql_query(mysql, "GRANT SELECT ON bug53371.* to 'testbug'@localhost");
+ myquery(rc);
+
+ rc= mysql_change_user(mysql, "testbug", NULL, "bug53371");
+ myquery(rc);
+
+ rc= mysql_query(mysql, "SHOW COLUMNS FROM client_test_db.t1");
+ DIE_UNLESS(rc);
+ DIE_UNLESS(mysql_errno(mysql) == 1142);
+
+ result= mysql_list_fields(mysql, "../client_test_db/t1", NULL);
+ DIE_IF(result);
+
+ result= mysql_list_fields(mysql, "#mysql50#/../client_test_db/t1", NULL);
+ DIE_IF(result);
+
+ rc= mysql_change_user(mysql, opt_user, opt_password, current_db);
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP TABLE t1");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP DATABASE bug53371");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP USER 'testbug'@localhost");
+ myquery(rc);
+}
+
+
/*
Read and parse arguments and MySQL options from my.cnf
*/
@@ -18401,6 +18445,7 @@ static struct my_tests_st my_tests[]= {
{ "test_bug30472", test_bug30472 },
{ "test_bug20023", test_bug20023 },
{ "test_bug45010", test_bug45010 },
+ { "test_bug53371", test_bug53371 },
{ "test_bug31418", test_bug31418 },
{ "test_bug31669", test_bug31669 },
{ "test_bug28386", test_bug28386 },

[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2859)
by knielsen@knielsen-hq.org 10 May '10
#At lp:maria
2859 knielsen(a)knielsen-hq.org 2010-05-10
Suppress a safemutex warning pending fix of MBug#578117.
modified:
mysql-test/suite/pbxt/r/pbxt_xa.result
mysql-test/suite/pbxt/t/pbxt_xa.test
=== modified file 'mysql-test/suite/pbxt/r/pbxt_xa.result'
--- a/mysql-test/suite/pbxt/r/pbxt_xa.result 2010-03-24 22:12:39 +0000
+++ b/mysql-test/suite/pbxt/r/pbxt_xa.result 2010-05-10 07:29:30 +0000
@@ -1,4 +1,5 @@
drop table if exists t1, t2;
+CALL mtr.add_suppression("Found wrong usage of mutex 'LOCK_sync' and 'LOCK_active'");
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=pbxt;
BEGIN;
=== modified file 'mysql-test/suite/pbxt/t/pbxt_xa.test'
--- a/mysql-test/suite/pbxt/t/pbxt_xa.test 2010-03-24 22:12:39 +0000
+++ b/mysql-test/suite/pbxt/t/pbxt_xa.test 2010-05-10 07:29:30 +0000
@@ -4,6 +4,11 @@
drop table if exists t1, t2;
--enable_warnings
+# This warning is indication of a real bug, MBug#578117.
+# But it is not a regression, so we suppress it to get a clean test run.
+# This suppression must be removed as part of MBug#578117 fix.
+CALL mtr.add_suppression("Found wrong usage of mutex 'LOCK_sync' and 'LOCK_active'");
+
#
# bug lp:544173, xa crash with two 2pc-capable storage engines without binlog
#

[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2834) Bug#53371
by knielsen@knielsen-hq.org 09 May '10
#At lp:maria
2834 knielsen(a)knielsen-hq.org 2010-05-09
Cherry-pick fix for Bug#53371, security hole with bypassing grants using special path in db/table names.
Bump MariaDB version for security fix release.
modified:
configure.in
mysql-test/r/grant.result
mysql-test/t/grant.test
sql/mysql_priv.h
sql/partition_info.cc
sql/sql_parse.cc
sql/sql_table.cc
sql/sql_yacc.yy
sql/table.cc
tests/mysql_client_test.c
=== modified file 'configure.in'
--- a/configure.in 2010-04-29 07:57:25 +0000
+++ b/configure.in 2010-05-09 19:30:06 +0000
@@ -7,7 +7,7 @@ AC_PREREQ(2.59)
# Remember to also update version.c in ndb.
# When changing major version number please also check switch statement
# in mysqlbinlog::check_master_version().
-AC_INIT([MariaDB Server], [5.1.44a-MariaDB], [], [mysql])
+AC_INIT([MariaDB Server], [5.1.44b-MariaDB], [], [mysql])
AC_CONFIG_SRCDIR([sql/mysqld.cc])
AC_CANONICAL_SYSTEM
# USTAR format gives us the possibility to store longer path names in
=== modified file 'mysql-test/r/grant.result'
--- a/mysql-test/r/grant.result 2010-01-29 10:42:31 +0000
+++ b/mysql-test/r/grant.result 2010-05-09 19:30:06 +0000
@@ -1413,3 +1413,19 @@ DROP USER 'user1';
DROP USER 'user1'@'localhost';
DROP USER 'user2';
DROP DATABASE db1;
+CREATE DATABASE db1;
+CREATE DATABASE db2;
+GRANT SELECT ON db1.* to 'testbug'@localhost;
+USE db2;
+CREATE TABLE t1 (a INT);
+USE test;
+SELECT * FROM `../db2/tb2`;
+ERROR 42S02: Table 'db1.../db2/tb2' doesn't exist
+SELECT * FROM `../db2`.tb2;
+ERROR 42000: SELECT command denied to user 'testbug'@'localhost' for table 'tb2'
+SELECT * FROM `#mysql50#/../db2/tb2`;
+ERROR 42S02: Table 'db1.#mysql50#/../db2/tb2' doesn't exist
+DROP USER 'testbug'@localhost;
+DROP TABLE db2.t1;
+DROP DATABASE db1;
+DROP DATABASE db2;
=== modified file 'mysql-test/t/grant.test'
--- a/mysql-test/t/grant.test 2010-01-29 10:42:31 +0000
+++ b/mysql-test/t/grant.test 2010-05-09 19:30:06 +0000
@@ -1525,5 +1525,30 @@ DROP USER 'user1'@'localhost';
DROP USER 'user2';
DROP DATABASE db1;
+
+#
+# Bug #53371: COM_FIELD_LIST can be abused to bypass table level grants.
+#
+
+CREATE DATABASE db1;
+CREATE DATABASE db2;
+GRANT SELECT ON db1.* to 'testbug'@localhost;
+USE db2;
+CREATE TABLE t1 (a INT);
+USE test;
+connect (con1,localhost,testbug,,db1);
+--error ER_NO_SUCH_TABLE
+SELECT * FROM `../db2/tb2`;
+--error ER_TABLEACCESS_DENIED_ERROR
+SELECT * FROM `../db2`.tb2;
+--error ER_NO_SUCH_TABLE
+SELECT * FROM `#mysql50#/../db2/tb2`;
+connection default;
+disconnect con1;
+DROP USER 'testbug'@localhost;
+DROP TABLE db2.t1;
+DROP DATABASE db1;
+DROP DATABASE db2;
+
# Wait till we reached the initial number of concurrent sessions
--source include/wait_until_count_sessions.inc
=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h 2010-03-04 08:03:07 +0000
+++ b/sql/mysql_priv.h 2010-05-09 19:30:06 +0000
@@ -2300,7 +2300,7 @@ void update_create_info_from_table(HA_CR
int rename_file_ext(const char * from,const char * to,const char * ext);
bool check_db_name(LEX_STRING *db);
bool check_column_name(const char *name);
-bool check_table_name(const char *name, uint length);
+bool check_table_name(const char *name, uint length, bool check_for_path_chars);
char *get_field(MEM_ROOT *mem, Field *field);
bool get_field(MEM_ROOT *mem, Field *field, class String *res);
int wild_case_compare(CHARSET_INFO *cs, const char *str,const char *wildstr);
=== modified file 'sql/partition_info.cc'
--- a/sql/partition_info.cc 2009-12-03 11:19:05 +0000
+++ b/sql/partition_info.cc 2010-05-09 19:30:06 +0000
@@ -972,7 +972,7 @@ bool partition_info::check_partition_inf
part_elem->engine_type= default_engine_type;
}
if (check_table_name(part_elem->partition_name,
- strlen(part_elem->partition_name)))
+ strlen(part_elem->partition_name), FALSE))
{
my_error(ER_WRONG_PARTITION_NAME, MYF(0));
goto end;
@@ -990,7 +990,7 @@ bool partition_info::check_partition_inf
{
sub_elem= sub_it++;
if (check_table_name(sub_elem->partition_name,
- strlen(sub_elem->partition_name)))
+ strlen(sub_elem->partition_name), FALSE))
{
my_error(ER_WRONG_PARTITION_NAME, MYF(0));
goto end;
=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc 2010-04-29 07:57:25 +0000
+++ b/sql/sql_parse.cc 2010-05-09 19:30:06 +0000
@@ -1334,6 +1334,11 @@ bool dispatch_command(enum enum_server_c
system_charset_info, packet, db_length,
thd->charset(), &dummy_errors);
db_buff[db_length]= '\0';
+ if (check_table_name(db_buff, db_length, FALSE))
+ {
+ my_error(ER_WRONG_TABLE_NAME, MYF(0), db_buff);
+ break;
+ }
table_list.alias= table_list.table_name= db_buff;
if (!(fields= (char *) thd->memdup(wildcard, query_length + 1)))
break;
@@ -6298,7 +6303,7 @@ TABLE_LIST *st_select_lex::add_table_to_
DBUG_RETURN(0); // End of memory
alias_str= alias ? alias->str : table->table.str;
if (!test(table_options & TL_OPTION_ALIAS) &&
- check_table_name(table->table.str, table->table.length))
+ check_table_name(table->table.str, table->table.length, FALSE))
{
my_error(ER_WRONG_TABLE_NAME, MYF(0), table->table.str);
DBUG_RETURN(0);
=== modified file 'sql/sql_table.cc'
--- a/sql/sql_table.cc 2010-03-04 08:03:07 +0000
+++ b/sql/sql_table.cc 2010-05-09 19:30:06 +0000
@@ -434,7 +434,21 @@ uint tablename_to_filename(const char *f
DBUG_PRINT("enter", ("from '%s'", from));
if ((length= check_n_cut_mysql50_prefix(from, to, to_length)))
+ {
+ /*
+ Check if the name supplied is a valid mysql 5.0 name and
+ make the name a zero length string if it's not.
+ Note that just returning zero length is not enough :
+ a lot of places don't check the return value and expect
+ a zero terminated string.
+ */
+ if (check_table_name(to, length, TRUE))
+ {
+ to[0]= 0;
+ length= 0;
+ }
DBUG_RETURN(length);
+ }
length= strconvert(system_charset_info, from,
&my_charset_filename, to, to_length, &errors);
if (check_if_legal_tablename(to) &&
=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy 2010-03-04 08:03:07 +0000
+++ b/sql/sql_yacc.yy 2010-05-09 19:30:06 +0000
@@ -6149,7 +6149,7 @@ alter_list_item:
{
MYSQL_YYABORT;
}
- if (check_table_name($3->table.str,$3->table.length) ||
+ if (check_table_name($3->table.str,$3->table.length, FALSE) ||
($3->db.str && check_db_name(&$3->db)))
{
my_error(ER_WRONG_TABLE_NAME, MYF(0), $3->table.str);
=== modified file 'sql/table.cc'
--- a/sql/table.cc 2010-03-10 10:32:14 +0000
+++ b/sql/table.cc 2010-05-09 19:30:06 +0000
@@ -494,6 +494,19 @@ inline bool is_system_table_name(const c
}
+/**
+ Check if a string contains path elements
+*/
+
+static inline bool has_disabled_path_chars(const char *str)
+{
+ for (; *str; str++)
+ if (*str == FN_EXTCHAR || *str == '/' || *str == '\\' || *str == '~' || *str == '@')
+ return TRUE;
+ return FALSE;
+}
+
+
/*
Read table definition from a binary / text based .frm file
@@ -548,7 +561,8 @@ int open_table_def(THD *thd, TABLE_SHARE
This kind of tables must have been opened only by the
my_open() above.
*/
- if (strchr(share->table_name.str, '@') ||
+ if (has_disabled_path_chars(share->table_name.str) ||
+ has_disabled_path_chars(share->db.str) ||
!strncmp(share->db.str, MYSQL50_TABLE_NAME_PREFIX,
MYSQL50_TABLE_NAME_PREFIX_LENGTH) ||
!strncmp(share->table_name.str, MYSQL50_TABLE_NAME_PREFIX,
@@ -2718,7 +2732,6 @@ bool check_db_name(LEX_STRING *org_name)
(name_length > NAME_CHAR_LEN)); /* purecov: inspected */
}
-
/*
Allow anything as a table name, as long as it doesn't contain an
' ' at the end
@@ -2726,7 +2739,7 @@ bool check_db_name(LEX_STRING *org_name)
*/
-bool check_table_name(const char *name, uint length)
+bool check_table_name(const char *name, uint length, bool check_for_path_chars)
{
uint name_length= 0; // name length in symbols
const char *end= name+length;
@@ -2753,6 +2766,9 @@ bool check_table_name(const char *name,
continue;
}
}
+ if (check_for_path_chars &&
+ (*name == '/' || *name == '\\' || *name == '~' || *name == FN_EXTCHAR))
+ return 1;
#endif
name++;
name_length++;
=== modified file 'tests/mysql_client_test.c'
--- a/tests/mysql_client_test.c 2010-01-11 13:15:28 +0000
+++ b/tests/mysql_client_test.c 2010-05-09 19:30:06 +0000
@@ -18092,6 +18092,50 @@ static void test_bug44495()
DBUG_VOID_RETURN;
}
+static void test_bug53371()
+{
+ int rc;
+ MYSQL_RES *result;
+
+ myheader("test_bug53371");
+
+ rc= mysql_query(mysql, "DROP TABLE IF EXISTS t1");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP DATABASE IF EXISTS bug53371");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP USER 'testbug'@localhost");
+
+ rc= mysql_query(mysql, "CREATE TABLE t1 (a INT)");
+ myquery(rc);
+ rc= mysql_query(mysql, "CREATE DATABASE bug53371");
+ myquery(rc);
+ rc= mysql_query(mysql, "GRANT SELECT ON bug53371.* to 'testbug'@localhost");
+ myquery(rc);
+
+ rc= mysql_change_user(mysql, "testbug", NULL, "bug53371");
+ myquery(rc);
+
+ rc= mysql_query(mysql, "SHOW COLUMNS FROM client_test_db.t1");
+ DIE_UNLESS(rc);
+ DIE_UNLESS(mysql_errno(mysql) == 1142);
+
+ result= mysql_list_fields(mysql, "../client_test_db/t1", NULL);
+ DIE_IF(result);
+
+ result= mysql_list_fields(mysql, "#mysql50#/../client_test_db/t1", NULL);
+ DIE_IF(result);
+
+ rc= mysql_change_user(mysql, opt_user, opt_password, current_db);
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP TABLE t1");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP DATABASE bug53371");
+ myquery(rc);
+ rc= mysql_query(mysql, "DROP USER 'testbug'@localhost");
+ myquery(rc);
+}
+
+
/*
Read and parse arguments and MySQL options from my.cnf
*/
@@ -18401,6 +18445,7 @@ static struct my_tests_st my_tests[]= {
{ "test_bug30472", test_bug30472 },
{ "test_bug20023", test_bug20023 },
{ "test_bug45010", test_bug45010 },
+ { "test_bug53371", test_bug53371 },
{ "test_bug31418", test_bug31418 },
{ "test_bug31669", test_bug31669 },
{ "test_bug28386", test_bug28386 },
The following buildbots will be unavailable until May 12th:
adutko-centos5-amd64
adutko-ultrasparc3
mariadb-brs
Sorry for the inconvenience.
-Adam

08 May '10
Hi
You should check this link, as it has everything on the storage engine API:
http://forge.mysql.com/wiki/MySQL_Internals_Custom_Engine
Also, you should check the example storage engine source code
(storage/example) for bootstrapping, so that you can get your dummy storage
engine ready in a few minutes.
Once you start, it will be real fun and easy though...
Have fun
Venu
On Mon, Apr 19, 2010 at 1:35 PM, Igor K <igor175(a)gmail.com> wrote:
> Dear all,
>
> I am new to the community and would like to ask a question:
>
> What is the best approach, in your opinion, to start learning storage
> engine API? So that one can write a dummy storage engine in no time.
>
> Advices from the experts as well as various experiences from life are
> welcomed.
>
> Igor Kozachenko
> University of California, Berkeley
>
> _______________________________________________
> Mailing list: https://launchpad.net/~drizzle-discuss
> Post to : drizzle-discuss(a)lists.launchpad.net
> Unsubscribe : https://launchpad.net/~drizzle-discuss
> More help : https://help.launchpad.net/ListHelp
>
FYI: I just updated the Release Process page on the wiki. I removed
some outdated information and added some new information (such as
sending a note to maria-docs(a)lists.launchpad.net when a tree is ready
for release instead of to docs(a)askmonty.org)
http://askmonty.org/wiki/Release_Process
Take a look if you are interested and let me know of any errors or
omissions.
Thanks.
--
Daniel Bartholomew
Monty Program - http://askmonty.org

Re: [Maria-developers] [Commits] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2851)
by Sergei Golubchik 05 May '10
Hi, knielsen!
On May 05, knielsen(a)knielsen-hq.org wrote:
> #At lp:maria
>
> 2851 knielsen(a)knielsen-hq.org 2010-05-05
> Change commit mails to go to commits(a)mariadb.org
> modified:
> .bzr-mysql/default.conf
>
> === modified file '.bzr-mysql/default.conf'
> --- a/.bzr-mysql/default.conf 2010-03-04 08:03:07 +0000
> +++ b/.bzr-mysql/default.conf 2010-05-05 12:58:26 +0000
> @@ -1,6 +1,6 @@
> [MYSQL]
> tree_location = lp:maria
> -post_commit_to = maria-developers(a)lists.launchpad.net
> +post_commit_to = commits(a)mariadb.org
> post_commit_url = lp:maria
> tree_name = maria
> project_name = "MariaDB 5.1, with Maria 1.5"
Don't bother changing it - it only affects the internal MySQL bzr plugin,
which we do not (and cannot) use.
Regards,
Sergei

[Maria-developers] Progress (by Knielsen): Store in binlog text of statements that caused RBR events (47)
by worklog-noreply@askmonty.org 05 May '10
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Store in binlog text of statements that caused RBR events
CREATION DATE..: Sat, 15 Aug 2009, 23:48
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Knielsen, Serg
CATEGORY.......: Server-Sprint
TASK ID........: 47 (http://askmonty.org/worklog/?tid=47)
VERSION........: Server-9.x
STATUS.........: Code-Review
PRIORITY.......: 60
WORKED HOURS...: 29
ESTIMATE.......: 6 (hours remain)
ORIG. ESTIMATE.: 35
PROGRESS NOTES:
-=-=(Knielsen - Wed, 05 May 2010, 13:53)=-=-
Review of fixes to first review done. No new issues found.
Worked 2 hours and estimate 6 hours remain (original estimate unchanged).
-=-=(Knielsen - Fri, 23 Apr 2010, 12:51)=-=-
Status updated.
--- /tmp/wklog.47.old.28747 2010-04-23 12:51:36.000000000 +0000
+++ /tmp/wklog.47.new.28747 2010-04-23 12:51:36.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Knielsen - Tue, 06 Apr 2010, 15:26)=-=-
Code review (mailed to maria-developers@).
Worked 7 hours and estimate 8 hours remain (original estimate unchanged).
-=-=(Knielsen - Tue, 06 Apr 2010, 15:25)=-=-
Status updated.
--- /tmp/wklog.47.old.12734 2010-04-06 15:25:54.000000000 +0000
+++ /tmp/wklog.47.new.12734 2010-04-06 15:25:54.000000000 +0000
@@ -1 +1 @@
-Code-Review
+In-Progress
-=-=(Knielsen - Mon, 29 Mar 2010, 10:59)=-=-
Status updated.
--- /tmp/wklog.47.old.27790 2010-03-29 10:59:53.000000000 +0000
+++ /tmp/wklog.47.new.27790 2010-03-29 10:59:53.000000000 +0000
@@ -1 +1 @@
-In-Progress
+Code-Review
-=-=(Alexi - Thu, 18 Feb 2010, 19:29)=-=-
Worked 20 hours (alexi)
Worked 20 hours and estimate 15 hours remain (original estimate unchanged).
-=-=(Serg - Fri, 05 Feb 2010, 14:04)=-=-
Observers changed: Knielsen,Serg
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Category updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Server-RawIdeaBin
+Server-Sprint
-=-=(Guest - Fri, 05 Feb 2010, 13:40)=-=-
Status updated.
--- /tmp/wklog.47.old.9197 2010-02-05 13:40:36.000000000 +0200
+++ /tmp/wklog.47.new.9197 2010-02-05 13:40:36.000000000 +0200
@@ -1 +1 @@
-Un-Assigned
+In-Progress
-=-=(Alexi - Thu, 04 Feb 2010, 09:54)=-=-
Low Level Design modified.
--- /tmp/wklog.47.old.16174 2010-02-04 09:54:13.000000000 +0200
+++ /tmp/wklog.47.new.16174 2010-02-04 09:54:13.000000000 +0200
@@ -171,35 +171,20 @@
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-When requesting an event, the slave should inform the master whether
-it should send Annotate_rows events or not. To that end we add a new
-BINLOG_SEND_ANNOTATE_ROWS_EVENT flag used when requesting an event:
+If the replicate-annotate-rows-events option is not set on a slave, there
+is no need for master to send Annotate_rows events to this slave. The slave
+(or mysqlbinlog in remote case), before requesting binlog dump via the
+COM_BINLOG_DUMP command, informs the master whether it should send these
+events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT server
+command:
+
+ case COM_BINLOG_DUMP_OPTIONS_EXT:
+ thd->binlog_dump_flags_ext= packet[0];
+ my_ok(thd);
+ break;
- #define BINLOG_DUMP_NON_BLOCK 1
- #define BINLOG_SEND_ANNOTATE_ROWS_EVENT 2
-
- pthread_handler_t handle_slave_io(void *arg)
- { ...
- request_dump(mysql, ...);
- ...
- }
-
- int request_dump(MYSQL* mysql, ...)
- { ...
- if (opt_log_slave_updates &&
- mi->io_thd->variables.binlog_annotate_rows_events)
- binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
- ...
- int2store(buf + 4, binlog_flags);
- ...
- simple_command(mysql, COM_BINLOG_DUMP, buf, ...);
- ...
- }
-
-NOTE. mysqlbinlog, when remotely requesting BINLOG_DUMP by calling the
-simple_command() function, should also use this flag if it wants (in case
-of the --print-annotate-rows-events option set) to recieve Annotate_rows
-events.
+Note. We add this new command and don't use COM_BINLOG_DUMP to avoid possible
+conflicts with MySQL/Sun.
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -338,10 +323,4 @@
to internals(a)lists.mysql.com about this and suggesting to reserve the
event number.
-Also we should notice the introduction of the BINLOG_SEND_ANNOTATE_ROWS_EVENT
-flag taking into account that MySQL/Sun may also introduce a flag with the
-same value to be used in the request_dump-mysql_binlog_send interface.
-But this is mainly the question of merging: if a conflict concerning this
-flag occur, we may simply change the BINLOG_SEND_ANNOTATE_ROWS_EVENT value
-(this does not require additional changes in the code).
------------------------------------------------------------
-=-=(View All Progress Notes, 29 total)=-=-
http://askmonty.org/worklog/index.pl?tid=47&nolimit=1
DESCRIPTION:
Store in binlog (and show in mysqlbinlog output) texts of statements that
caused RBR events
This is needed for (list from Monty):
- Easier to understand why updates happened
- Would make it easier to find out where in application things went
wrong (as you can search for exact strings)
- Allow one to filter things based on comments in the statement.
The cost of this can be that the binlog will be approximately 2x in size
(especially inserts of big BLOBs would be a bit painful), so this should
be an optional feature.
HIGH-LEVEL SPECIFICATION:
Content
~~~~~~~
1. Annotate_rows_log_event
2. Server option: --binlog-annotate-rows-events
3. Server option: --replicate-annotate-rows-events
4. mysqlbinlog option: --print-annotate-rows-events
5. mysqlbinlog output
1. Annotate_rows_log_event [ ANNOTATE_ROWS_EVENT ]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Describes the query which caused the corresponding rows events. Has an empty
post-header and contains the query text in its data part. Example:
************************
ANNOTATE_ROWS_EVENT
************************
00000220 | B6 A0 2C 4B | time_when = 1261215926
00000224 | 33 | event_type = 51
00000225 | 64 00 00 00 | server_id = 100
00000229 | 36 00 00 00 | event_len = 54
0000022D | 56 02 00 00 | log_pos = 00000256
00000231 | 00 00 | flags = <none>
------------------------
00000233 | 49 4E 53 45 | query = "INSERT INTO t1 VALUES (1), (2), (3)"
00000237 | 52 54 20 49 |
0000023B | 4E 54 4F 20 |
0000023F | 74 31 20 56 |
00000243 | 41 4C 55 45 |
00000247 | 53 20 28 31 |
0000024B | 29 2C 20 28 |
0000024F | 32 29 2C 20 |
00000253 | 28 33 29 |
************************
In the binary log, the Annotate_rows event follows the (possible) 'BEGIN' Query
event and precedes the first of the Table map events which accompany the
corresponding rows events. (See the example in the "mysqlbinlog output" section
below.)
2. Server option: --binlog-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the master to write Annotate_rows events to the binary log.
* Variable Name: binlog_annotate_rows_events
* Scope: Global & Session
* Access Type: Dynamic
* Data Type: bool
* Default Value: OFF
NOTE. The session value makes it possible to annotate only selected statements:
...
SET SESSION binlog_annotate_rows_events=ON;
... statements to be annotated ...
SET SESSION binlog_annotate_rows_events=OFF;
... statements not to be annotated ...
3. Server option: --replicate-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Tells the slave to reproduce Annotate_rows events received from the master
in its own binary log (sensible only in combination with the log-slave-updates
option).
* Variable Name: replicate_annotate_rows_events
* Scope: Global
* Access Type: Read only
* Data Type: bool
* Default Value: OFF
NOTE. Why do we additionally need this 'replicate' option? Why not make the
slave reproduce these events whenever its global binlog-annotate-rows-events
value is ON? Because, for example, we may want to configure a slave that
reproduces Annotate_rows events while keeping the global
binlog-annotate-rows-events = OFF as the default value for the client threads
(see also "How slave treats replicate-annotate-rows-events option" in the LLD
part).
4. mysqlbinlog option: --print-annotate-rows-events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
With this option, mysqlbinlog prints the content of Annotate_rows events (if
the binary log contains them). Without this option (i.e. by default),
mysqlbinlog skips Annotate_rows events.
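For example, a typical invocation might look like this (the binlog file name
is hypothetical):
   mysqlbinlog --print-annotate-rows-events master-bin.000001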
5. mysqlbinlog output
~~~~~~~~~~~~~~~~~~~~~
With --print-annotate-rows-events, mysqlbinlog outputs Annotate_rows events
in a form like this:
...
# at 1646
#091219 12:45:26 server id 100  end_log_pos 1714  Query  thread_id=1  exec_time=0  error_code=0
SET TIMESTAMP=1261215926/*!*/;
BEGIN
/*!*/;
# at 1714
# at 1812
# at 1853
# at 1894
# at 1938
#091219 12:45:26 server id 100  end_log_pos 1812  Query: `DELETE t1, t2 FROM t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a`
#091219 12:45:26 server id 100  end_log_pos 1853  Table_map: `test`.`t1` mapped to number 16
#091219 12:45:26 server id 100  end_log_pos 1894  Table_map: `test`.`t2` mapped to number 17
#091219 12:45:26 server id 100  end_log_pos 1938  Delete_rows: table id 16
#091219 12:45:26 server id 100  end_log_pos 1982  Delete_rows: table id 17 flags: STMT_END_F
...
LOW-LEVEL DESIGN:
Content
~~~~~~~
1. Annotate_rows event number
2. Outline of Annotate_rows event behavior
3. How Master writes Annotate_rows events to the binary log
4. How slave treats replicate-annotate-rows-events option
5. How slave IO thread requests Annotate_rows events
6. How master executes the request
7. How slave SQL thread processes Annotate_rows events
8. General remarks
1. Annotate_rows event number
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To avoid possible event number conflicts with MySQL/Sun, we leave a gap
between the last MySQL event number and the Annotate_rows event number:
enum Log_event_type
{ ...
INCIDENT_EVENT= 26,
// New MySQL event numbers are to be added here
MYSQL_EVENTS_END,
MARIA_EVENTS_BEGIN= 51,
// New Maria event numbers start from here
ANNOTATE_ROWS_EVENT= 51,
ENUM_END_EVENT
};
together with the corresponding extension of the 'post_header_len' array in the
Format description event. (This extension does not affect the compatibility
of the binary log.) Here is how the Format description event looks with
this extension:
************************
FORMAT_DESCRIPTION_EVENT
************************
00000004 | A1 A0 2C 4B | time_when = 1261215905
00000008 | 0F | event_type = 15
00000009 | 64 00 00 00 | server_id = 100
0000000D | 7F 00 00 00 | event_len = 127
00000011 | 83 00 00 00 | log_pos = 00000083
00000015 | 01 00 | flags = LOG_EVENT_BINLOG_IN_USE_F
------------------------
00000017 | 04 00 | binlog_ver = 4
00000019 | 35 2E 32 2E | server_ver = 5.2.0-MariaDB-alpha-debug-log
..... ...
0000004B | A1 A0 2C 4B | time_created = 1261215905
0000004F | 13 | common_header_len = 19
------------------------
post_header_len
------------------------
00000050 | 38 | 56 - START_EVENT_V3 [1]
..... ...
00000069 | 02 | 2 - INCIDENT_EVENT [26]
0000006A | 00 | 0 - RESERVED [27]
..... ...
00000081 | 00 | 0 - RESERVED [50]
00000082 | 00 | 0 - ANNOTATE_ROWS_EVENT [51]
************************
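For the Format description event itself this only requires one more entry
in its constructor; a minimal sketch (hypothetical; it assumes an
ANNOTATE_ROWS_HEADER_LEN constant equal to 0, matching the empty post-header):
Format_description_log_event::Format_description_log_event(...)
{ ...
  /* entries between INCIDENT_EVENT and MARIA_EVENTS_BEGIN remain 0 */
  post_header_len[ANNOTATE_ROWS_EVENT-1]= ANNOTATE_ROWS_HEADER_LEN; /* = 0 */
  ...
}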
2. Outline of Annotate_rows event behavior
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each Annotate_rows_log_event object has two private members describing the
corresponding query:
char *m_query_txt;
uint m_query_len;
When the object is created for writing to a binary log, this query is taken
from 'thd' (for short, below we omit the 'Annotate_rows_log_event::' prefix
as well as other implementation details):
Annotate_rows_log_event(THD *thd)
{
m_query_txt = thd->query();
m_query_len = thd->query_length();
}
When the object is read from a binary log, the query is taken from the buffer
containing the binary log representation of the event (this buffer is allocated
in the Log_event object from which all log events are derived):
Annotate_rows_log_event(char *buf, uint event_len,
Format_description_log_event *desc)
{
m_query_len = event_len - desc->common_header_len;
m_query_txt = buf + desc->common_header_len;
}
The events are written to the binary log by the Log_event::write() member
which calls the virtual write_data_header() and write_data_body() members
("data header" and "post header" are synonyms in replication terminology).
In our case, the data header is empty and the data body is just the query:
bool write_data_body(IO_CACHE *file)
{
return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
}
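Since the post-header is empty, the corresponding write_data_header() member
has nothing to write; a trivial sketch (assuming the usual false-on-success
convention of the Log_event writers):
bool write_data_header(IO_CACHE *file)
{
  return 0; /* empty post-header: nothing to write */
}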
Printing the event is just printing the query:
void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
{
my_b_printf(&pinfo->head_cache, "\tQuery: `%s`\n", m_query_txt);
}
3. How Master writes Annotate_rows events to the binary log
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The event is written to the binary log just before the group of Table_map
events which precede the corresponding Rows events (one query may generate
several Table map events in the binary log, but the corresponding
Annotate_rows event must be written only once, before the first Table map
event; hence the boolean variable 'with_annotate' below):
int write_locked_table_maps(THD *thd)
{ ...
bool with_annotate= thd->variables.binlog_annotate_rows_events;
...
for (uint i= 0; i < ... <number of tables> ...; ++i)
{ ...
thd->binlog_write_table_map(table, ..., with_annotate);
with_annotate= 0; // write the Annotate_rows event at most once
...
}
...
}
int THD::binlog_write_table_map(TABLE *table, ..., bool with_annotate)
{ ...
Table_map_log_event the_event(...);
...
if (with_annotate)
{
Annotate_rows_log_event anno(this);
mysql_bin_log.write(&anno);
}
mysql_bin_log.write(&the_event);
...
}
4. How slave treats replicate-annotate-rows-events option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The replicate-annotate-rows-events option is treated just as the session
value of the binlog_annotate_rows_events variable for the slave IO and
SQL threads. This setting is done during initialization of these threads:
pthread_handler_t handle_slave_io(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_IO);
...
}
pthread_handler_t handle_slave_sql(void *arg)
{
THD *thd= new THD;
...
init_slave_thread(thd, SLAVE_THD_SQL);
...
}
int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
{ ...
thd->variables.binlog_annotate_rows_events=
opt_replicate_annotate_rows_events;
...
}
5. How slave IO thread requests Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If the replicate-annotate-rows-events option is not set on a slave, there
is no need for the master to send Annotate_rows events to this slave. The
slave (or mysqlbinlog in the remote case), before requesting a binlog dump
via the COM_BINLOG_DUMP command, informs the master whether it should send
these events by executing the newly added COM_BINLOG_DUMP_OPTIONS_EXT server
command:
case COM_BINLOG_DUMP_OPTIONS_EXT:
thd->binlog_dump_flags_ext= packet[0];
my_ok(thd);
break;
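On the slave side, the IO thread can issue this command just before requesting
the dump. A minimal sketch (hypothetical; it reuses the
BINLOG_SEND_ANNOTATE_ROWS_EVENT flag name for the bit stored in
binlog_dump_flags_ext):
int request_dump(MYSQL* mysql, ...)
{ ...
  uchar options_ext[1]= { 0 };
  if (opt_log_slave_updates &&
      mi->io_thd->variables.binlog_annotate_rows_events)
    options_ext[0]|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
  /* tell the master which optional events to send, then request the dump */
  simple_command(mysql, COM_BINLOG_DUMP_OPTIONS_EXT, options_ext, 1, 0);
  ...
  simple_command(mysql, COM_BINLOG_DUMP, buf, ...);
  ...
}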
Note. We add this new command instead of extending COM_BINLOG_DUMP to avoid
possible conflicts with MySQL/Sun.
6. How master executes the request
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
case COM_BINLOG_DUMP:
{ ...
flags= uint2korr(packet + 4);
...
mysql_binlog_send(thd, ..., flags);
...
}
Since the annotate flag now arrives via COM_BINLOG_DUMP_OPTIONS_EXT (see
section 5 above), mysql_binlog_send() consults thd->binlog_dump_flags_ext
when deciding whether an Annotate_rows event may be skipped:
void mysql_binlog_send(THD* thd, ..., ushort flags)
{ ...
  Log_event::read_log_event(&log, packet, ...);
  ...
  if ((*packet)[EVENT_TYPE_OFFSET + 1] != ANNOTATE_ROWS_EVENT ||
      thd->binlog_dump_flags_ext & BINLOG_SEND_ANNOTATE_ROWS_EVENT)
  {
    my_net_write(net, packet->ptr(), packet->length());
  }
  ...
}
7. How slave SQL thread processes Annotate_rows events
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The slave processes each received event by "applying" it, i.e. by
calling the Log_event::apply_event() function which in turn calls
the virtual do_apply_event() member specific to each type of
event.
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev = next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
int apply_event_and_update_pos(Log_event *ev, ...)
{ ...
ev->apply_event(...);
...
}
int Log_event::apply_event(...)
{
return do_apply_event(...);
}
What does it mean to "apply" an Annotate_rows event? It means to set the
current thd query to the one described by the event, i.e. to the query which
caused the subsequent Rows events (see "How Master writes Annotate_rows
events to the binary log" above to follow what happens further when the
subsequent Rows events are applied):
int Annotate_rows_log_event::do_apply_event(...)
{
thd->set_query(m_query_txt, m_query_len);
}
NOTE. I am not sure, but possibly the current values of thd->query and
thd->query_length should be saved before calling set_query() and restored
when the Annotate_rows_log_event object is deleted.
Is this really needed?
After calling this do_apply_event() function we may not delete the
Annotate_rows_log_event object immediately (see exec_relay_log_event()
above) because thd->query now points to the string inside this object.
We may keep the pointer to this object in the Relay_log_info:
class Relay_log_info
{
public:
...
void set_annotate_event(Annotate_rows_log_event*);
Annotate_rows_log_event* get_annotate_event();
void free_annotate_event();
...
private:
Annotate_rows_log_event* m_annotate_event;
};
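A minimal sketch of these members (hypothetical implementation):
void Relay_log_info::set_annotate_event(Annotate_rows_log_event *ev)
{
  free_annotate_event(); /* drop a previously saved event, if any */
  m_annotate_event= ev;
}
Annotate_rows_log_event* Relay_log_info::get_annotate_event()
{
  return m_annotate_event;
}
void Relay_log_info::free_annotate_event()
{
  delete m_annotate_event; /* deleting NULL is a no-op */
  m_annotate_event= NULL;
}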
The saved Annotate_rows object should be deleted when all corresponding
Rows events have been processed:
int exec_relay_log_event(THD* thd, Relay_log_info* rli)
{ ...
Log_event *ev= next_event(rli);
...
apply_event_and_update_pos(ev, ...);
if (rli->get_annotate_event() && is_last_rows_event(ev))
rli->free_annotate_event();
else if (ev->get_type_code() == ANNOTATE_ROWS_EVENT)
rli->set_annotate_event((Annotate_rows_log_event*) ev);
else if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
...
}
where
bool is_last_rows_event(Log_event* ev)
{
Log_event_type type= ev->get_type_code();
if (IS_ROWS_EVENT_TYPE(type))
{
Rows_log_event* rows= (Rows_log_event*)ev;
return rows->get_flags(Rows_log_event::STMT_END_F);
}
return 0;
}
#define IS_ROWS_EVENT_TYPE(type) ((type) == WRITE_ROWS_EVENT || \
(type) == UPDATE_ROWS_EVENT || \
(type) == DELETE_ROWS_EVENT)
8. General remarks
~~~~~~~~~~~~~~~~~~
Kristian noticed that introducing a new log event type should be coordinated
somehow with MySQL/Sun:
Kristian: The numeric code for this event must be assigned carefully.
It should be coordinated with MySQL/Sun, otherwise we can get into a
situation where MySQL uses the same numeric code for one event that
MariaDB uses for ANNOTATE_ROWS_EVENT, which would make merging the two
impossible.
Alex: I reserved about 20 numbers to avoid possible conflicts
with MySQL.
Kristian: Still, I think it would be appropriate to send a polite email
to internals(a)lists.mysql.com about this, suggesting to reserve the
event number.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)