developers
Threads by month
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- 8 participants
- 6811 discussions
[Maria-developers] Updated (by Guest): Table Elimination: remove the facts table (20)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table Elimination: remove the facts table
CREATION DATE..: Thu, 21 May 2009, 01:07
SUPERVISOR.....: Monty
IMPLEMENTOR....:
COPIES TO......: Psergey
CATEGORY.......: Server-RawIdeaBin
TASK ID........: 20 (http://askmonty.org/worklog/?tid=20)
VERSION........: Server-9.x
STATUS.........: Un-Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:29)=-=-
High-Level Specification modified.
--- /tmp/wklog.20.old.31038 2009-05-22 17:29:58.000000000 +0300
+++ /tmp/wklog.20.new.31038 2009-05-22 17:29:58.000000000 +0300
@@ -68,7 +68,7 @@
where
ACBDT_Birthdate < '1950-01-01'
-At this point, it's possible to conclude that table anchor can be removed from
+At this point, it's possible to conclude that table actor can be removed from
the query plan: anchor model specifies that actor_name has a foreign key:
create table actor_name (
@@ -80,6 +80,18 @@
mean actors that have NULL/no names), but we're not interested in those as it
is an inner join.
+The result is (notice the changed select):
+
+select
+ actor_birthday.AC_ID,
+ actor_name.ACNAM_Name,
+ actor_birthdate.ACBDT_Birthdate
+from
+ actor_birthdate
+ left join actor_name on (actor_name.AC_ID = actor_birthday.AC_ID)
+where
+ ACBDT_Birthdate < '1950-01-01'
+
How to remove
-------------
* Find inner joins, such that
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
High-Level Specification modified.
--- /tmp/wklog.20.old.28489 2009-05-21 22:54:52.000000000 +0300
+++ /tmp/wklog.20.new.28489 2009-05-21 22:54:52.000000000 +0300
@@ -11,7 +11,7 @@
-- A physical attribute: actor name (non-historized for simplicity)
--
create table actor_name (
- AC_ID primary key,
+ AC_ID int primary key,
ACNAM_Name varchar(20),
index(ACNAM_Name),
foreign key (AC_ID) references actor(AC_ID),
@@ -21,7 +21,7 @@
-- Another physical attribute: actor birthdate
--
create table actor_birthdate (
- AC_ID primary key,
+ AC_ID int primary key,
ACBDT_Birthdate datetime,
index(ACBDT_Birthdate),
foreign key (AC_ID) references actor(AC_ID)
-=-=(Psergey - Thu, 21 May 2009, 01:07)=-=-
High-Level Specification modified.
--- /tmp/wklog.20.old.26317 2009-05-21 01:07:55.000000000 +0300
+++ /tmp/wklog.20.new.26317 2009-05-21 01:07:55.000000000 +0300
@@ -1 +1,91 @@
+Consider the following schema:
+
+--
+-- The physical anchor table
+--
+create table actor (
+ AC_ID int primary key
+);
+
+--
+-- A physical attribute: actor name (non-historized for simplicity)
+--
+create table actor_name (
+ AC_ID primary key,
+ ACNAM_Name varchar(20),
+ index(ACNAM_Name),
+ foreign key (AC_ID) references actor(AC_ID),
+);
+
+--
+-- Another physical attribute: actor birthdate
+--
+create table actor_birthdate (
+ AC_ID primary key,
+ ACBDT_Birthdate datetime,
+ index(ACBDT_Birthdate),
+ foreign key (AC_ID) references actor(AC_ID)
+);
+
+--
+-- The knot view: actor with its attributes
+--
+create view actor_attributes as
+select
+ actor.AC_ID,
+ actor_name.ACNAM_Name,
+ actor_birthdate.ACBDT_Birthdate
+ ... (other attributes follow)
+from
+ actor
+ left join actor_name on (actor_name.AC_ID = actor.AC_ID)
+ left join actor_birthdate on (actor_birthdate.AC_ID = actor.AC_ID)
+ ... other attributes follow ...
+;
+
+-- The query: names of senior actors
+select ACNAM_Name from actor_attributes where ACBDT_Birthdate < '1950-01-01'
+
+Query processing
+----------------
+The following steps will occur
+* The VIEW will be resolved with algorithm=merge
+* Outer-to-inner join conversion will make use of the
+ ACBDT_Birthdate < '1950-01-01' condition and change one outer join into
+ inner.
+* Table elimination variant#1 will remove all unused attributes.
+
+And we'll end up with:
+
+select
+ actor.AC_ID,
+ actor_name.ACNAM_Name,
+ actor_birthdate.ACBDT_Birthdate
+from
+ actor
+ join actor_birthdate on (actor_birthdate.AC_ID = actor.AC_ID)
+ left join actor_name on (actor_name.AC_ID = actor.AC_ID)
+where
+ ACBDT_Birthdate < '1950-01-01'
+
+At this point, it's possible to conclude that table anchor can be removed from
+the query plan: anchor model specifies that actor_name has a foreign key:
+
+ create table actor_name (
+ ...
+ foreign key (AC_ID) references actor(AC_ID),
+
+This means that each record in actor_name has exactly one match in table actor.
+There may be records in `actor` that have no matches in `actor_name` (they
+mean actors that have NULL/no names), but we're not interested in those as it
+is an inner join.
+
+How to remove
+-------------
+* Find inner joins, such that
+ - some table T can be accessed using eq_ref access method.
+ - some other table T2 has Foreign Key that refers to table T.
+ - there are no references to table T anywhere in the query, except for
+ references to T.primary_key, which can be substituted for columns of T2.
+
DESCRIPTION:
In certain cases it is possible to eliminate table when one is running inner
joins and there is a foreign key relationship between the tables.
HIGH-LEVEL SPECIFICATION:
Consider the following schema:
--
-- The physical anchor table
--
create table actor (
AC_ID int primary key
);
--
-- A physical attribute: actor name (non-historized for simplicity)
--
create table actor_name (
AC_ID int primary key,
ACNAM_Name varchar(20),
index(ACNAM_Name),
foreign key (AC_ID) references actor(AC_ID),
);
--
-- Another physical attribute: actor birthdate
--
create table actor_birthdate (
AC_ID int primary key,
ACBDT_Birthdate datetime,
index(ACBDT_Birthdate),
foreign key (AC_ID) references actor(AC_ID)
);
--
-- The knot view: actor with its attributes
--
create view actor_attributes as
select
actor.AC_ID,
actor_name.ACNAM_Name,
actor_birthdate.ACBDT_Birthdate
... (other attributes follow)
from
actor
left join actor_name on (actor_name.AC_ID = actor.AC_ID)
left join actor_birthdate on (actor_birthdate.AC_ID = actor.AC_ID)
... other attributes follow ...
;
-- The query: names of senior actors
select ACNAM_Name from actor_attributes where ACBDT_Birthdate < '1950-01-01'
Query processing
----------------
The following steps will occur
* The VIEW will be resolved with algorithm=merge
* Outer-to-inner join conversion will make use of the
ACBDT_Birthdate < '1950-01-01' condition and change one outer join into
inner.
* Table elimination variant#1 will remove all unused attributes.
And we'll end up with:
select
actor.AC_ID,
actor_name.ACNAM_Name,
actor_birthdate.ACBDT_Birthdate
from
actor
join actor_birthdate on (actor_birthdate.AC_ID = actor.AC_ID)
left join actor_name on (actor_name.AC_ID = actor.AC_ID)
where
ACBDT_Birthdate < '1950-01-01'
At this point, it's possible to conclude that table actor can be removed from
the query plan: anchor model specifies that actor_name has a foreign key:
create table actor_name (
...
foreign key (AC_ID) references actor(AC_ID),
This means that each record in actor_name has exactly one match in table actor.
There may be records in `actor` that have no matches in `actor_name` (they
mean actors that have NULL/no names), but we're not interested in those as it
is an inner join.
The result is (notice the changed select):
select
actor_birthday.AC_ID,
actor_name.ACNAM_Name,
actor_birthdate.ACBDT_Birthdate
from
actor_birthdate
left join actor_name on (actor_name.AC_ID = actor_birthday.AC_ID)
where
ACBDT_Birthdate < '1950-01-01'
How to remove
-------------
* Find inner joins, such that
- some table T can be accessed using eq_ref access method.
- some other table T2 has Foreign Key that refers to table T.
- there are no references to table T anywhere in the query, except for
references to T.primary_key, which can be substituted for columns of T2.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: Server-5.1
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:23)=-=-
High-Level Specification modified.
--- /tmp/wklog.17.old.30851 2009-05-22 17:23:38.000000000 +0300
+++ /tmp/wklog.17.new.30851 2009-05-22 17:23:38.000000000 +0300
@@ -6,7 +6,7 @@
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
-execution plan when it is unneccessary to include them. This can, of
+execution plan when it is unnecessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
@@ -22,30 +22,26 @@
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
-contain a NULL value.
+still contain it's original value. The not seen B.* row would contain all NULL:s.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
-A
-contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
+A contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
-what
-the result will look like is to actually touch both tables during
+what the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
-as
-many rows as there are in tableA, since joining with tableB cannot
+as many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
-unneccessary. We can remove the whole join operation from the execution
+unnecessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
-in
-the case described above. Let us look at a more advanced query, where
+in the case described above. Let us look at a more advanced query, where
Oracle fails.
select
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30176 2009-05-22 17:00:35.000000000 +0300
+++ /tmp/wklog.17.new.30176 2009-05-22 17:00:35.000000000 +0300
@@ -1 +1 @@
-Maria-2.0
+Server-5.1
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-Maria-Sprint
+Server-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-2.0
+9.x
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-9.x
+Maria-2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
------------------------------------------------------------
-=-=(View All Progress Notes, 13 total)=-=-
http://askmonty.org/worklog/index.pl?tid=17&nolimit=1
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unnecessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
still contain it's original value. The not seen B.* row would contain all NULL:s.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unnecessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: Server-5.1
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:23)=-=-
High-Level Specification modified.
--- /tmp/wklog.17.old.30851 2009-05-22 17:23:38.000000000 +0300
+++ /tmp/wklog.17.new.30851 2009-05-22 17:23:38.000000000 +0300
@@ -6,7 +6,7 @@
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
-execution plan when it is unneccessary to include them. This can, of
+execution plan when it is unnecessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
@@ -22,30 +22,26 @@
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
-contain a NULL value.
+still contain it's original value. The not seen B.* row would contain all NULL:s.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
-A
-contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
+A contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
-what
-the result will look like is to actually touch both tables during
+what the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
-as
-many rows as there are in tableA, since joining with tableB cannot
+as many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
-unneccessary. We can remove the whole join operation from the execution
+unnecessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
-in
-the case described above. Let us look at a more advanced query, where
+in the case described above. Let us look at a more advanced query, where
Oracle fails.
select
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30176 2009-05-22 17:00:35.000000000 +0300
+++ /tmp/wklog.17.new.30176 2009-05-22 17:00:35.000000000 +0300
@@ -1 +1 @@
-Maria-2.0
+Server-5.1
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-Maria-Sprint
+Server-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-2.0
+9.x
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-9.x
+Maria-2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
------------------------------------------------------------
-=-=(View All Progress Notes, 13 total)=-=-
http://askmonty.org/worklog/index.pl?tid=17&nolimit=1
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unnecessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
still contain it's original value. The not seen B.* row would contain all NULL:s.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unnecessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: Server-5.1
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30176 2009-05-22 17:00:35.000000000 +0300
+++ /tmp/wklog.17.new.30176 2009-05-22 17:00:35.000000000 +0300
@@ -1 +1 @@
-Maria-2.0
+Server-5.1
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-Maria-Sprint
+Server-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-2.0
+9.x
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-9.x
+Maria-2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
-=-=(Psergey - Tue, 19 May 2009, 20:18)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27035 2009-05-19 20:18:27.000000000 +0300
+++ /tmp/wklog.17.new.27035 2009-05-19 20:18:27.000000000 +0300
@@ -1 +1,92 @@
+<contents>
+1. Conditions for removal
+2. Removal operation properties
+3. Removal operation
+4. User interface
+5. Todo, issues to resolve
+5.1 To resolve
+5.2 Resolved
+</contents>
+
+It's not really about elimination of tables, it's about elimination of inner
+sides of outer joins.
+
+1. Conditions for removal
+-------------------------
+We can eliminate an inner side of outer join if:
+1. For each record combination of outer tables, it will always produce
+ exactly one record.
+2. There are no references to columns of the inner tables anywhere else in
+ the query.
+
+#1 means that every table inside the outer join nest is:
+ - is a constant table:
+ = because it can be accessed via eq_ref(const) access, or
+ = it is a zero-rows or one-row MyISAM-like table [MARK1]
+ - has an eq_ref access method candidate.
+
+#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
+ GROUP BY and HAVING do not refer to the inner tables of the outer join
+ nest.
+
+2. Removal operation properties
+-------------------------------
+* There is always one way to remove (no choice to remove either this or that)
+* It is always better to remove as much tables as possible (at least within
+ our cost model).
+Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
+
+3. Removal operation
+--------------------
+* Remove the outer join nest's nested join structure (i.e. get the
+ outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
+ $OJ->embedding->nested_join. Update table_map's of all ancestor nested
+ joins). [MARK2]
+
+* Move the tables and their JOIN_TABs to front like it is done with const
+ tables, with exception that if eliminated outer join nest was within
+ another outer join nest, that shouldn't prevent us from moving away the
+ eliminated tables.
+
+4. User interface
+-----------------
+* We'll add an @@optimizer switch flag for table elimination. Tentative
+ name: 'table_elimination'.
+
+* With EXPLAIN, there are two options:
+ - Show removed tables in a way similar to const tables, with some
+ indication that they are removed.
+ - Do not show them altogether.
+(the second one seems to be better? We're targeting a situation with VIEWs,
+where the user would not care about what tables were added into his query
+and then discarded from it?)
+
+5. Todo, issues to resolve
+--------------------------
+
+5.1 To resolve
+~~~~~~~~~~~~~~
+- See EXPLAIN question in section #4.
+
+- Re-check how this works with equality propagation.
+
+- Relationship with prepared statements.
+ On one hand, it's natural to desire to make table elimination a
+ once-per-statement operation, like outer->inner join conversion. We'll have
+ to limit the applicablity by removing [MARK1] as that can change during
+ lifetime of the statement.
+
+ The other option is to do table elimination every time. This will require to
+ rework operation [MARK2] to be undoable.
+
+ I'm leaning towards doing the former. With anchor modeling, it is unlikely
+ that we'll meet outer joins which have N inner tables of which some are 1-row
+ MyISAM tables that do not have primary key.
+
+5.2 Resolved
+~~~~~~~~~~~~
+- outer->inner join conversion is not a problem for table elimination.
+ We make outer->inner conversions based on predicates in WHERE. If the WHERE
+ referred to an inner table (requirement for OJ->IJ conversion) then table
+ elimination would not be applicable anyway.
------------------------------------------------------------
-=-=(View All Progress Notes, 12 total)=-=-
http://askmonty.org/worklog/index.pl?tid=17&nolimit=1
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unneccessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
contain a NULL value.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A
contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what
the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as
many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unneccessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in
the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: Server-5.1
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30176 2009-05-22 17:00:35.000000000 +0300
+++ /tmp/wklog.17.new.30176 2009-05-22 17:00:35.000000000 +0300
@@ -1 +1 @@
-Maria-2.0
+Server-5.1
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-Maria-Sprint
+Server-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-2.0
+9.x
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-9.x
+Maria-2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
-=-=(Psergey - Tue, 19 May 2009, 20:18)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27035 2009-05-19 20:18:27.000000000 +0300
+++ /tmp/wklog.17.new.27035 2009-05-19 20:18:27.000000000 +0300
@@ -1 +1,92 @@
+<contents>
+1. Conditions for removal
+2. Removal operation properties
+3. Removal operation
+4. User interface
+5. Todo, issues to resolve
+5.1 To resolve
+5.2 Resolved
+</contents>
+
+It's not really about elimination of tables, it's about elimination of inner
+sides of outer joins.
+
+1. Conditions for removal
+-------------------------
+We can eliminate an inner side of outer join if:
+1. For each record combination of outer tables, it will always produce
+ exactly one record.
+2. There are no references to columns of the inner tables anywhere else in
+ the query.
+
+#1 means that every table inside the outer join nest is:
+ - is a constant table:
+ = because it can be accessed via eq_ref(const) access, or
+ = it is a zero-rows or one-row MyISAM-like table [MARK1]
+ - has an eq_ref access method candidate.
+
+#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
+ GROUP BY and HAVING do not refer to the inner tables of the outer join
+ nest.
+
+2. Removal operation properties
+-------------------------------
+* There is always one way to remove (no choice to remove either this or that)
+* It is always better to remove as much tables as possible (at least within
+ our cost model).
+Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
+
+3. Removal operation
+--------------------
+* Remove the outer join nest's nested join structure (i.e. get the
+ outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
+ $OJ->embedding->nested_join. Update table_map's of all ancestor nested
+ joins). [MARK2]
+
+* Move the tables and their JOIN_TABs to front like it is done with const
+ tables, with exception that if eliminated outer join nest was within
+ another outer join nest, that shouldn't prevent us from moving away the
+ eliminated tables.
+
+4. User interface
+-----------------
+* We'll add an @@optimizer switch flag for table elimination. Tentative
+ name: 'table_elimination'.
+
+* With EXPLAIN, there are two options:
+ - Show removed tables in a way similar to const tables, with some
+ indication that they are removed.
+ - Do not show them altogether.
+(the second one seems to be better? We're targeting a situation with VIEWs,
+where the user would not care about what tables were added into his query
+and then discarded from it?)
+
+5. Todo, issues to resolve
+--------------------------
+
+5.1 To resolve
+~~~~~~~~~~~~~~
+- See EXPLAIN question in section #4.
+
+- Re-check how this works with equality propagation.
+
+- Relationship with prepared statements.
+ On one hand, it's natural to desire to make table elimination a
+ once-per-statement operation, like outer->inner join conversion. We'll have
+ to limit the applicablity by removing [MARK1] as that can change during
+ lifetime of the statement.
+
+ The other option is to do table elimination every time. This will require to
+ rework operation [MARK2] to be undoable.
+
+ I'm leaning towards doing the former. With anchor modeling, it is unlikely
+ that we'll meet outer joins which have N inner tables of which some are 1-row
+ MyISAM tables that do not have primary key.
+
+5.2 Resolved
+~~~~~~~~~~~~
+- outer->inner join conversion is not a problem for table elimination.
+ We make outer->inner conversions based on predicates in WHERE. If the WHERE
+ referred to an inner table (requirement for OJ->IJ conversion) then table
+ elimination would not be applicable anyway.
------------------------------------------------------------
-=-=(View All Progress Notes, 12 total)=-=-
http://askmonty.org/worklog/index.pl?tid=17&nolimit=1
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unneccessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
contain a NULL value.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A
contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what
the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as
many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unneccessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in
the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: Maria-2.0
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-Maria-Sprint
+Server-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-2.0
+9.x
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-9.x
+Maria-2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
-=-=(Psergey - Tue, 19 May 2009, 20:18)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27035 2009-05-19 20:18:27.000000000 +0300
+++ /tmp/wklog.17.new.27035 2009-05-19 20:18:27.000000000 +0300
@@ -1 +1,92 @@
+<contents>
+1. Conditions for removal
+2. Removal operation properties
+3. Removal operation
+4. User interface
+5. Todo, issues to resolve
+5.1 To resolve
+5.2 Resolved
+</contents>
+
+It's not really about elimination of tables, it's about elimination of inner
+sides of outer joins.
+
+1. Conditions for removal
+-------------------------
+We can eliminate an inner side of outer join if:
+1. For each record combination of outer tables, it will always produce
+ exactly one record.
+2. There are no references to columns of the inner tables anywhere else in
+ the query.
+
+#1 means that every table inside the outer join nest is:
+ - is a constant table:
+ = because it can be accessed via eq_ref(const) access, or
+ = it is a zero-rows or one-row MyISAM-like table [MARK1]
+ - has an eq_ref access method candidate.
+
+#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
+ GROUP BY and HAVING do not refer to the inner tables of the outer join
+ nest.
+
+2. Removal operation properties
+-------------------------------
+* There is always one way to remove (no choice to remove either this or that)
+* It is always better to remove as much tables as possible (at least within
+ our cost model).
+Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
+
+3. Removal operation
+--------------------
+* Remove the outer join nest's nested join structure (i.e. get the
+ outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
+ $OJ->embedding->nested_join. Update table_map's of all ancestor nested
+ joins). [MARK2]
+
+* Move the tables and their JOIN_TABs to front like it is done with const
+ tables, with exception that if eliminated outer join nest was within
+ another outer join nest, that shouldn't prevent us from moving away the
+ eliminated tables.
+
+4. User interface
+-----------------
+* We'll add an @@optimizer switch flag for table elimination. Tentative
+ name: 'table_elimination'.
+
+* With EXPLAIN, there are two options:
+ - Show removed tables in a way similar to const tables, with some
+ indication that they are removed.
+ - Do not show them altogether.
+(the second one seems to be better? We're targeting a situation with VIEWs,
+where the user would not care about what tables were added into his query
+and then discarded from it?)
+
+5. Todo, issues to resolve
+--------------------------
+
+5.1 To resolve
+~~~~~~~~~~~~~~
+- See EXPLAIN question in section #4.
+
+- Re-check how this works with equality propagation.
+
+- Relationship with prepared statements.
+ On one hand, it's natural to desire to make table elimination a
+ once-per-statement operation, like outer->inner join conversion. We'll have
+ to limit the applicablity by removing [MARK1] as that can change during
+ lifetime of the statement.
+
+ The other option is to do table elimination every time. This will require to
+ rework operation [MARK2] to be undoable.
+
+ I'm leaning towards doing the former. With anchor modeling, it is unlikely
+ that we'll meet outer joins which have N inner tables of which some are 1-row
+ MyISAM tables that do not have primary key.
+
+5.2 Resolved
+~~~~~~~~~~~~
+- outer->inner join conversion is not a problem for table elimination.
+ We make outer->inner conversions based on predicates in WHERE. If the WHERE
+ referred to an inner table (requirement for OJ->IJ conversion) then table
+ elimination would not be applicable anyway.
-=-=(Guest - Tue, 19 May 2009, 18:27)=-=-
Title modified.
--- /tmp/wklog.17.old.22023 2009-05-19 18:27:41.000000000 +0300
+++ /tmp/wklog.17.new.22023 2009-05-19 18:27:41.000000000 +0300
@@ -1 +1 @@
-Table ellimination
+Table elimination
------------------------------------------------------------
-=-=(View All Progress Notes, 11 total)=-=-
http://askmonty.org/worklog/index.pl?tid=17&nolimit=1
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unneccessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
contain a NULL value.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A
contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what
the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as
many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unneccessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in
the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Server-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: Maria-2.0
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-Maria-Sprint
+Server-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-2.0
+9.x
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30162 2009-05-22 17:00:28.000000000 +0300
+++ /tmp/wklog.17.new.30162 2009-05-22 17:00:28.000000000 +0300
@@ -1 +1 @@
-9.x
+Maria-2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
-=-=(Psergey - Tue, 19 May 2009, 20:18)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27035 2009-05-19 20:18:27.000000000 +0300
+++ /tmp/wklog.17.new.27035 2009-05-19 20:18:27.000000000 +0300
@@ -1 +1,92 @@
+<contents>
+1. Conditions for removal
+2. Removal operation properties
+3. Removal operation
+4. User interface
+5. Todo, issues to resolve
+5.1 To resolve
+5.2 Resolved
+</contents>
+
+It's not really about elimination of tables, it's about elimination of inner
+sides of outer joins.
+
+1. Conditions for removal
+-------------------------
+We can eliminate an inner side of outer join if:
+1. For each record combination of outer tables, it will always produce
+ exactly one record.
+2. There are no references to columns of the inner tables anywhere else in
+ the query.
+
+#1 means that every table inside the outer join nest is:
+ - is a constant table:
+ = because it can be accessed via eq_ref(const) access, or
+ = it is a zero-rows or one-row MyISAM-like table [MARK1]
+ - has an eq_ref access method candidate.
+
+#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
+ GROUP BY and HAVING do not refer to the inner tables of the outer join
+ nest.
+
+2. Removal operation properties
+-------------------------------
+* There is always one way to remove (no choice to remove either this or that)
+* It is always better to remove as much tables as possible (at least within
+ our cost model).
+Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
+
+3. Removal operation
+--------------------
+* Remove the outer join nest's nested join structure (i.e. get the
+ outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
+ $OJ->embedding->nested_join. Update table_map's of all ancestor nested
+ joins). [MARK2]
+
+* Move the tables and their JOIN_TABs to front like it is done with const
+ tables, with exception that if eliminated outer join nest was within
+ another outer join nest, that shouldn't prevent us from moving away the
+ eliminated tables.
+
+4. User interface
+-----------------
+* We'll add an @@optimizer switch flag for table elimination. Tentative
+ name: 'table_elimination'.
+
+* With EXPLAIN, there are two options:
+ - Show removed tables in a way similar to const tables, with some
+ indication that they are removed.
+ - Do not show them altogether.
+(the second one seems to be better? We're targeting a situation with VIEWs,
+where the user would not care about what tables were added into his query
+and then discarded from it?)
+
+5. Todo, issues to resolve
+--------------------------
+
+5.1 To resolve
+~~~~~~~~~~~~~~
+- See EXPLAIN question in section #4.
+
+- Re-check how this works with equality propagation.
+
+- Relationship with prepared statements.
+ On one hand, it's natural to desire to make table elimination a
+ once-per-statement operation, like outer->inner join conversion. We'll have
+ to limit the applicablity by removing [MARK1] as that can change during
+ lifetime of the statement.
+
+ The other option is to do table elimination every time. This will require to
+ rework operation [MARK2] to be undoable.
+
+ I'm leaning towards doing the former. With anchor modeling, it is unlikely
+ that we'll meet outer joins which have N inner tables of which some are 1-row
+ MyISAM tables that do not have primary key.
+
+5.2 Resolved
+~~~~~~~~~~~~
+- outer->inner join conversion is not a problem for table elimination.
+ We make outer->inner conversions based on predicates in WHERE. If the WHERE
+ referred to an inner table (requirement for OJ->IJ conversion) then table
+ elimination would not be applicable anyway.
-=-=(Guest - Tue, 19 May 2009, 18:27)=-=-
Title modified.
--- /tmp/wklog.17.old.22023 2009-05-19 18:27:41.000000000 +0300
+++ /tmp/wklog.17.new.22023 2009-05-19 18:27:41.000000000 +0300
@@ -1 +1 @@
-Table ellimination
+Table elimination
------------------------------------------------------------
-=-=(View All Progress Notes, 11 total)=-=-
http://askmonty.org/worklog/index.pl?tid=17&nolimit=1
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unneccessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
contain a NULL value.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A
contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what
the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as
many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unneccessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in
the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Maria-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: 2.0
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
-=-=(Psergey - Tue, 19 May 2009, 20:18)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27035 2009-05-19 20:18:27.000000000 +0300
+++ /tmp/wklog.17.new.27035 2009-05-19 20:18:27.000000000 +0300
@@ -1 +1,92 @@
+<contents>
+1. Conditions for removal
+2. Removal operation properties
+3. Removal operation
+4. User interface
+5. Todo, issues to resolve
+5.1 To resolve
+5.2 Resolved
+</contents>
+
+It's not really about elimination of tables, it's about elimination of inner
+sides of outer joins.
+
+1. Conditions for removal
+-------------------------
+We can eliminate an inner side of outer join if:
+1. For each record combination of outer tables, it will always produce
+ exactly one record.
+2. There are no references to columns of the inner tables anywhere else in
+ the query.
+
+#1 means that every table inside the outer join nest is:
+ - is a constant table:
+ = because it can be accessed via eq_ref(const) access, or
+ = it is a zero-rows or one-row MyISAM-like table [MARK1]
+ - has an eq_ref access method candidate.
+
+#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
+ GROUP BY and HAVING do not refer to the inner tables of the outer join
+ nest.
+
+2. Removal operation properties
+-------------------------------
+* There is always one way to remove (no choice to remove either this or that)
+* It is always better to remove as much tables as possible (at least within
+ our cost model).
+Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
+
+3. Removal operation
+--------------------
+* Remove the outer join nest's nested join structure (i.e. get the
+ outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
+ $OJ->embedding->nested_join. Update table_map's of all ancestor nested
+ joins). [MARK2]
+
+* Move the tables and their JOIN_TABs to front like it is done with const
+ tables, with exception that if eliminated outer join nest was within
+ another outer join nest, that shouldn't prevent us from moving away the
+ eliminated tables.
+
+4. User interface
+-----------------
+* We'll add an @@optimizer switch flag for table elimination. Tentative
+ name: 'table_elimination'.
+
+* With EXPLAIN, there are two options:
+ - Show removed tables in a way similar to const tables, with some
+ indication that they are removed.
+ - Do not show them altogether.
+(the second one seems to be better? We're targeting a situation with VIEWs,
+where the user would not care about what tables were added into his query
+and then discarded from it?)
+
+5. Todo, issues to resolve
+--------------------------
+
+5.1 To resolve
+~~~~~~~~~~~~~~
+- See EXPLAIN question in section #4.
+
+- Re-check how this works with equality propagation.
+
+- Relationship with prepared statements.
+ On one hand, it's natural to desire to make table elimination a
+ once-per-statement operation, like outer->inner join conversion. We'll have
+ to limit the applicablity by removing [MARK1] as that can change during
+ lifetime of the statement.
+
+ The other option is to do table elimination every time. This will require to
+ rework operation [MARK2] to be undoable.
+
+ I'm leaning towards doing the former. With anchor modeling, it is unlikely
+ that we'll meet outer joins which have N inner tables of which some are 1-row
+ MyISAM tables that do not have primary key.
+
+5.2 Resolved
+~~~~~~~~~~~~
+- outer->inner join conversion is not a problem for table elimination.
+ We make outer->inner conversions based on predicates in WHERE. If the WHERE
+ referred to an inner table (requirement for OJ->IJ conversion) then table
+ elimination would not be applicable anyway.
-=-=(Guest - Tue, 19 May 2009, 18:27)=-=-
Title modified.
--- /tmp/wklog.17.old.22023 2009-05-19 18:27:41.000000000 +0300
+++ /tmp/wklog.17.new.22023 2009-05-19 18:27:41.000000000 +0300
@@ -1 +1 @@
-Table ellimination
+Table elimination
-=-=(Monty - Sun, 10 May 2009, 19:58)=-=-
High-Level Specification modified.
--- /tmp/wklog.17.old.5423 2009-05-10 19:58:06.000000000 +0300
+++ /tmp/wklog.17.new.5423 2009-05-10 19:58:06.000000000 +0300
@@ -1 +1,90 @@
+Here is an extended explanation of table elimination.
+
+Table elimination is a feature found in some modern query optimizers, of
+which Microsoft SQL Server 2005/2008 seems to have the most advanced
+implementation. Oracle 11g has also been confirmed to use table
+elimination but not to the same extent.
+
+Basically, what table elimination does, is to remove tables from the
+execution plan when it is unneccessary to include them. This can, of
+course, only happen if the right circumstances arise. Let us for example
+look at the following query:
+
+select
+ A.colA
+from
+ tableA A
+left outer join
+ tableB B
+on
+ B.id = A.id;
+
+When using A as the left table we ensure that the query will return at
+least as many rows as there are in that table. For rows where the join
+condition (B.id = A.id) is not met the selected column (A.colA) will
+contain a NULL value.
+
+However, the result set could actually contain more rows than what is
+found in tableA if there are duplicates of the column B.id in tableB. If
+A
+contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
+then two rows will match in the join condition. The only way to know
+what
+the result will look like is to actually touch both tables during
+execution.
+
+Instead, let's say that tableB contains rows that make it possible to
+place a unique constraint on the column B.id, for example and often the
+case a primary key. In this situation we know that we will get exactly
+as
+many rows as there are in tableA, since joining with tableB cannot
+introduce any duplicates. If further, as in the example query, we do not
+select any columns from tableB, touching that table during execution is
+unneccessary. We can remove the whole join operation from the execution
+plan.
+
+Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
+in
+the case described above. Let us look at a more advanced query, where
+Oracle fails.
+
+select
+ A.colA
+from
+ tableA A
+left outer join
+ tableB B
+on
+ B.id = A.id
+and
+ B.fromDate = (
+ select
+ max(sub.fromDate)
+ from
+ tableB sub
+ where
+ sub.id = A.id
+ );
+
+In this example we have added another join condition, which ensures
+that we only pick the matching row from tableB having the latest
+fromDate. In this case tableB will contain duplicates of the column
+B.id, so in order to ensure uniqueness the primary key has to contain
+the fromDate column as well. In other words the primary key of tableB
+is (B.id, B.fromDate).
+
+Furthermore, since the subselect ensures that we only pick the latest
+B.fromDate for a given B.id we know that at most one row will match
+the join condition. We will again have the situation where joining
+with tableB cannot affect the number of rows in the result set. Since
+we do not select any columns from tableB, the whole join operation can
+be eliminated from the execution plan.
+
+SQL Server 2005/2008 will deploy table elimination in this situation as
+well. We have not found a way to make Oracle 11g use it for this type of
+query. Queries like these arise in two situations. Either when you have
+denormalized model consisting of a fact table with several related
+dimension tables, or when you have a highly normalized model where each
+attribute is stored in its own table. The example with the subselect is
+common whenever you store historized/versioned data.
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unneccessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
contain a NULL value.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A
contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what
the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as
many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unneccessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in
the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] Updated (by Guest): Table elimination (17)
by worklog-noreply@askmonty.org 22 May '09
by worklog-noreply@askmonty.org 22 May '09
22 May '09
-----------------------------------------------------------------------
WORKLOG TASK
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
TASK...........: Table elimination
CREATION DATE..: Sun, 10 May 2009, 19:57
SUPERVISOR.....: Monty
IMPLEMENTOR....: Psergey
COPIES TO......:
CATEGORY.......: Maria-Sprint
TASK ID........: 17 (http://askmonty.org/worklog/?tid=17)
VERSION........: 2.0
STATUS.........: Assigned
PRIORITY.......: 60
WORKED HOURS...: 0
ESTIMATE.......: 0 (hours remain)
ORIG. ESTIMATE.: 0
PROGRESS NOTES:
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Category updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-RawIdeaBin
+Maria-Sprint
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Version updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Server-5.2
+2.0
-=-=(Guest - Fri, 22 May 2009, 17:00)=-=-
Status updated.
--- /tmp/wklog.17.old.30122 2009-05-22 17:00:02.000000000 +0300
+++ /tmp/wklog.17.new.30122 2009-05-22 17:00:02.000000000 +0300
@@ -1 +1 @@
-Un-Assigned
+Assigned
-=-=(Guest - Thu, 21 May 2009, 22:54)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.28468 2009-05-21 22:54:28.000000000 +0300
+++ /tmp/wklog.17.new.28468 2009-05-21 22:54:28.000000000 +0300
@@ -77,7 +77,7 @@
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
- to limit the applicablity by removing [MARK1] as that can change during
+ to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
-=-=(Psergey - Tue, 19 May 2009, 20:20)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27126 2009-05-19 20:20:06.000000000 +0300
+++ /tmp/wklog.17.new.27126 2009-05-19 20:20:06.000000000 +0300
@@ -48,6 +48,10 @@
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
+* Update join->table_count and all-join-tables bitmap.
+
+* That's it. Nothing else?
+
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
-=-=(Psergey - Tue, 19 May 2009, 20:18)=-=-
Low Level Design modified.
--- /tmp/wklog.17.old.27035 2009-05-19 20:18:27.000000000 +0300
+++ /tmp/wklog.17.new.27035 2009-05-19 20:18:27.000000000 +0300
@@ -1 +1,92 @@
+<contents>
+1. Conditions for removal
+2. Removal operation properties
+3. Removal operation
+4. User interface
+5. Todo, issues to resolve
+5.1 To resolve
+5.2 Resolved
+</contents>
+
+It's not really about elimination of tables, it's about elimination of inner
+sides of outer joins.
+
+1. Conditions for removal
+-------------------------
+We can eliminate an inner side of outer join if:
+1. For each record combination of outer tables, it will always produce
+ exactly one record.
+2. There are no references to columns of the inner tables anywhere else in
+ the query.
+
+#1 means that every table inside the outer join nest is:
+ - is a constant table:
+ = because it can be accessed via eq_ref(const) access, or
+ = it is a zero-rows or one-row MyISAM-like table [MARK1]
+ - has an eq_ref access method candidate.
+
+#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
+ GROUP BY and HAVING do not refer to the inner tables of the outer join
+ nest.
+
+2. Removal operation properties
+-------------------------------
+* There is always one way to remove (no choice to remove either this or that)
+* It is always better to remove as much tables as possible (at least within
+ our cost model).
+Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
+
+3. Removal operation
+--------------------
+* Remove the outer join nest's nested join structure (i.e. get the
+ outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
+ $OJ->embedding->nested_join. Update table_map's of all ancestor nested
+ joins). [MARK2]
+
+* Move the tables and their JOIN_TABs to front like it is done with const
+ tables, with exception that if eliminated outer join nest was within
+ another outer join nest, that shouldn't prevent us from moving away the
+ eliminated tables.
+
+4. User interface
+-----------------
+* We'll add an @@optimizer switch flag for table elimination. Tentative
+ name: 'table_elimination'.
+
+* With EXPLAIN, there are two options:
+ - Show removed tables in a way similar to const tables, with some
+ indication that they are removed.
+ - Do not show them altogether.
+(the second one seems to be better? We're targeting a situation with VIEWs,
+where the user would not care about what tables were added into his query
+and then discarded from it?)
+
+5. Todo, issues to resolve
+--------------------------
+
+5.1 To resolve
+~~~~~~~~~~~~~~
+- See EXPLAIN question in section #4.
+
+- Re-check how this works with equality propagation.
+
+- Relationship with prepared statements.
+ On one hand, it's natural to desire to make table elimination a
+ once-per-statement operation, like outer->inner join conversion. We'll have
+ to limit the applicablity by removing [MARK1] as that can change during
+ lifetime of the statement.
+
+ The other option is to do table elimination every time. This will require to
+ rework operation [MARK2] to be undoable.
+
+ I'm leaning towards doing the former. With anchor modeling, it is unlikely
+ that we'll meet outer joins which have N inner tables of which some are 1-row
+ MyISAM tables that do not have primary key.
+
+5.2 Resolved
+~~~~~~~~~~~~
+- outer->inner join conversion is not a problem for table elimination.
+ We make outer->inner conversions based on predicates in WHERE. If the WHERE
+ referred to an inner table (requirement for OJ->IJ conversion) then table
+ elimination would not be applicable anyway.
-=-=(Guest - Tue, 19 May 2009, 18:27)=-=-
Title modified.
--- /tmp/wklog.17.old.22023 2009-05-19 18:27:41.000000000 +0300
+++ /tmp/wklog.17.new.22023 2009-05-19 18:27:41.000000000 +0300
@@ -1 +1 @@
-Table ellimination
+Table elimination
-=-=(Monty - Sun, 10 May 2009, 19:58)=-=-
High-Level Specification modified.
--- /tmp/wklog.17.old.5423 2009-05-10 19:58:06.000000000 +0300
+++ /tmp/wklog.17.new.5423 2009-05-10 19:58:06.000000000 +0300
@@ -1 +1,90 @@
+Here is an extended explanation of table elimination.
+
+Table elimination is a feature found in some modern query optimizers, of
+which Microsoft SQL Server 2005/2008 seems to have the most advanced
+implementation. Oracle 11g has also been confirmed to use table
+elimination but not to the same extent.
+
+Basically, what table elimination does, is to remove tables from the
+execution plan when it is unneccessary to include them. This can, of
+course, only happen if the right circumstances arise. Let us for example
+look at the following query:
+
+select
+ A.colA
+from
+ tableA A
+left outer join
+ tableB B
+on
+ B.id = A.id;
+
+When using A as the left table we ensure that the query will return at
+least as many rows as there are in that table. For rows where the join
+condition (B.id = A.id) is not met the selected column (A.colA) will
+contain a NULL value.
+
+However, the result set could actually contain more rows than what is
+found in tableA if there are duplicates of the column B.id in tableB. If
+A
+contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
+then two rows will match in the join condition. The only way to know
+what
+the result will look like is to actually touch both tables during
+execution.
+
+Instead, let's say that tableB contains rows that make it possible to
+place a unique constraint on the column B.id, for example and often the
+case a primary key. In this situation we know that we will get exactly
+as
+many rows as there are in tableA, since joining with tableB cannot
+introduce any duplicates. If further, as in the example query, we do not
+select any columns from tableB, touching that table during execution is
+unneccessary. We can remove the whole join operation from the execution
+plan.
+
+Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
+in
+the case described above. Let us look at a more advanced query, where
+Oracle fails.
+
+select
+ A.colA
+from
+ tableA A
+left outer join
+ tableB B
+on
+ B.id = A.id
+and
+ B.fromDate = (
+ select
+ max(sub.fromDate)
+ from
+ tableB sub
+ where
+ sub.id = A.id
+ );
+
+In this example we have added another join condition, which ensures
+that we only pick the matching row from tableB having the latest
+fromDate. In this case tableB will contain duplicates of the column
+B.id, so in order to ensure uniqueness the primary key has to contain
+the fromDate column as well. In other words the primary key of tableB
+is (B.id, B.fromDate).
+
+Furthermore, since the subselect ensures that we only pick the latest
+B.fromDate for a given B.id we know that at most one row will match
+the join condition. We will again have the situation where joining
+with tableB cannot affect the number of rows in the result set. Since
+we do not select any columns from tableB, the whole join operation can
+be eliminated from the execution plan.
+
+SQL Server 2005/2008 will deploy table elimination in this situation as
+well. We have not found a way to make Oracle 11g use it for this type of
+query. Queries like these arise in two situations. Either when you have
+denormalized model consisting of a fact table with several related
+dimension tables, or when you have a highly normalized model where each
+attribute is stored in its own table. The example with the subselect is
+common whenever you store historized/versioned data.
DESCRIPTION:
Eliminate not needed tables from SELECT queries..
This will speed up some views and automatically generated queries.
Example:
CREATE TABLE B (id int primary key);
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
In this case we can remove table B and the join from the query.
HIGH-LEVEL SPECIFICATION:
Here is an extended explanation of table elimination.
Table elimination is a feature found in some modern query optimizers, of
which Microsoft SQL Server 2005/2008 seems to have the most advanced
implementation. Oracle 11g has also been confirmed to use table
elimination but not to the same extent.
Basically, what table elimination does, is to remove tables from the
execution plan when it is unneccessary to include them. This can, of
course, only happen if the right circumstances arise. Let us for example
look at the following query:
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id;
When using A as the left table we ensure that the query will return at
least as many rows as there are in that table. For rows where the join
condition (B.id = A.id) is not met the selected column (A.colA) will
contain a NULL value.
However, the result set could actually contain more rows than what is
found in tableA if there are duplicates of the column B.id in tableB. If
A
contains a row [1, "val1"] and B the rows [1, "other1a"],[1, "other1b"]
then two rows will match in the join condition. The only way to know
what
the result will look like is to actually touch both tables during
execution.
Instead, let's say that tableB contains rows that make it possible to
place a unique constraint on the column B.id, for example and often the
case a primary key. In this situation we know that we will get exactly
as
many rows as there are in tableA, since joining with tableB cannot
introduce any duplicates. If further, as in the example query, we do not
select any columns from tableB, touching that table during execution is
unneccessary. We can remove the whole join operation from the execution
plan.
Both SQL Server 2005/2008 and Oracle 11g will deploy table elimination
in
the case described above. Let us look at a more advanced query, where
Oracle fails.
select
A.colA
from
tableA A
left outer join
tableB B
on
B.id = A.id
and
B.fromDate = (
select
max(sub.fromDate)
from
tableB sub
where
sub.id = A.id
);
In this example we have added another join condition, which ensures
that we only pick the matching row from tableB having the latest
fromDate. In this case tableB will contain duplicates of the column
B.id, so in order to ensure uniqueness the primary key has to contain
the fromDate column as well. In other words the primary key of tableB
is (B.id, B.fromDate).
Furthermore, since the subselect ensures that we only pick the latest
B.fromDate for a given B.id we know that at most one row will match
the join condition. We will again have the situation where joining
with tableB cannot affect the number of rows in the result set. Since
we do not select any columns from tableB, the whole join operation can
be eliminated from the execution plan.
SQL Server 2005/2008 will deploy table elimination in this situation as
well. We have not found a way to make Oracle 11g use it for this type of
query. Queries like these arise in two situations. Either when you have
denormalized model consisting of a fact table with several related
dimension tables, or when you have a highly normalized model where each
attribute is stored in its own table. The example with the subselect is
common whenever you store historized/versioned data.
LOW-LEVEL DESIGN:
<contents>
1. Conditions for removal
2. Removal operation properties
3. Removal operation
4. User interface
5. Todo, issues to resolve
5.1 To resolve
5.2 Resolved
</contents>
It's not really about elimination of tables, it's about elimination of inner
sides of outer joins.
1. Conditions for removal
-------------------------
We can eliminate an inner side of outer join if:
1. For each record combination of outer tables, it will always produce
exactly one record.
2. There are no references to columns of the inner tables anywhere else in
the query.
#1 means that every table inside the outer join nest is:
- is a constant table:
= because it can be accessed via eq_ref(const) access, or
= it is a zero-rows or one-row MyISAM-like table [MARK1]
- has an eq_ref access method candidate.
#2 means that WHERE clause, ON clauses of embedding outer joins, ORDER BY,
GROUP BY and HAVING do not refer to the inner tables of the outer join
nest.
2. Removal operation properties
-------------------------------
* There is always one way to remove (no choice to remove either this or that)
* It is always better to remove as much tables as possible (at least within
our cost model).
Thus, no need for any cost calculations/etc. It's an unconditional rewrite.
3. Removal operation
--------------------
* Remove the outer join nest's nested join structure (i.e. get the
outer join's TABLE_LIST object $OJ and remove it from $OJ->embedding,
$OJ->embedding->nested_join. Update table_map's of all ancestor nested
joins). [MARK2]
* Move the tables and their JOIN_TABs to front like it is done with const
tables, with exception that if eliminated outer join nest was within
another outer join nest, that shouldn't prevent us from moving away the
eliminated tables.
* Update join->table_count and all-join-tables bitmap.
* That's it. Nothing else?
4. User interface
-----------------
* We'll add an @@optimizer switch flag for table elimination. Tentative
name: 'table_elimination'.
* With EXPLAIN, there are two options:
- Show removed tables in a way similar to const tables, with some
indication that they are removed.
- Do not show them altogether.
(the second one seems to be better? We're targeting a situation with VIEWs,
where the user would not care about what tables were added into his query
and then discarded from it?)
5. Todo, issues to resolve
--------------------------
5.1 To resolve
~~~~~~~~~~~~~~
- See EXPLAIN question in section #4.
- Re-check how this works with equality propagation.
- Relationship with prepared statements.
On one hand, it's natural to desire to make table elimination a
once-per-statement operation, like outer->inner join conversion. We'll have
to limit the applicability by removing [MARK1] as that can change during
lifetime of the statement.
The other option is to do table elimination every time. This will require to
rework operation [MARK2] to be undoable.
I'm leaning towards doing the former. With anchor modeling, it is unlikely
that we'll meet outer joins which have N inner tables of which some are 1-row
MyISAM tables that do not have primary key.
5.2 Resolved
~~~~~~~~~~~~
- outer->inner join conversion is not a problem for table elimination.
We make outer->inner conversions based on predicates in WHERE. If the WHERE
referred to an inner table (requirement for OJ->IJ conversion) then table
elimination would not be applicable anyway.
ESTIMATED WORK TIME
ESTIMATED COMPLETION DATE
-----------------------------------------------------------------------
WorkLog (v3.5.9)
1
0
[Maria-developers] bzr commit into MariaDB 5.1, with Maria 1.5:maria branch (knielsen:2704)
by knielsen@knielsen-hq.org 22 May '09
by knielsen@knielsen-hq.org 22 May '09
22 May '09
#At lp:maria
2704 knielsen(a)knielsen-hq.org 2009-05-22
After-merge fixes for problems seen in buildbot after merging MySQL-5.1.35.
- Version number.
- Valgrind false alarms in libz.
- New variant of suppression for Valgrind warning in dlclose().
- Fix double free() in plugin init error case.
modified:
configure.in
include/my_sys.h
mysql-test/valgrind.supp
mysys/my_compress.c
sql/handler.cc
sql/item_strfunc.cc
storage/archive/azio.c
per-file messages:
configure.in
Fix version number. We should reset the maria variant back to `1' when the MySQL version
number increases.
include/my_sys.h
Fix false alarms in Valgrind for zlib.
Apply same fix as for archive storage handler also to the cases of compression in the
client protocol, and to the compression SQL function.
mysql-test/valgrind.supp
A new variant of the dlclose() suppression is needed now.
mysys/my_compress.c
Fix false alarms in Valgrind for zlib.
Apply same fix as for archive storage handler also to the cases of compression in the
client protocol, and to the compression SQL function.
sql/handler.cc
Fix a double free() in error case for plugin initialisation.
sql/item_strfunc.cc
Fix false alarms in Valgrind for zlib.
Apply same fix as for archive storage handler also to the cases of compression in the
client protocol, and to the compression SQL function.
=== modified file 'configure.in'
--- a/configure.in 2009-04-25 10:05:32 +0000
+++ b/configure.in 2009-05-22 12:38:50 +0000
@@ -9,8 +9,11 @@ AC_CANONICAL_SYSTEM
# remember to also update version.c in ndb
#
# When changing major version number please also check switch statement
-# in mysqlbinlog::check_master_version().
-AM_INIT_AUTOMAKE(mysql, 5.1.35-maria-beta2)
+# in mysqlbinlog.cc / check_master_version().
+#
+# When merging new MySQL releases, update the version number to match the
+# MySQL version number, but reset the maria subrelease (-beta1).
+AM_INIT_AUTOMAKE(mysql, 5.1.35-maria-beta1)
AM_CONFIG_HEADER([include/config.h:config.h.in])
PROTOCOL_VERSION=10
=== modified file 'include/my_sys.h'
--- a/include/my_sys.h 2009-04-25 10:05:32 +0000
+++ b/include/my_sys.h 2009-05-22 12:38:50 +0000
@@ -879,6 +879,10 @@ extern my_bool my_compress(uchar *, size
extern my_bool my_uncompress(uchar *, size_t , size_t *);
extern uchar *my_compress_alloc(const uchar *packet, size_t *len,
size_t *complen);
+extern void *my_az_allocator(void *dummy, unsigned int items, unsigned int size);
+extern void my_az_free(void *dummy, void *address);
+extern int my_compress_buffer(uchar *dest, size_t *destLen,
+ const uchar *source, size_t sourceLen);
extern int packfrm(uchar *, size_t, uchar **, size_t *);
extern int unpackfrm(uchar **, size_t *, const uchar *);
=== modified file 'mysql-test/valgrind.supp'
--- a/mysql-test/valgrind.supp 2009-04-08 16:55:26 +0000
+++ b/mysql-test/valgrind.supp 2009-05-22 12:38:50 +0000
@@ -401,6 +401,20 @@
}
{
+ dlclose memory loss from plugin variant 3
+ Memcheck:Leak
+ fun:malloc
+ fun:_dl_scope_free
+ fun:_dl_close_worker
+ fun:_dl_close
+ fun:_dl_catch_error
+ fun:_dlerror_run
+ fun:dlclose
+ fun:_Z15free_plugin_memP12st_plugin_dl
+ fun:_Z13plugin_dl_delPK19st_mysql_lex_string
+}
+
+{
dlopen / ptread_cancel_init memory loss on Suse Linux 10.3 32/64 bit
Memcheck:Leak
fun:*alloc
=== modified file 'mysys/my_compress.c'
--- a/mysys/my_compress.c 2009-04-25 10:05:32 +0000
+++ b/mysys/my_compress.c 2009-05-22 12:38:50 +0000
@@ -57,19 +57,85 @@ my_bool my_compress(uchar *packet, size_
}
+/*
+ Valgrind normally gives false alarms for zlib operations, in the form of
+ "conditional jump depends on uninitialised values" etc. The reason is
+ explained in the zlib FAQ (http://www.zlib.net/zlib_faq.html#faq36)
+
+ "That is intentional for performance reasons, and the output of deflate
+ is not affected."
+
+ Also discussed on a blog
+ (http://www.sirena.org.uk/log/2006/02/19/zlib-generating-valgrind-warnings/)
+
+ "...loop unrolling in the zlib library causes the mentioned
+ “Conditional jump or move depends on uninitialised value(s)”
+ warnings. These are safe since the results of the comparison are
+ subsequently ignored..."
+
+ "the results of the calculations are discarded by bounds checking done
+ after the loop exits"
+
+ Fix by initializing the memory allocated by zlib when running under Valgrind.
+
+ This fix is safe, since such memory is only used internally by zlib, so we
+ will not hide any bugs in mysql this way.
+*/
+void *my_az_allocator(void *dummy, unsigned int items, unsigned int size)
+{
+ return my_malloc((size_t)items*(size_t)size, IF_VALGRIND(MY_ZEROFILL, MYF(0)));
+}
+
+void my_az_free(void *dummy, void *address)
+{
+ my_free(address, MYF(MY_ALLOW_ZERO_PTR));
+}
+
+/*
+ This works like zlib compress(), but using custom memory allocators to work
+ better with my_malloc leak detection and Valgrind.
+*/
+int my_compress_buffer(uchar *dest, size_t *destLen,
+ const uchar *source, size_t sourceLen)
+{
+ z_stream stream;
+ int err;
+
+ stream.next_in = (Bytef*)source;
+ stream.avail_in = (uInt)sourceLen;
+ stream.next_out = (Bytef*)dest;
+ stream.avail_out = (uInt)*destLen;
+ if ((size_t)stream.avail_out != *destLen)
+ return Z_BUF_ERROR;
+
+ stream.zalloc = (alloc_func)my_az_allocator;
+ stream.zfree = (free_func)my_az_free;
+ stream.opaque = (voidpf)0;
+
+ err = deflateInit(&stream, Z_DEFAULT_COMPRESSION);
+ if (err != Z_OK) return err;
+
+ err = deflate(&stream, Z_FINISH);
+ if (err != Z_STREAM_END) {
+ deflateEnd(&stream);
+ return err == Z_OK ? Z_BUF_ERROR : err;
+ }
+ *destLen = stream.total_out;
+
+ err = deflateEnd(&stream);
+ return err;
+}
+
uchar *my_compress_alloc(const uchar *packet, size_t *len, size_t *complen)
{
uchar *compbuf;
- uLongf tmp_complen;
int res;
*complen= *len * 120 / 100 + 12;
if (!(compbuf= (uchar *) my_malloc(*complen, MYF(MY_WME))))
return 0; /* Not enough memory */
- tmp_complen= (uLongf) *complen;
- res= compress((Bytef*) compbuf, &tmp_complen, (Bytef*) packet, (uLong) *len);
- *complen= tmp_complen;
+ res= my_compress_buffer(compbuf, complen, packet, *len);
if (res != Z_OK)
{
=== modified file 'sql/handler.cc'
--- a/sql/handler.cc 2009-04-25 10:05:32 +0000
+++ b/sql/handler.cc 2009-05-22 12:38:50 +0000
@@ -432,9 +432,6 @@ int ha_initialize_handlerton(st_plugin_i
{
sql_print_error("Plugin '%s' init function returned error.",
plugin->name.str);
- /* Free data, so that we don't refer to it in ha_finalize_handlerton */
- my_free(hton, MYF(0));
- plugin->data= 0;
goto err;
}
=== modified file 'sql/item_strfunc.cc'
--- a/sql/item_strfunc.cc 2009-04-25 10:05:32 +0000
+++ b/sql/item_strfunc.cc 2009-05-22 12:38:50 +0000
@@ -3243,7 +3243,7 @@ longlong Item_func_crc32::val_int()
String *Item_func_compress::val_str(String *str)
{
int err= Z_OK, code;
- ulong new_size;
+ size_t new_size;
String *res;
Byte *body;
char *tmp, *last_char;
@@ -3279,8 +3279,8 @@ String *Item_func_compress::val_str(Stri
body= ((Byte*)buffer.ptr()) + 4;
// As far as we have checked res->is_empty() we can use ptr()
- if ((err= compress(body, &new_size,
- (const Bytef*)res->ptr(), res->length())) != Z_OK)
+ if ((err= my_compress_buffer(body, &new_size, (const uchar *)res->ptr(),
+ res->length())) != Z_OK)
{
code= err==Z_MEM_ERROR ? ER_ZLIB_Z_MEM_ERROR : ER_ZLIB_Z_BUF_ERROR;
push_warning(current_thd,MYSQL_ERROR::WARN_LEVEL_ERROR,code,ER(code));
=== modified file 'storage/archive/azio.c'
--- a/storage/archive/azio.c 2009-05-19 09:28:05 +0000
+++ b/storage/archive/azio.c 2009-05-22 12:38:50 +0000
@@ -16,6 +16,8 @@
#include <stdio.h>
#include <string.h>
+#include "my_sys.h"
+
static int const gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
static int const az_magic[3] = {0xfe, 0x03, 0x01}; /* az magic header */
@@ -37,40 +39,6 @@ void putLong(File file, uLong x);
uLong getLong(azio_stream *s);
void read_header(azio_stream *s, unsigned char *buffer);
-/*
- Valgrind normally gives false alarms for zlib operations, in the form of
- "conditional jump depends on uninitialised values" etc. The reason is
- explained in the zlib FAQ (http://www.zlib.net/zlib_faq.html#faq36)
-
- "That is intentional for performance reasons, and the output of deflate
- is not affected."
-
- Also discussed on a blog
- (http://www.sirena.org.uk/log/2006/02/19/zlib-generating-valgrind-warnings/)
-
- "...loop unrolling in the zlib library causes the mentioned
- “Conditional jump or move depends on uninitialised value(s)”
- warnings. These are safe since the results of the comparison are
- subsequently ignored..."
-
- "the results of the calculations are discarded by bounds checking done
- after the loop exits"
-
- Fix by initializing the memory allocated by zlib when running under Valgrind.
-
- This fix is safe, since such memory is only used internally by zlib, so we
- will not hide any bugs in mysql this way.
-*/
-static void *az_allocator(void *dummy, uInt items, uInt size)
-{
- return my_malloc((size_t)items*(size_t)size, IF_VALGRIND(MY_ZEROFILL, MYF(0)));
-}
-
-static void az_free(void *dummy, void *address)
-{
- my_free(address, MYF(MY_ALLOW_ZERO_PTR));
-}
-
/* ===========================================================================
Opens a gzip (.gz) file for reading or writing. The mode parameter
is as in fopen ("rb" or "wb"). The file is given either by file descriptor
@@ -86,8 +54,8 @@ int az_open (azio_stream *s, const char
int level = Z_DEFAULT_COMPRESSION; /* compression level */
int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */
- s->stream.zalloc = az_allocator;
- s->stream.zfree = az_free;
+ s->stream.zalloc = my_az_allocator;
+ s->stream.zfree = my_az_free;
s->stream.opaque = (voidpf)0;
memset(s->inbuf, 0, AZ_BUFSIZE_READ);
memset(s->outbuf, 0, AZ_BUFSIZE_WRITE);
1
0