Hi Sanja, Please find review comments below. On Thu, Apr 02, 2015 at 06:19:41PM +0200, sanja@mariadb.com wrote:
revision-id: fc31f6d95720b4b946b8b68c816026d65831f347 parent(s): 01d7da6785284383b2c04f2d4474feccebb0bb6f committer: Oleksandr Byelkin branch nick: server timestamp: 2015-04-02 18:19:33 +0200 message:
MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record
--- mysql-test/r/analyze_format_json.result | 62 +++++++++++++++++++++++++++++ mysql-test/t/analyze_format_json.test | 30 ++++++++++++++ sql/sql_explain.cc | 26 +++++++++++-- sql/sql_explain.h | 12 +++++- sql/sql_select.cc | 69 ++++++++++++++++++++++++++++++--- sql/sql_select.h | 7 +++- 6 files changed, 195 insertions(+), 11 deletions(-)
diff --git a/mysql-test/r/analyze_format_json.result b/mysql-test/r/analyze_format_json.result index 9022d8c..b3ef075 100644 --- a/mysql-test/r/analyze_format_json.result +++ b/mysql-test/r/analyze_format_json.result @@ -264,3 +264,65 @@ ANALYZE } } drop table t1; +# +# MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record +# +create table t3(a int); +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t4(a int); +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C; +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int); +insert into t1 values (1,2,10,20,15,15); +insert into t1 values (3,5,10,20,15,15); +insert into t1 values (10,20,10,20,15,15); +insert into t1 values (10,20,1,2,15,15); +insert into t1 values (10,20,10,20,1,3); +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int, +key(key1), key(key2), key(key3), key(key4)); +insert into t2 select a,a,a,a,a from t3; +insert into t2 select 15,15,15,15,15 from t4; +analyze format=json +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and +(t2.key2 between t1.lb2 and t1.rb2) and +(t2.key3=t1.c1 OR t2.key4=t1.c2); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t1", + "access_type": "ALL", + "r_loops": 1, + "rows": 5, + "r_rows": 5, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + }, + "range-checked-for-each-record": { + "keys": ["key1", "key2", "key3", "key4"], + "r_keys_none": 1, + "r_keys_index_merge": 1, + "r_keys": { + "key1": 2, + "key2": 1, + "key3": 0, + "key4": 0 + }, It seems wrong to have multiple r_XXX elements. I think, having one will be better. How about: r_keys: { "none": 1, ## Maybe, change to "full_table_scan" ? (btw can it be a full index scan instead I wonder?) "index_merge": nnn, "range" : { "key1" : nnn, "key2" : nnn, "key3" : nnn } }
Let's discuss it.
+ "table": { + "table_name": "t2", + "access_type": "ALL", + "possible_keys": ["key1", "key2", "key3", "key4"], + "r_loops": 5, + "rows": 1010, + "r_rows": 203.8, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 98.135 + } + } + } +} +drop table t1,t2,t3,t4; diff --git a/mysql-test/t/analyze_format_json.test b/mysql-test/t/analyze_format_json.test index 64bafd7..0ae9d4f 100644 --- a/mysql-test/t/analyze_format_json.test +++ b/mysql-test/t/analyze_format_json.test @@ -75,3 +75,33 @@ disconnect con1; connection default; drop table t1;
+ +--echo # +--echo # MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record +--echo # +create table t3(a int); +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t4(a int); +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C; + +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int); + +insert into t1 values (1,2,10,20,15,15); +insert into t1 values (3,5,10,20,15,15); +insert into t1 values (10,20,10,20,15,15); +insert into t1 values (10,20,1,2,15,15); +insert into t1 values (10,20,10,20,1,3); + +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int, + key(key1), key(key2), key(key3), key(key4)); +insert into t2 select a,a,a,a,a from t3; +insert into t2 select 15,15,15,15,15 from t4; + +--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/ +analyze format=json +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and + (t2.key2 between t1.lb2 and t1.rb2) and + (t2.key3=t1.c1 OR t2.key4=t1.c2); + +drop table t1,t2,t3,t4; diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index 9d82f4f..edbc2bf 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -1076,15 +1076,15 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai }
Please add a comment: @return ptr - pointer to the added string NULL - out of memory error Does it make sense to change the return type to 'const char*' ?
-bool String_list::append_str(MEM_ROOT *mem_root, const char *str) +char *String_list::append_str(MEM_ROOT *mem_root, const char *str) { size_t len= strlen(str); char *cp; if (!(cp = (char*)alloc_root(mem_root, len+1))) - return 1; + return NULL; memcpy(cp, str, len+1); push_back(cp); - return 0; + return cp; }
@@ -1209,6 +1209,26 @@ void Explain_table_access::print_explain_json(Explain_query *query, { writer->add_member("range-checked-for-each-record").start_object(); add_json_keyset(writer, "keys", &possible_keys); + if (is_analyze) + { + writer->add_member("r_keys_none").add_ll(range_checked_fer->none); + writer->add_member("r_keys_index_merge").add_ll(range_checked_fer-> + index_merge); + if (range_checked_fer->keys_stat) + { + writer->add_member("r_keys").start_object(); + for (uint i= 0; i < range_checked_fer->keys; i++) + { + if (range_checked_fer->keys_stat_names[i]) + { + writer->add_member(range_checked_fer-> + keys_stat_names[i]).add_ll(range_checked_fer-> + keys_stat[i]); + } + } + writer->end_object(); + } + } If we move this code into Range_checked_fer::print_json(), will it be possible to make its members private? }
if (full_scan_on_null_key) diff --git a/sql/sql_explain.h b/sql/sql_explain.h index e3b41ee..f86886d 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -54,7 +54,7 @@ it into the slow query log. class String_list: public List<char> { public: - bool append_str(MEM_ROOT *mem_root, const char *str); + char *append_str(MEM_ROOT *mem_root, const char *str); };
class Json_writer; @@ -627,6 +627,16 @@ class Explain_range_checked_fer : public Sql_alloc public: String_list key_set; key_map keys_map; + + ha_rows none, index_merge; 'none' looks weird. Would a 'full_scan' be better.
+ ha_rows *keys_stat; + char **keys_stat_names; + uint keys; This needs to be documented. + + Explain_range_checked_fer() + :Sql_alloc(), none(0), index_merge(0), + keys_stat(0), keys_stat_names(0), keys(0) + {} };
/* diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 5c3e116..4eb3d81 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -11398,6 +11398,7 @@ void JOIN_TAB::cleanup() table->reginfo.join_tab= 0; } end_read_record(&read_record); + explain_plan= 0; It's a pointer, right? Please change s/0/NULL/
DBUG_VOID_RETURN; }
@@ -18762,9 +18763,30 @@ test_if_quick_select(JOIN_TAB *tab)
delete tab->select->quick; tab->select->quick=0; - return tab->select->test_quick_select(tab->join->thd, tab->keys, - (table_map) 0, HA_POS_ERROR, 0, - FALSE, /*remove where parts*/FALSE); + int res= tab->select->test_quick_select(tab->join->thd, tab->keys, + (table_map) 0, HA_POS_ERROR, 0, + FALSE, /*remove where parts*/FALSE); + if (tab->explain_plan && tab->explain_plan->range_checked_fer) + { + if (tab->select->quick) + { + QUICK_SELECT_I *quick= tab->select->quick; + if (quick->index == MAX_KEY) + tab->explain_plan->range_checked_fer->index_merge++; + else + { + DBUG_ASSERT(quick->index < tab->explain_plan->range_checked_fer->keys); + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat); + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names); + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names[ + quick->index]); + tab->explain_plan->range_checked_fer->keys_stat[quick->index]++; + } + } + else + tab->explain_plan->range_checked_fer->none++; + } + return res; I think it's bad that join execution code becomes littered with details about how things are stored in the Explain data structures.
Can you add a set of methods to range_checked_fer: count_full_scan(); count_index_merge(); count_range_scan(); and then only call these methods from here?
}
@@ -23360,6 +23382,32 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, return 0; }
What does this function do? AFAIU it does two things: - copies indexes from possible_keys into a String_list. - allocates an empty array of ha_rows(). The set of data structures used by Range_checked_fer is quite peculiar. This function is only useful for Range_checked_fer. Do you think it would make sense to made this a method of Range_checked_fer?
+int append_possible_keys_stat(MEM_ROOT *alloc, String_list &list, + ha_rows **stat, char ***names, + TABLE *table, key_map possible_keys) +{ + uint j; + multi_alloc_root(alloc, stat, sizeof(ha_rows) * table->s->keys, + names, sizeof(char *) * table->s->keys, NULL); + if ((!(*stat)) || (!(*names))) + { + (*stat)= NULL; (*names)= NULL; + return 1; + } + bzero(*stat, sizeof(ha_rows) * table->s->keys); + for (j= 0; j < table->s->keys; j++) + { + if (possible_keys.is_set(j)) + { + char *nm= list.append_str(alloc, table->key_info[j].name); + (*names)[j]= nm; + } + else + (*names)[j]= NULL; + } + return 0; +} + /* TODO: this function is only applicable for the first non-const optimization join tab. @@ -23400,6 +23448,8 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab quick_type= -1; QUICK_SELECT_I *quick= NULL;
+ + explain_plan= eta; eta->key.clear(); eta->quick_info= NULL;
@@ -23663,9 +23713,16 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab { eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD); eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer; - eta->range_checked_fer->keys_map= tab->keys; - append_possible_keys(thd->mem_root, eta->range_checked_fer->key_set, - table, tab->keys); + if (eta->range_checked_fer) + { + eta->range_checked_fer->keys_map= tab->keys; + eta->range_checked_fer->keys= table->s->keys; + append_possible_keys_stat(thd->mem_root, + eta->range_checked_fer->key_set, + &eta->range_checked_fer->keys_stat, + &eta->range_checked_fer->keys_stat_names, + table, tab->keys); + } } else if (tab->select->cond || (tab->cache_select && tab->cache_select->cond)) diff --git a/sql/sql_select.h b/sql/sql_select.h index 0557e32..ef92690 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -362,7 +362,12 @@ typedef struct st_join_table { SJ_TMP_TABLE *check_weed_out_table; /* for EXPLAIN only: */ SJ_TMP_TABLE *first_weedout_table; - + + /** + reference to saved plan and execution statistics + */ + Explain_table_access *explain_plan; + /* If set, means we should stop join enumeration after we've got the first match and return to the specified join tab. May point to
BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog