[Maria-developers] Rev 2842: A test implementation of having SQL layer to supply the storage engine with in file:///home/psergey/dev/maria-5.1-minmax/
At file:///home/psergey/dev/maria-5.1-minmax/ ------------------------------------------------------------ revno: 2842 revision-id: psergey@askmonty.org-20100421141933-707cy6l3iha89lm5 parent: monty@askmonty.org-20100406224708-i62wvtw3nl14fhv4 committer: Sergey Petrunya <psergey@askmonty.org> branch nick: maria-5.1-minmax timestamp: Wed 2010-04-21 10:19:33 -0400 message: A test implementation of having SQL layer to supply the storage engine with interval lists for each unindexed column. This patch consists of - SQL layer providing service of taking a condition and extracting list<column, disjoint_ordered_list<interval>> from it (see opt_field_minmax.h) - An storage engine's "Implementation" that prints passed intervals to server stderr. Things need to be done: - Comments, better names - Make the interface compatible with Query Fragment Pushdown ideas. - Invent something that would allow to have test coverage for this. === added file 'sql/opt_field_minmax.h' --- a/sql/opt_field_minmax.h 1970-01-01 00:00:00 +0000 +++ b/sql/opt_field_minmax.h 2010-04-21 14:19:33 +0000 @@ -0,0 +1,35 @@ +//#include "../sql/opt_field_minmax.h" + +class RANGE_OPT_PARAM; +class SEL_TREE; +class SEL_ARG; + +typedef bool (*take_int_return_bool)(void* param, uint index); + +class Condition_analyzer +{ + TABLE *table; + MEM_ROOT alloc; + RANGE_OPT_PARAM *range_par; + SEL_TREE *tree; + my_bitmap_map *old_sets[2]; + uchar min_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; + uchar max_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; + + uint n_keys; + + int cur_key; + SEL_ARG *cur_interval; +public: + Condition_analyzer(TABLE *table_arg) : + table(table_arg), range_par(NULL), cur_key(-1) {} + + bool analyze(Item *cond, take_int_return_bool field_filter_func, + void *field_filter_param); + + bool start_intervals_walk_for_field(uint *fieldno); + bool get_next_interval(KEY_MULTI_RANGE *mrange); + + void dispose(); +}; + === modified file 'sql/opt_range.cc' --- a/sql/opt_range.cc 2010-01-15 15:27:55 +0000 +++ b/sql/opt_range.cc 2010-04-21 14:19:33 +0000 @@ -2788,6 +2788,248 @@ DBUG_RETURN(retval); } +// psergey-minmax: start + +#include "opt_field_minmax.h" + +/* + Create one-column pseudo-indexes for each column. +*/ + +static +bool create_minmax_indexes_descriptions(RANGE_OPT_PARAM *range_par, + take_int_return_bool field_filter_func, + void *func_param, + uint *n_keys) +{ + TABLE *table= range_par->table; + MEM_ROOT *alloc= range_par->mem_root; + *n_keys= 0; + uint fieldno; + for (fieldno= 0; fieldno < range_par->table->s->fields; fieldno++) + { + enum_field_types type= table->field[fieldno]->type(); + if (!(type == MYSQL_TYPE_TINY_BLOB || + type == MYSQL_TYPE_MEDIUM_BLOB || + type == MYSQL_TYPE_LONG_BLOB || + type == MYSQL_TYPE_BLOB || + type == MYSQL_TYPE_GEOMETRY) && + field_filter_func(func_param, fieldno) && + *n_keys < MAX_INDEXES) + { + (*n_keys)++; + } + } + + if (!*n_keys) + return TRUE; + + KEY_PART *key_part; + key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)* (*n_keys)); + range_par->key_parts= key_part; + range_par->key_parts_end= key_part + *n_keys; + + uint idx= 0; + for (fieldno= 0; fieldno < range_par->table->s->fields; fieldno++) + { + enum_field_types type= table->field[fieldno]->type(); + if (!(type == MYSQL_TYPE_TINY_BLOB || + type == MYSQL_TYPE_MEDIUM_BLOB || + type == MYSQL_TYPE_LONG_BLOB || + type == MYSQL_TYPE_BLOB || + type == MYSQL_TYPE_GEOMETRY) && + field_filter_func(func_param, fieldno) && + idx < MAX_INDEXES) + { + idx++; + Field **field= table->field + fieldno; + key_part->key= 0; + key_part->part= 0; + key_part->store_length= key_part->length= (uint16) (*field)->key_length(); + if ((*field)->real_maybe_null()) + key_part->store_length+= HA_KEY_NULL_LENGTH; + if ((*field)->type() == MYSQL_TYPE_BLOB || + (*field)->real_type() == MYSQL_TYPE_VARCHAR) + key_part->store_length+= HA_KEY_BLOB_LENGTH; + + key_part->field= (*field); + key_part->image_type = Field::itRAW; + /* + We set keypart flag to 0 here as the only HA_PART_KEY_SEG is checked + in the RangeAnalysisModule. + */ + key_part->flag= 0; + /* We don't set key_parts->null_bit as it will not be used */ + + key_part++; + } + } + return FALSE; +} + + + + +bool Condition_analyzer::analyze(Item *cond, + take_int_return_bool func, + void *func_param) +{ + bool retval= FALSE; + THD *thd= current_thd; + uint i; + DBUG_ENTER("find_min_max_bounds"); + + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); + range_par= (RANGE_OPT_PARAM*)alloc_root(&alloc, sizeof(RANGE_OPT_PARAM)); + range_par->mem_root= &alloc; + range_par->old_root= thd->mem_root; + range_par->thd= thd; + range_par->table= table; + /* range_par->cond doesn't need initialization */ + range_par->prev_tables= range_par->read_tables= 0; + range_par->current_table= table->map; + + if (create_minmax_indexes_descriptions(range_par, func, func_param, &n_keys)) + { + free_root(&alloc,MYF(0)); // Return memory & allocator + DBUG_RETURN(FALSE); + } + range_par->keys= n_keys; + + dbug_tmp_use_all_columns(table, old_sets, + table->read_set, table->write_set); + range_par->using_real_indexes= FALSE; + range_par->remove_jump_scans= TRUE; + for (i=0; i < n_keys; i++) + range_par->real_keynr[i]= i; + range_par->alloced_sel_args= 0; + + + thd->no_errors=1; // Don't warn about NULL + thd->mem_root=&alloc; + + tree= get_mm_tree(range_par, cond); + if (!tree) + { + retval= TRUE; + goto end; + } + + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + retval= TRUE; + goto end; + } + + if ((tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER) || + !tree->merges.is_empty()) + { + retval= TRUE; + goto end; + } + +end: + DBUG_RETURN(retval); +} + + +bool Condition_analyzer::start_intervals_walk_for_field(uint *fieldno) +{ + if (!tree) + return FALSE; + for (cur_key++; cur_key < (int)n_keys; cur_key++) + { + if (tree->keys[cur_key]) + { + *fieldno= range_par->key_parts[cur_key].field->field_index; + cur_interval= tree->keys[0]->first(); + return TRUE; + } + } + return FALSE; +} + + +bool Condition_analyzer::get_next_interval(KEY_MULTI_RANGE *mrange) +{ + uchar *key_ptr; + key_range *min_range= &mrange->start_key; + key_range *max_range= &mrange->end_key; + + if (!cur_interval) + return FALSE; + +// uchar min_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; +// uchar max_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; + min_range->key= this->min_val; + max_range->key= this->max_val; + + key_ptr= (uchar*)min_range->key; + cur_interval->store_min(range_par->key_parts[cur_key].store_length, + &key_ptr, /*cur_interval->min_flag*/ 0); + min_range->keypart_map= key_part_map(1); + min_range->length= key_ptr - min_range->key; + min_range->flag= (cur_interval->min_flag & NEAR_MIN ? HA_READ_AFTER_KEY : + HA_READ_KEY_EXACT); + + key_ptr= (uchar*)max_range->key; + cur_interval->store_max(range_par->key_parts[cur_key].store_length, + &key_ptr, /* cur_interval->max_flag */ 0); + max_range->keypart_map= key_part_map(1); + max_range->length= key_ptr - max_range->key; + max_range->flag= (cur_interval->max_flag & NEAR_MAX ? + HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY); + + mrange->range_flag= cur_interval->min_flag | cur_interval->max_flag; + + cur_interval= cur_interval->next; + return TRUE; +} + + +void Condition_analyzer::dispose() +{ + THD *thd= current_thd; + dbug_tmp_restore_column_maps(table->read_set, table->write_set, old_sets); + thd->no_errors=0; + thd->mem_root= range_par->old_root; + free_root(&alloc,MYF(0)); // Return memory & allocator +} + + +#if 0 +static +int search_min_max_bounds(RANGE_OPT_PARAM *ppar, SEL_ARG *key_tree) +{ + int res, left_res=0, right_res=0; + + if (key_tree->left != &null_element) + { + if (-1 == (left_res= search_min_max_bounds(ppar,key_tree->left))) + return -1; + } + + if (key_tree->type == SEL_ARG::KEY_RANGE) + { + /* + Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and + we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change + */ + DBUG_EXECUTE("info", dbug_print_segment_range(key_tree, + ppar->key_parts);); + // key_tree->min_value, key_tree->max_value, key_tree->min_flag | key_tree->max_flag, + } + + if (key_tree->right != &null_element) + { + if (-1 == (right_res= search_min_max_bounds(ppar,key_tree->right))) + return -1; + } + return (left_res || right_res || res); +} +#endif +// psergey-minmax: end; + /* Store field key image to table record === modified file 'sql/sql_parse.cc' --- a/sql/sql_parse.cc 2010-03-04 08:03:07 +0000 +++ b/sql/sql_parse.cc 2010-04-21 14:19:33 +0000 @@ -1237,6 +1237,7 @@ general_log_write(thd, command, thd->query(), thd->query_length()); DBUG_PRINT("query",("%-.4096s",thd->query())); + fprintf(stderr, "query: %-.4096s\n",thd->query()); #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER) thd->profiling.set_query_source(thd->query(), thd->query_length()); #endif === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-03-10 09:12:23 +0000 +++ b/sql/sql_select.cc 2010-04-21 14:19:33 +0000 @@ -6643,6 +6643,8 @@ } } +//psergey-minmax: +bool find_min_max_bounds(THD *thd, TABLE *table, Item *pprune_cond); static void make_join_readinfo(JOIN *join, ulonglong options) @@ -6681,6 +6683,7 @@ table->status=STATUS_NO_RECORD; pick_table_access_method (tab); + //find_min_max_bounds(join->thd, table, tab->select_cond); switch (tab->type) { case JT_EQ_REF: tab->read_record.unlock_row= join_read_key_unlock_row; === modified file 'storage/myisam/ha_myisam.cc' --- a/storage/myisam/ha_myisam.cc 2009-12-03 11:34:11 +0000 +++ b/storage/myisam/ha_myisam.cc 2010-04-21 14:19:33 +0000 @@ -2269,3 +2269,88 @@ DBUG_RETURN(TRUE); } #endif + +static bool do_min_max_for_index(void* param, uint index) +{ + /* param is what was to Condition_analyzer::analyze */ + return TRUE; +} + +#include "../sql/opt_field_minmax.h" + +void store_key_image_to_rec(Field *field, uchar *ptr, uint len); +static void print_segment_range(Field *field, KEY_MULTI_RANGE *mrange); + +static void print_field(Field *field) +{ + if (field->is_real_null()) + fprintf(stderr, "NULL"); + else + { + char buf[256]; + String str(buf, sizeof(buf), &my_charset_bin); + str.length(0); + String *pstr; + pstr= field->val_str(&str); + fprintf(stderr, "'%s'", pstr->c_ptr_safe()); + } +} + +/* Print a "c1 < keypartX < c2" - type interval into debug trace. */ +static void print_segment_range(Field *field, KEY_MULTI_RANGE *mrange) +{ + if (!(mrange->range_flag & NO_MIN_RANGE)) + { + //DBUG_ASSERT(field->key_length() == mrange->start_key.length); + store_key_image_to_rec(field, (uchar*)mrange->start_key.key, mrange->start_key.length); + print_field(field); + if (mrange->range_flag & NEAR_MIN) + fputs(" < ", stderr); + else + fputs(" <= ", stderr); + } + + fprintf(stderr, "%s", field->field_name); + + if (!(mrange->range_flag & NO_MAX_RANGE)) + { + if (mrange->range_flag & NEAR_MAX) + fputs(" < ", stderr); + else + fputs(" <= ", stderr); + //DBUG_ASSERT(field->key_length() == mrange->end_key.length); + store_key_image_to_rec(field, (uchar*)mrange->end_key.key, mrange->end_key.length); + print_field(field); + } + fputs("\n", stderr); +} + +//931 202 76 04 natalya + + + +const COND * ha_myisam::cond_push(const COND *cond) +{ + fprintf(stderr, "table %s: set min-max bounds { \n", table->alias); + Condition_analyzer analyzer(table); + analyzer.analyze((Item*)cond, do_min_max_for_index, (void*)0x123456); + uint fieldno; + while (analyzer.start_intervals_walk_for_field(&fieldno)) + { + KEY_MULTI_RANGE mrange; + while (analyzer.get_next_interval(&mrange)) + { + // print the range; + print_segment_range(table->field[fieldno], &mrange); + } + } + fprintf(stderr, "}\n"); + analyzer.dispose(); + return cond; +} + +void ha_myisam::cond_pop() +{ + fprintf(stderr, "table %s: Remove min-max bounds\n", table->alias); +} + === modified file 'storage/myisam/ha_myisam.h' --- a/storage/myisam/ha_myisam.h 2009-09-07 20:50:10 +0000 +++ b/storage/myisam/ha_myisam.h 2010-04-21 14:19:33 +0000 @@ -148,4 +148,10 @@ { return file; } +//psergey-minmax: + /* + Condition pushdown + */ + const COND *cond_push(const COND *cond); + void cond_pop(); };
participants (1)
-
Sergey Petrunya