Hi, Igor,

See my patch below.

The only questionable part is in remove_dup_with_compare(). Before, it was memcpy'ing only part of the record, skipping the const fields. Now it memcpy's the complete record. I don't think it matters much performance-wise. An alternative would be to do two memcpy's, one for the NULL bitmap and one for the fields (because the const fields are located between the NULL bitmap, which we need, and the other non-const fields). Another alternative would be to have the const fields at the end of the record, but that would be a much larger and riskier change.
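To make the two-memcpy alternative concrete, here is a rough sketch; the struct and its members (null_bytes, offset, reclength) are made-up stand-ins for the record layout, not the real TABLE/TABLE_SHARE members:

  #include <string.h>

  /* Toy model of the tmp table record layout discussed above:
     [NULL bitmap][const fields][non-const fields]                      */
  struct toy_record
  {
    unsigned char rec0[64];       /* stands for table->record[0]        */
    unsigned char rec1[64];       /* stands for table->record[1]        */
    unsigned null_bytes;          /* size of the NULL bitmap            */
    unsigned offset;              /* where the non-const fields start   */
    unsigned reclength;           /* total record length                */
  };

  /* Two memcpy's: the NULL bitmap first, then the non-const fields,
     skipping the const fields that sit between them.                   */
  static void save_checked_parts(struct toy_record *t)
  {
    memcpy(t->rec1, t->rec0, t->null_bytes);
    memcpy(t->rec1 + t->offset, t->rec0 + t->offset,
           t->reclength - t->offset);
  }

Compared to that, the single store_record() copy in the patch just also copies the const fields in between, which is why I don't expect a measurable difference.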
------------------------------------------------------------
revno: 3620
fixes bug: https://mariadb.atlassian.net/browse/MDEV-3875
committer: Sergei Golubchik <sergii@pisem.net>
branch nick: 5.3
timestamp: Sat 2013-01-26 22:33:18 +0100
message:
  MDEV-3875 Wrong result (missing row) on a DISTINCT query with the same
  subquery in the SELECT list and GROUP BY

  fix remove_dup_with_hash_index() and remove_dup_with_compare()
  to take NULLs into account
modified:
  mysql-test/r/distinct.result
  mysql-test/t/distinct.test
  sql/field.cc
  sql/field.h
  sql/filesort.cc
  sql/sql_select.cc
diff:
=== modified file 'mysql-test/r/distinct.result'
--- mysql-test/r/distinct.result 2011-12-15 22:26:59 +0000
+++ mysql-test/r/distinct.result 2013-01-26 21:33:18 +0000
@@ -847,3 +847,35 @@ time(f1)
 00:00:00.000200
 00:00:00.000300
 drop table t1;
+create table t1(i int, g int);
+insert into t1 values (null, 1), (0, 2);
+select distinct i from t1 group by g;
+i
+NULL
+0
+drop table t1;
+create table t1(i int, g blob);
+insert into t1 values (null, 1), (0, 2);
+select distinct i from t1 group by g;
+i
+NULL
+0
+drop table t1;
+create table t1 (a int) engine=myisam;
+insert into t1 values (0),(7);
+create table t2 (b int) engine=myisam;
+insert into t2 values (7),(0),(3);
+create algorithm=temptable view v as
+select distinct (select max(a) from t1 where alias.b = a) as field1 from t2 as alias group by field1;
+select * from v;
+field1
+NULL
+0
+7
+select distinct (select max(a) from t1 where alias.b = a) as field1 from t2 as alias group by field1;
+field1
+NULL
+0
+7
+drop view v;
+drop table t1, t2;
=== modified file 'mysql-test/t/distinct.test'
--- mysql-test/t/distinct.test 2011-12-15 22:26:59 +0000
+++ mysql-test/t/distinct.test 2013-01-26 21:33:18 +0000
@@ -658,3 +658,27 @@ select time(f1) from t1 ;
 select distinct time(f1) from t1 ;
 drop table t1;
+#
+# MDEV-3875 Wrong result (missing row) on a DISTINCT query with the same subquery in the SELECT list and GROUP BY
+# MySQL Bug#66896 Distinct not distinguishing 0 from NULL when GROUP BY is used
+#
+create table t1(i int, g int); # remove_dup_with_hash_index
+insert into t1 values (null, 1), (0, 2);
+select distinct i from t1 group by g;
+drop table t1;
+
+create table t1(i int, g blob); # remove_dup_with_compare
+insert into t1 values (null, 1), (0, 2);
+select distinct i from t1 group by g;
+drop table t1;
+
+create table t1 (a int) engine=myisam;
+insert into t1 values (0),(7);
+create table t2 (b int) engine=myisam;
+insert into t2 values (7),(0),(3);
+create algorithm=temptable view v as
+select distinct (select max(a) from t1 where alias.b = a) as field1 from t2 as alias group by field1;
+select * from v;
+select distinct (select max(a) from t1 where alias.b = a) as field1 from t2 as alias group by field1;
+drop view v;
+drop table t1, t2;
=== modified file 'sql/field.cc'
--- sql/field.cc 2012-08-29 15:55:59 +0000
+++ sql/field.cc 2013-01-26 21:33:18 +0000
@@ -1062,6 +1062,21 @@ bool Field::type_can_have_key_part(enum
 }
 
 
+void Field::make_sort_key(uchar *buff,uint length)
+{
+  if (maybe_null())
+  {
+    if (is_null())
+    {
+      bzero(buff, length + 1);
+      return;
+    }
+    *buff++= 1;
+  }
+  sort_string(buff, length);
+}
+
+
 /**
   Numeric fields base class constructor.
 */
=== modified file 'sql/field.h'
--- sql/field.h 2012-06-23 22:00:05 +0000
+++ sql/field.h 2013-01-26 21:33:18 +0000
@@ -386,6 +386,7 @@ class Field
     return bytes;
   }
 
+  void make_sort_key(uchar *buff, uint length);
   virtual void make_field(Send_field *);
   virtual void sort_string(uchar *buff,uint length)=0;
   virtual bool optimize_range(uint idx, uint part);
=== modified file 'sql/filesort.cc'
--- sql/filesort.cc 2012-11-09 08:11:20 +0000
+++ sql/filesort.cc 2013-01-26 21:33:18 +0000
@@ -783,21 +783,9 @@ static void make_sortkey(register SORTPA
     bool maybe_null=0;
     if ((field=sort_field->field))
     {                                           // Field
-      if (field->maybe_null())
-      {
-        if (field->is_null())
-        {
-          if (sort_field->reverse)
-            bfill(to,sort_field->length+1,(char) 255);
-          else
-            bzero((char*) to,sort_field->length+1);
-          to+= sort_field->length+1;
-          continue;
-        }
-        else
-          *to++=1;
-      }
-      field->sort_string(to, sort_field->length);
+      field->make_sort_key(to, sort_field->length);
+      if ((maybe_null = field->maybe_null()))
+        to++;
     }
     else
     {                                           // Item
@@ -949,8 +937,11 @@ static void make_sortkey(register SORTPA
     }
     if (sort_field->reverse)
    {                                            /* Revers key */
-      if (maybe_null)
-        to[-1]= ~to[-1];
+      if (maybe_null && (to[-1]= !to[-1]))
+      {
+        to+= sort_field->length; // don't waste the time reversing all 0's
+        continue;
+      }
       length=sort_field->length;
       while (length--)
       {
=== modified file 'sql/sql_select.cc'
--- sql/sql_select.cc 2013-01-16 13:11:13 +0000
+++ sql/sql_select.cc 2013-01-26 21:33:18 +0000
@@ -204,7 +204,7 @@ static int create_sort_index(THD *thd, J
 static int remove_duplicates(JOIN *join,TABLE *entry,List<Item> &fields,
                              Item *having);
 static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
-                                   ulong offset,Item *having);
+                                   Item *having);
 static int remove_dup_with_hash_index(THD *thd,TABLE *table,
                                       uint field_count, Field **first_field,
                                       ulong key_length,Item *having);
@@ -18891,19 +18891,24 @@ static bool fix_having(JOIN *join, Item
 #endif
 
 
-/*****************************************************************************
-  Remove duplicates from tmp table
-  This should be recoded to add a unique index to the table and remove
-  duplicates
-  Table is a locked single thread table
-  fields is the number of fields to check (from the end)
-*****************************************************************************/
+/**
+  Compare fields from table->record[0] and table->record[1],
+  possibly skipping few first fields.
+
+  @param table
+  @param ptr    field to start the comparison from,
+                somewhere in the table->field[] array
+
+  @retval 1  different
+  @retval 0  identical
+*/
 
 static bool compare_record(TABLE *table, Field **ptr)
 {
   for (; *ptr ; ptr++)
   {
-    if ((*ptr)->cmp_offset(table->s->rec_buff_length))
+    Field *f= *ptr;
+    if (f->is_null() != f->is_null(table->s->rec_buff_length) ||
+        (!f->is_null() && f->cmp_offset(table->s->rec_buff_length)))
       return 1;
   }
   return 0;
 }
@@ -18931,15 +18936,15 @@ static void free_blobs(Field **ptr)
 
 
 static int
-remove_duplicates(JOIN *join, TABLE *entry,List<Item> &fields, Item *having)
+remove_duplicates(JOIN *join, TABLE *table, List<Item> &fields, Item *having)
 {
   int error;
-  ulong reclength,offset;
+  ulong keylength= 0;
   uint field_count;
   THD *thd= join->thd;
   DBUG_ENTER("remove_duplicates");
 
-  entry->reginfo.lock_type=TL_WRITE;
+  table->reginfo.lock_type=TL_WRITE;
 
   /* Calculate how many saved fields there is in list */
   field_count=0;
@@ -18956,24 +18961,21 @@ remove_duplicates(JOIN *join, TABLE *ent
     join->unit->select_limit_cnt= 1;            // Only send first row
     DBUG_RETURN(0);
   }
-  Field **first_field=entry->field+entry->s->fields - field_count;
-  offset= (field_count ?
-           entry->field[entry->s->fields - field_count]->
-           offset(entry->record[0]) : 0);
-  reclength=entry->s->reclength-offset;
-
-  free_io_cache(entry);                         // Safety
-  entry->file->info(HA_STATUS_VARIABLE);
-  if (entry->s->db_type() == heap_hton ||
-      (!entry->s->blob_fields &&
-       ((ALIGN_SIZE(reclength) + HASH_OVERHEAD) * entry->file->stats.records <
+
+  Field **first_field=table->field+table->s->fields - field_count;
+  for (Field **ptr=first_field; *ptr; ptr++)
+    keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null();
+
+  free_io_cache(table);                         // Safety
+  table->file->info(HA_STATUS_VARIABLE);
+  if (table->s->db_type() == heap_hton ||
+      (!table->s->blob_fields &&
+       ((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records <
        thd->variables.sortbuff_size)))
-    error=remove_dup_with_hash_index(join->thd, entry,
-                                     field_count, first_field,
-                                     reclength, having);
+    error=remove_dup_with_hash_index(join->thd, table, field_count, first_field,
+                                     keylength, having);
   else
-    error=remove_dup_with_compare(join->thd, entry, first_field, offset,
-                                  having);
+    error=remove_dup_with_compare(join->thd, table, first_field, having);
 
   free_blobs(first_field);
   DBUG_RETURN(error);
@@ -18981,18 +18983,13 @@ remove_duplicates(JOIN *join, TABLE *ent
 
 
 static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
-                                   ulong offset, Item *having)
+                                   Item *having)
 {
   handler *file=table->file;
-  char *org_record,*new_record;
-  uchar *record;
+  uchar *record=table->record[0];
   int error;
-  ulong reclength= table->s->reclength-offset;
   DBUG_ENTER("remove_dup_with_compare");
 
-  org_record=(char*) (record=table->record[0])+offset;
-  new_record=(char*) table->record[1]+offset;
-
   if (file->ha_rnd_init_with_error(1))
     DBUG_RETURN(1);
@@ -19029,7 +19026,7 @@ static int remove_dup_with_compare(THD *
         error=0;
         goto err;
       }
-      memcpy(new_record,org_record,reclength);
+      store_record(table,record[1]);
 
       /* Read through rest of file and mark duplicated rows deleted */
       bool found=0;
@@ -19088,8 +19085,9 @@ static int remove_dup_with_hash_index(TH
   int error;
   handler *file= table->file;
   ulong extra_length= ALIGN_SIZE(key_length)-key_length;
-  uint *field_lengths,*field_length;
+  uint *field_lengths, *field_length;
   HASH hash;
+  Field **ptr;
   DBUG_ENTER("remove_dup_with_hash_index");
 
   if (!my_multi_malloc(MYF(MY_WME),
@@ -19101,21 +19099,8 @@ static int remove_dup_with_hash_index(TH
                        NullS))
     DBUG_RETURN(1);
 
-  {
-    Field **ptr;
-    ulong total_length= 0;
-    for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
-    {
-      uint length= (*ptr)->sort_length();
-      (*field_length++)= length;
-      total_length+= length;
-    }
-    DBUG_PRINT("info",("field_count: %u key_length: %lu total_length: %lu",
-                       field_count, key_length, total_length));
-    DBUG_ASSERT(total_length <= key_length);
-    key_length= total_length;
-    extra_length= ALIGN_SIZE(key_length)-key_length;
-  }
+  for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
+    (*field_length++)= (*ptr)->sort_length();
 
   if (hash_init(&hash, &my_charset_bin, (uint) file->stats.records, 0,
                 key_length, (hash_get_key) 0, 0, 0))
@@ -19155,13 +19140,13 @@ static int remove_dup_with_hash_index(TH
     /* copy fields to key buffer */
     org_key_pos= key_pos;
     field_length=field_lengths;
-    for (Field **ptr= first_field ; *ptr ; ptr++)
+    for (ptr= first_field ; *ptr ; ptr++)
     {
-      (*ptr)->sort_string(key_pos,*field_length);
-      key_pos+= *field_length++;
+      (*ptr)->make_sort_key(key_pos, *field_length);
+      key_pos+= (*ptr)->maybe_null() + *field_length++;
     }
     /* Check if it exists before */
-    if (hash_search(&hash, org_key_pos, key_length))
+    if (my_hash_search(&hash, org_key_pos, key_length))
     {
       /* Duplicated found ; Remove the row */
       if ((error= file->ha_delete_row(record)))

Regards,
Sergei
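P.S. For anyone reviewing who hasn't looked at the bug report itself: the point of Field::make_sort_key() is that the deduplication key of a nullable field now starts with a NULL-indicator byte, so SQL NULL and the value 0 can no longer end up with identical keys. A tiny standalone illustration (made-up helper with plain C types, not the server API):

  #include <string.h>
  #include <stddef.h>

  /* Build a dedup key for one nullable int column: one indicator byte
     (0 = NULL, 1 = not NULL) followed by the value bytes.  Before the
     patch the key held only the value bytes, so a NULL and a 0 could
     produce the same key and one of the rows was dropped as a duplicate. */
  static size_t make_key(unsigned char *key, const int *value, size_t length)
  {
    if (value == NULL)
    {
      memset(key, 0, length + 1);   /* NULL: indicator 0, rest zeroed */
      return length + 1;
    }
    *key++ = 1;                     /* not NULL: indicator byte 1     */
    memcpy(key, value, length);     /* then the column value itself   */
    return length + 1;
  }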