revision-id: 847e6959f45a1334f3c60452f11dde38bb9cbe3f parent(s): fc7bcca40f4ce83327745e9edf3b9bf84b1516af committer: Sergei Petrunia branch nick: mysql-5.6-rocksdb-look800 timestamp: 2018-05-07 18:13:42 +0300 message: Issue #809: Wrong query result with bloom filters In reverse-ordered column families, if one wants to start reading at the logical end of the index, they should Seek() to a key value that is not covered by the index. This may (and typically does) prevent use of a bloom filter. The calls to setup_scan_iterator() that are made for index and table scan didn't take this into account and passed eq_cond_len=INDEX_NUMBER_SIZE. Fixed them to compute and pass correct eq_cond_len. Also, removed an incorrect assert in ha_rocksdb::setup_iterator_bounds. --- mysql-test/suite/rocksdb/r/bloomfilter5.result | 62 ++++++++++++++++++++++ mysql-test/suite/rocksdb/t/bloomfilter5-master.opt | 3 ++ mysql-test/suite/rocksdb/t/bloomfilter5.test | 61 +++++++++++++++++++++ storage/rocksdb/ha_rocksdb.cc | 9 ++-- 4 files changed, 130 insertions(+), 5 deletions(-) diff --git a/mysql-test/suite/rocksdb/r/bloomfilter5.result b/mysql-test/suite/rocksdb/r/bloomfilter5.result new file mode 100644 index 0000000..058d360 --- /dev/null +++ b/mysql-test/suite/rocksdb/r/bloomfilter5.result @@ -0,0 +1,62 @@ +# +# Issue #809: Wrong query result with bloom filters +# +create table t1 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t3(seq int); +insert into t3 +select +1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; +insert t1 +select +(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; +set global rocksdb_force_flush_memtable_now=1; +# Full table scan +explain +select * from t1 limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 NULL +select * from t1 limit 10; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +1000 2000 2000 9999 9999 1000 aaabbbccc +1000 2000 2000 9998 9998 1000 aaabbbccc +1000 2000 2000 9997 9997 1000 aaabbbccc +1000 2000 2000 9996 9996 1000 aaabbbccc +1000 1999 1999 9995 9995 1000 aaabbbccc +1000 1999 1999 9994 9994 1000 aaabbbccc +1000 1999 1999 9993 9993 1000 aaabbbccc +1000 1999 1999 9992 9992 1000 aaabbbccc +1000 1999 1999 9991 9991 1000 aaabbbccc +# An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 122 NULL 1 NULL +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +create table t4 ( +pk int unsigned not null primary key, +kp1 int unsigned not null, +kp2 int unsigned not null, +col1 int unsigned, +key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); +# This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; +pk kp1 kp2 col1 +drop table t1,t2,t3,t4; diff --git a/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt b/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt new file mode 100644 index 0000000..efcd69b --- /dev/null +++ b/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt @@ -0,0 +1,3 @@ +--rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;} +--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4}; + diff --git a/mysql-test/suite/rocksdb/t/bloomfilter5.test b/mysql-test/suite/rocksdb/t/bloomfilter5.test new file mode 100644 index 0000000..00968ae --- /dev/null +++ b/mysql-test/suite/rocksdb/t/bloomfilter5.test @@ -0,0 +1,61 @@ + +--echo # +--echo # Issue #809: Wrong query result with bloom filters +--echo # + +create table t1 ( + id1 bigint not null, + id2 bigint not null, + id3 varchar(100) not null, + id4 int not null, + id5 int not null, + value bigint, + value2 varchar(100), + primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; + + +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t3(seq int); +insert into t3 +select + 1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; + +insert t1 +select + (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; + +set global rocksdb_force_flush_memtable_now=1; + +--echo # Full table scan +explain +select * from t1 limit 10; +select * from t1 limit 10; + +--echo # An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; + +# A testcase for an assertion that the fix is removing +# The only requirement for the used column family is that it is reverse-ordered +create table t4 ( + pk int unsigned not null primary key, + kp1 int unsigned not null, + kp2 int unsigned not null, + col1 int unsigned, + key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; + +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); + +--echo # This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; + +drop table t1,t2,t3,t4; + + diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 455b55d..3eedf4c 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -8582,7 +8582,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - kd.get_first_key(key, &key_size); + int key_start_matching_bytes = kd.get_first_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -8593,7 +8593,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes); m_scan_it->Seek(index_key); m_skip_scan_it_next_call = true; @@ -9600,7 +9600,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd, const rocksdb::Slice &eq_cond) { uint eq_cond_len = eq_cond.size(); - DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE); memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len); kd.successor(m_eq_cond_upper_bound, eq_cond_len); memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len); @@ -9696,12 +9695,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); + int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); setup_scan_iterator(*m_pk_descr, &table_key, false, - Rdb_key_def::INDEX_NUMBER_SIZE); + key_start_matching_bytes); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; }