revision-id: e3661b9f7c60aa471aaa79e597723e897caf320c parent(s): 39fbafbcc22cd51c1cbca8a06320394e94a9cd50 committer: Sergei Petrunia branch nick: 10.2-r10 timestamp: 2018-05-07 20:21:35 +0300 message: Cherry-picked from MyRocks upstream: Issue #809: Wrong query result with bloom filters In reverse-ordered column families, if one wants to start reading at the logical end of the index, they should Seek() to a key value that is not covered by the index. This may (and typically does) prevent use of a bloom filter. The calls to setup_scan_iterator() that are made for index and table scan didn't take this into account and passed eq_cond_len=INDEX_NUMBER_SIZE. Fixed them to compute and pass correct eq_cond_len. Also, removed an incorrect assert in ha_rocksdb::setup_iterator_bounds. --- storage/rocksdb/ha_rocksdb.cc | 9 ++-- .../mysql-test/rocksdb/r/bloomfilter5.result | 62 ++++++++++++++++++++++ .../mysql-test/rocksdb/t/bloomfilter5-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/bloomfilter5.test | 61 +++++++++++++++++++++ storage/rocksdb/rdb_datadic.h | 26 +++++++-- 5 files changed, 149 insertions(+), 10 deletions(-) diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 6e9e7ef..e7abac7 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -8526,7 +8526,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - kd.get_first_key(key, &key_size); + int key_start_matching_bytes = kd.get_first_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -8537,7 +8537,7 @@ int ha_rocksdb::index_first_intern(uchar *const buf) { // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes); m_scan_it->Seek(index_key); m_skip_scan_it_next_call = true; @@ -9539,7 +9539,6 @@ int ha_rocksdb::update_write_row(const uchar *const old_data, void ha_rocksdb::setup_iterator_bounds(const Rdb_key_def &kd, const rocksdb::Slice &eq_cond) { uint eq_cond_len = eq_cond.size(); - DBUG_ASSERT(eq_cond_len >= Rdb_key_def::INDEX_NUMBER_SIZE); memcpy(m_eq_cond_upper_bound, eq_cond.data(), eq_cond_len); kd.successor(m_eq_cond_upper_bound, eq_cond_len); memcpy(m_eq_cond_lower_bound, eq_cond.data(), eq_cond_len); @@ -9635,12 +9634,12 @@ void ha_rocksdb::release_scan_iterator() { void ha_rocksdb::setup_iterator_for_rnd_scan() { uint key_size; - m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); + int key_start_matching_bytes = m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); setup_scan_iterator(*m_pk_descr, &table_key, false, - Rdb_key_def::INDEX_NUMBER_SIZE); + key_start_matching_bytes); m_scan_it->Seek(table_key); m_skip_scan_it_next_call = true; } diff --git a/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result new file mode 100644 index 0000000..4f6702b --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result @@ -0,0 +1,62 @@ +# +# Issue #809: Wrong query result with bloom filters +# +create table t1 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t3(seq int); +insert into t3 +select +1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; +insert t1 +select +(seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; +set global rocksdb_force_flush_memtable_now=1; +# Full table scan +explain +select * from t1 limit 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 +select * from t1 limit 10; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +1000 2000 2000 9999 9999 1000 aaabbbccc +1000 2000 2000 9998 9998 1000 aaabbbccc +1000 2000 2000 9997 9997 1000 aaabbbccc +1000 2000 2000 9996 9996 1000 aaabbbccc +1000 1999 1999 9995 9995 1000 aaabbbccc +1000 1999 1999 9994 9994 1000 aaabbbccc +1000 1999 1999 9993 9993 1000 aaabbbccc +1000 1999 1999 9992 9992 1000 aaabbbccc +1000 1999 1999 9991 9991 1000 aaabbbccc +# An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 122 NULL 1 +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +create table t4 ( +pk int unsigned not null primary key, +kp1 int unsigned not null, +kp2 int unsigned not null, +col1 int unsigned, +key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); +# This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; +pk kp1 kp2 col1 +drop table t1,t2,t3,t4; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt new file mode 100644 index 0000000..7d63dc7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt @@ -0,0 +1 @@ +--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;}}; diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test new file mode 100644 index 0000000..00968aeb --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test @@ -0,0 +1,61 @@ + +--echo # +--echo # Issue #809: Wrong query result with bloom filters +--echo # + +create table t1 ( + id1 bigint not null, + id2 bigint not null, + id3 varchar(100) not null, + id4 int not null, + id5 int not null, + value bigint, + value2 varchar(100), + primary key (id1, id2, id3, id4) COMMENT 'rev:bf5_1' +) engine=ROCKSDB; + + +create table t2(a int); +insert into t2 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t3(seq int); +insert into t3 +select + 1+ A.a + B.a* 10 + C.a * 100 + D.a * 1000 +from t2 A, t2 B, t2 C, t2 D; + +insert t1 +select + (seq+9) div 10, (seq+4) div 5, (seq+4) div 5, seq, seq, 1000, "aaabbbccc" +from t3; + +set global rocksdb_force_flush_memtable_now=1; + +--echo # Full table scan +explain +select * from t1 limit 10; +select * from t1 limit 10; + +--echo # An index scan starting from the end of the table: +explain +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +select * from t1 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; + +# A testcase for an assertion that the fix is removing +# The only requirement for the used column family is that it is reverse-ordered +create table t4 ( + pk int unsigned not null primary key, + kp1 int unsigned not null, + kp2 int unsigned not null, + col1 int unsigned, + key(kp1, kp2) comment 'rev:bf5_2' +) engine=rocksdb; + +insert into t4 values (1, 0xFFFF, 0xFFF, 12345); + +--echo # This must not fail an assert: +select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; + +drop table t1,t2,t3,t4; + + diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 326570c..f97c0d0 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -238,12 +238,28 @@ class Rdb_key_def { *size = INDEX_NUMBER_SIZE; } - /* Get the first key that you need to position at to start iterating. - Returns a "supremum" or "infimum" for this index based on collation order + /* + Get the first key that you need to position at to start iterating. + + Stores into *key a "supremum" or "infimum" key value for the index. + + @return Number of bytes in the key that are usable for bloom filter use. */ - inline void get_first_key(uchar *const key, uint *const size) const { - return m_is_reverse_cf ? get_supremum_key(key, size) - : get_infimum_key(key, size); + inline int get_first_key(uchar *const key, uint *const size) const { + if (m_is_reverse_cf) + get_supremum_key(key, size); + else + get_infimum_key(key, size); + + /* Find out how many bytes of infimum are the same as m_index_number */ + uchar unmodified_key[INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(unmodified_key, m_index_number); + int i; + for (i = 0; i < INDEX_NUMBER_SIZE; i++) { + if (key[i] != unmodified_key[i]) + break; + } + return i; } /* Make a key that is right after the given key. */