commits@lists.mariadb.org

[Commits] 76354bb2bd5: Apply patch: Merge index scan with index reads
by psergey 17 May '21
revision-id: 76354bb2bd5ae7776f7f5771ffa24b562b73d420 (percona-202102-46-g76354bb2bd5)
parent(s): 6bb06db4fcb091dcafb0f896a962b7321078b44e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-05-17 16:56:37 +0300
message:

Apply patch: Merge index scan with index reads

Summary:
This merges index scans with index lookups, so that the code paths that
retry with a snapshot refresh are consolidated into one location.

Also, in `position_to_correct_key`, I save a redundant call to
`pack_index_tuple`, since we just packed the tuple earlier. To do this,
the packed tuple is saved in `m_sk_match_prefix_buf`.

Test Plan: mtr

Reviewers: luqun, herman, yzha

Subscribers: pgl, vinaybhat

Differential Revision: https://phabricator.intern.facebook.com/D23358423
---
 mysql-test/suite/rocksdb/r/check_flags.result |   2 +-
 mysql-test/suite/rocksdb/t/check_flags.test   |   2 +-
 sql/handler.cc                                |   5 +-
 sql/handler.h                                 |   3 +-
 storage/rocksdb/ha_rocksdb.cc                 | 303 ++++++++++++--------------
 storage/rocksdb/ha_rocksdb.h                  |   9 +-
 storage/rocksdb/rdb_datadic.h                 |  58 -----
 7 files changed, 152 insertions(+), 230 deletions(-)

diff --git a/mysql-test/suite/rocksdb/r/check_flags.result b/mysql-test/suite/rocksdb/r/check_flags.result
index 32369c12136..46ffdcfe6a4 100644
--- a/mysql-test/suite/rocksdb/r/check_flags.result
+++ b/mysql-test/suite/rocksdb/r/check_flags.result
@@ -34,7 +34,7 @@ KILL QUERY $conn1_id;
 set debug_sync='now SIGNAL go';
 ERROR 70100: Query execution was interrupted
 set debug_sync='RESET';
-set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go';
+set debug_sync='rocksdb.check_flags_rke SIGNAL parked WAIT_FOR go';
 SELECT kp1 FROM t3 ORDER BY kp1;
 set debug_sync='now WAIT_FOR parked';
 KILL QUERY $conn1_id;
diff --git a/mysql-test/suite/rocksdb/t/check_flags.test b/mysql-test/suite/rocksdb/t/check_flags.test
index 5763872fa1d..ab02712e585 100644
--- a/mysql-test/suite/rocksdb/t/check_flags.test
+++ b/mysql-test/suite/rocksdb/t/check_flags.test
@@ -91,7 +91,7 @@ set debug_sync='RESET';
 
 connection conn1;
 
-set debug_sync='rocksdb.check_flags_ser SIGNAL parked WAIT_FOR go';
+set debug_sync='rocksdb.check_flags_rke SIGNAL parked WAIT_FOR go';
 send SELECT kp1 FROM t3 ORDER BY kp1;
 
 connection default;
diff --git a/sql/handler.cc b/sql/handler.cc
index f6a59733a08..ffb06d1d8cb 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -7807,9 +7807,11 @@ int handler::index_read_idx_map(uchar *buf, uint index, const uchar *key,
   return error ? error : error1;
 }
 
-uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map) {
+uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map,
+                       uint *count) {
   /* works only with key prefixes */
   DBUG_ASSERT(((keypart_map + 1) & keypart_map) == 0);
+  if (count) *count = 0;
 
   KEY *key_info = table->key_info + key;
   KEY_PART_INFO *key_part = key_info->key_part;
@@ -7820,6 +7822,7 @@ uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map) {
     length += key_part->store_length;
     keypart_map >>= 1;
     key_part++;
+    if (count) (*count)++;
   }
   return length;
 }
diff --git a/sql/handler.h b/sql/handler.h
index 24f68461368..b5242ff904a 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -3626,7 +3626,8 @@ class ha_statistics {
   @return  Length of used key parts.
 */
-uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map);
+uint calculate_key_len(TABLE *table, uint key, key_part_map keypart_map,
+                       uint *count = nullptr);
 /*
   bitmap with first N+1 bits set
   (keypart_map for a key prefix of [0..N] keyparts)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 077b5116a5e..c9ce203fb6c 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -7282,7 +7282,6 @@ int ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd,
         should_hide_ttl_rec(
             kd, iter->value(),
             get_or_create_tx(table->in_use)->m_snapshot_timestamp)) {
-      DEBUG_SYNC(thd, "rocksdb.check_flags_ser");
       if (thd && thd->killed) {
         return HA_ERR_QUERY_INTERRUPTED;
       }
@@ -8771,6 +8770,7 @@ int ha_rocksdb::read_key_exact(const Rdb_key_def &kd,
   rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice);
 
   while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) {
+    DEBUG_SYNC(thd, "rocksdb.check_flags_rke");
     if (thd && thd->killed) {
       return HA_ERR_QUERY_INTERRUPTED;
     }
@@ -8860,11 +8860,12 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
   return is_valid_iterator(m_scan_it) ? HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND;
 }
 
-int ha_rocksdb::position_to_correct_key(
-    const Rdb_key_def &kd, const enum ha_rkey_function &find_flag,
-    const bool full_key_match, const uchar *const key,
-    const key_part_map &keypart_map, const rocksdb::Slice &key_slice,
-    bool *const move_forward, const int64_t ttl_filter_ts) {
+int ha_rocksdb::position_to_correct_key(const Rdb_key_def &kd,
+                                        const enum ha_rkey_function &find_flag,
+                                        const bool full_key_match,
+                                        const rocksdb::Slice &key_slice,
+                                        bool *const move_forward,
+                                        const int64_t ttl_filter_ts) {
   int rc = 0;
   *move_forward = true;
 
@@ -8913,10 +8914,9 @@ int ha_rocksdb::position_to_correct_key(
       /* The record we've got is not from this index */
       rc = HA_ERR_KEY_NOT_FOUND;
     } else if (find_flag == HA_READ_PREFIX_LAST) {
-      uint size = kd.pack_index_tuple(table, m_pack_buffer,
-                                      m_sk_packed_tuple, key, keypart_map);
       rocksdb::Slice lookup_tuple(
-          reinterpret_cast<char *>(m_sk_packed_tuple), size);
+          reinterpret_cast<char *>(m_sk_match_prefix_buf),
+          m_sk_match_length);
 
       // We need to compare the key we've got with the original search
       // prefix.
@@ -8989,6 +8989,7 @@ int ha_rocksdb::read_row_from_primary_key(uchar *const buf) {
   m_last_rowkey.copy(pk_data, pk_size, &my_charset_bin);
 
   if (m_lock_rows != RDB_LOCK_NONE) {
+    DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete");
     /* We need to put a lock and re-read */
     rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
   } else {
@@ -9042,6 +9043,7 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
           m_converter->get_verify_row_debug_checksums());
       inc_covered_sk_lookup();
     } else {
+      DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete_sk");
       rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
     }
   }
@@ -9197,9 +9199,9 @@ int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) {
     HA_EXIT_SUCCESS  OK
     other            HA_ERR error code (can be SE-specific)
 */
-int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
-                               key_part_map keypart_map,
-                               enum ha_rkey_function find_flag) {
+int ha_rocksdb::index_read_intern(uchar *const buf, const uchar *const key,
+                                  key_part_map keypart_map,
+                                  enum ha_rkey_function find_flag) {
   DBUG_ENTER_FUNC();
 
   check_build_decoder();
@@ -9217,92 +9219,114 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
     DBUG_RETURN(rc);
   }
 
-  ha_statistic_increment(&System_status_var::ha_read_key_count);
   const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
-  const uint actual_key_parts = kd.get_key_parts();
-  bool using_full_key = is_using_full_key(keypart_map, actual_key_parts);
+  bool using_full_key = false;
 
   /* By default, we don't need the retrieved records to match the prefix */
   m_sk_match_prefix = nullptr;
 
   /* TODO(yzha) - row stats are gone in 8.0
     stats.rows_requested++; */
 
-  if (active_index == table->s->primary_key && find_flag == HA_READ_KEY_EXACT &&
-      using_full_key) {
+  uint packed_size;
+
+  if (!key) {
+    // If no key is passed in, then we are doing a full index scan.
+    kd.get_infimum_key(m_sk_packed_tuple, &packed_size);
+  } else {
+    const uint actual_key_parts = kd.get_key_parts();
+    using_full_key = is_using_full_key(keypart_map, actual_key_parts);
     /*
-      Equality lookup over primary key, using full tuple.
-      This is a special case, use DB::Get.
+      Handle some special cases when we do exact key lookups.
     */
-    const uint size = kd.pack_index_tuple(table, m_pack_buffer,
-                                          m_pk_packed_tuple, key, keypart_map);
-    bool skip_lookup = is_blind_delete_enabled();
-
-    rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false);
-
-    if (!rc && !skip_lookup) {
-      /* TODO(yzha) - row stats are gone in 8.0
-        stats.rows_read++;
-        stats.rows_index_first++; */
-      update_row_stats(ROWS_READ);
-    }
-    DBUG_RETURN(rc);
-  }
+    if (find_flag == HA_READ_KEY_EXACT && using_full_key) {
+      if (active_index == table->s->primary_key) {
+        /*
+          Equality lookup over primary key, using full tuple.
+          This is a special case, use DB::Get.
+        */
+        const uint size = kd.pack_index_tuple(
+            table, m_pack_buffer, m_pk_packed_tuple, key, keypart_map);
+        bool skip_lookup = is_blind_delete_enabled();
+
+        rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false);
+        if (!rc && !skip_lookup) {
+          /* TODO(yzha) - row stats are gone in 8.0
+            stats.rows_read++;
+            stats.rows_index_first++; */
+          update_row_stats(ROWS_READ);
+        }
+        DBUG_RETURN(rc);
+      } else {
+        /*
+          The SQL layer sometimes sets HA_WHOLE_KEY for secondary keys lookups,
+          even though it may not include extended keys.
 
-  /*
-    Unique secondary index performs lookups without the extended key fields
-  */
-  uint packed_size;
-  if (active_index != table->s->primary_key &&
-      table->key_info[active_index].flags & HA_NOSAME &&
-      find_flag == HA_READ_KEY_EXACT && using_full_key) {
-    key_part_map tmp_map = (key_part_map(1) << table->key_info[active_index]
-                                                  .user_defined_key_parts) -
-                           1;
-    packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
-                                      key, tmp_map);
-    if (table->key_info[active_index].user_defined_key_parts !=
-        kd.get_key_parts()) {
-      using_full_key = false;
-    }
+          Adjust keypart_map so that we get the correct packing.
+        */
+        if (keypart_map == HA_WHOLE_KEY) {
+          uint avail_key_parts = 0;
+          calculate_key_len(table, active_index, keypart_map, &avail_key_parts);
+          keypart_map = make_prev_keypart_map(avail_key_parts);
+          using_full_key = is_using_full_key(keypart_map, actual_key_parts);
+        }
 
-    if (m_insert_with_update && m_dup_key_found &&
-        active_index == m_dupp_errkey) {
-      /*
-        We are in INSERT ... ON DUPLICATE KEY UPDATE, and this is a read
-        that SQL layer does to read the duplicate key.
-        Its rowid is saved in m_last_rowkey. Get the full record and return it.
-      */
+        if (table->key_info[active_index].flags & HA_NOSAME &&
+            m_insert_with_update && m_dup_key_found &&
+            active_index == m_dupp_errkey) {
+          /*
+            We are in INSERT ... ON DUPLICATE KEY UPDATE, and this is a read
+            that SQL layer does to read the duplicate key.
+            Its rowid is saved in m_last_rowkey. Get the full record and
+            return it.
+          */
 
-      DBUG_ASSERT(m_dup_key_tuple.length() >= packed_size);
-      DBUG_ASSERT(
-          memcmp(m_dup_key_tuple.ptr(), m_sk_packed_tuple, packed_size) == 0);
+#ifndef DBUG_OFF
+          packed_size = kd.pack_index_tuple(
+              table, m_pack_buffer, m_sk_packed_tuple, key, keypart_map);
+          DBUG_ASSERT(m_dup_key_tuple.length() >= packed_size);
+          DBUG_ASSERT(memcmp(m_dup_key_tuple.ptr(), m_sk_packed_tuple,
+                             packed_size) == 0);
+#endif
 
-      rc = get_row_by_rowid(buf, m_last_rowkey.ptr(), m_last_rowkey.length());
-      DBUG_RETURN(rc);
+          rc = get_row_by_rowid(buf, m_last_rowkey.ptr(),
+                                m_last_rowkey.length());
+          DBUG_RETURN(rc);
+        }
+      }
     }
-  } else {
     packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple,
                                       key, keypart_map);
   }
 
-  if ((pushed_idx_cond && pushed_idx_cond_keyno == active_index) &&
-      (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX_LAST)) {
-    /*
-      We are doing a point index lookup, and ICP is enabled. It is possible
-      that this call will be followed by ha_rocksdb->index_next_same() call.
+  if (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX_LAST) {
+    bool have_icp = pushed_idx_cond && pushed_idx_cond_keyno == active_index;
+    if (HA_READ_PREFIX_LAST || have_icp) {
+      /*
+        Save a copy of m_sk_packed_tuple for prefix matching,
 
-      Do what InnoDB does: save the lookup tuple now. We will need it in
-      index_next_same/find_icp_matching_index_rec in order to stop scanning
-      as soon as index record doesn't match the lookup tuple.
+        This is used in position_to_correct_key for the HA_READ_PREFIX_LAST
+        flag.
+      */
+      m_sk_match_length = packed_size;
+      memcpy(m_sk_match_prefix_buf, m_sk_packed_tuple, packed_size);
 
-      When not using ICP, handler::index_next_same() will make sure that rows
-      that don't match the lookup prefix are not returned.
-      row matches the lookup prefix.
-    */
-    m_sk_match_prefix = m_sk_match_prefix_buf;
-    m_sk_match_length = packed_size;
-    memcpy(m_sk_match_prefix, m_sk_packed_tuple, packed_size);
+      /*
+        We are doing a point index lookup, and ICP is enabled. It is possible
+        that this call will be followed by ha_rocksdb->index_next_same() call.
+
+        Do what InnoDB does: save the lookup tuple now. We will need it in
+        index_next_same/find_icp_matching_index_rec in order to stop scanning
+        as soon as index record doesn't match the lookup tuple.
+
+        When not using ICP, handler::index_next_same() will make sure that rows
+        that don't match the lookup prefix are not returned.
+        row matches the lookup prefix.
+      */
+      if (have_icp) {
+        m_sk_match_prefix = m_sk_match_prefix_buf;
+      }
+    }
   }
 
   int bytes_changed_by_succ = 0;
@@ -9318,12 +9342,6 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
   const uint eq_cond_len =
       calc_eq_cond_len(kd, find_flag, slice, bytes_changed_by_succ, end_range);
 
-  bool use_all_keys = false;
-  if (find_flag == HA_READ_KEY_EXACT &&
-      my_count_bits(keypart_map) == kd.get_key_parts()) {
-    use_all_keys = true;
-  }
-
   Rdb_transaction *const tx = get_or_create_tx(table->in_use);
   const bool is_new_snapshot = !tx->has_snapshot();
   // Loop as long as we get a deadlock error AND we end up creating the
@@ -9338,23 +9356,22 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
       This will open the iterator and position it at a record that's equal or
       greater than the lookup tuple.
     */
-    setup_scan_iterator(kd, &slice, use_all_keys, eq_cond_len);
+    setup_scan_iterator(kd, &slice,
+                        using_full_key && (find_flag == HA_READ_KEY_EXACT),
+                        eq_cond_len);
 
     /*
       Once we are positioned on from above, move to the position we really
       want: See storage/rocksdb/rocksdb-range-access.txt
     */
     bool move_forward;
-    rc =
-        position_to_correct_key(kd, find_flag, using_full_key, key, keypart_map,
-                                slice, &move_forward, tx->m_snapshot_timestamp);
+    rc = position_to_correct_key(kd, find_flag, using_full_key, slice,
+                                 &move_forward, tx->m_snapshot_timestamp);
 
     if (rc) {
       break;
     }
 
-    m_skip_scan_it_next_call = false;
-
     /*
       Now get the data for the row into 'buf'.  If we were using a primary key
       then we have all the rows we need.  For a secondary key we now need to
@@ -9392,6 +9409,30 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
   DBUG_RETURN(rc);
 }
 
+/*
+   See storage/rocksdb/rocksdb-range-access.txt for description of how MySQL
+   index navigation commands are converted into RocksDB lookup commands.
+
+   MyRocks needs to decide whether prefix bloom filter can be used or not.
+   To decide to use prefix bloom filter or not, calculating equal condition
+   length is needed. On equal lookups (find_flag == HA_READ_KEY_EXACT), equal
+   condition length is the same as rocksdb::Slice.size() of the start key.
+   On range scan, equal condition length is MIN(start_key, end_key) of the
+   rocksdb::Slice expression, where end_key is taken from end_range.
+
+   @return
+     HA_EXIT_SUCCESS  OK
+     other            HA_ERR error code (can be SE-specific)
+*/
+int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
+                               key_part_map keypart_map,
+                               enum ha_rkey_function find_flag) {
+  DBUG_ENTER_FUNC();
+  ha_statistic_increment(&System_status_var::ha_read_key_count);
+
+  DBUG_RETURN(index_read_intern(buf, key, keypart_map, find_flag));
+}
+
 /*
   @brief
   Scan the secondary index until we find an index record that satisfies ICP
@@ -9499,7 +9540,7 @@ int ha_rocksdb::check(THD *const thd MY_ATTRIBUTE((__unused__)),
   DBUG_ASSERT(thd != nullptr);
   DBUG_ASSERT(check_opt != nullptr);
 
-  const uint pk = pk_index(table, m_tbl_def);
+  const uint pk = table->s->primary_key;
   String rowkey_copy;
   String sec_key_copy;
   const char *const table_name = table->s->table_name.str;
@@ -9869,14 +9910,10 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
       rc = HA_ERR_QUERY_INTERRUPTED;
       break;
     }
-    if (m_skip_scan_it_next_call) {
-      m_skip_scan_it_next_call = false;
+    if (move_forward) {
+      m_scan_it->Next(); /* this call cannot fail */
     } else {
-      if (move_forward) {
-        m_scan_it->Next(); /* this call cannot fail */
-      } else {
-        m_scan_it->Prev();
-      }
+      m_scan_it->Prev();
     }
     rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index_pos()],
                                       m_scan_it, !move_forward);
@@ -9904,7 +9941,6 @@ int ha_rocksdb::index_first(uchar *const buf) {
 
   check_build_decoder();
 
-  m_sk_match_prefix = nullptr;
   ha_statistic_increment(&System_status_var::ha_read_first_count);
   int rc = index_read_intern(buf, true /* first */);
   if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
@@ -9922,7 +9958,6 @@ int ha_rocksdb::index_last(uchar *const buf) {
 
   check_build_decoder();
 
-  m_sk_match_prefix = nullptr;
   ha_statistic_increment(&System_status_var::ha_read_last_count);
   int rc = index_read_intern(buf, false /* first */);
   if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
@@ -9994,64 +10029,8 @@ int ha_rocksdb::index_last(uchar *const buf) {
 */
 int ha_rocksdb::index_read_intern(uchar *const buf, bool first) {
   DBUG_ENTER_FUNC();
-
-  uchar *key;
-  uint key_size;
-  int rc;
-
-  if (active_index == table->s->primary_key) {
-    key = m_pk_packed_tuple;
-  } else {
-    key = m_sk_packed_tuple;
-  }
-
-  DBUG_ASSERT(key != nullptr);
-
-  const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
-  bool backwards =
-      (first && kd.m_is_reverse_cf) || (!first && !kd.m_is_reverse_cf);
-
-  int key_matching_bytes;
-  if (backwards) {
-    key_matching_bytes = kd.get_last_key(key, &key_size);
-  } else {
-    key_matching_bytes = kd.get_first_key(key, &key_size);
-  }
-
-  rocksdb::Slice index_key((const char *)key, key_size);
-
-  Rdb_transaction *const tx = get_or_create_tx(table->in_use);
-  DBUG_ASSERT(tx != nullptr);
-
-  const bool is_new_snapshot = !tx->has_snapshot();
-  // Loop as long as we get a deadlock error AND we end up creating the
-  // snapshot here (i.e. it did not exist prior to this)
-  for (;;) {
-    setup_scan_iterator(kd, &index_key, false, key_matching_bytes);
-    rocksdb_smart_seek(backwards, m_scan_it, index_key);
-    m_skip_scan_it_next_call = true;
-
-    rc = index_next_with_direction(buf, !backwards);
-    if (!should_recreate_snapshot(rc, is_new_snapshot)) {
-      break; /* exit the loop */
-    }
-
-    // release the snapshot and iterator so they will be regenerated
-    tx->release_snapshot();
-    release_scan_iterator();
-  }
-
-  if (!rc) {
-    /*
-      index_next is always incremented on success, so decrement if it is
-      index_first instead
-    */
-    /* TODO(yzha) - row stats are gone in 8.0
-      stats.rows_index_first++;
-      stats.rows_index_next--; */
-  }
-
-  DBUG_RETURN(rc);
+  DBUG_RETURN(index_read_intern(
+      buf, nullptr, 0, first ? HA_READ_KEY_EXACT : HA_READ_PREFIX_LAST));
 }
 
 void ha_rocksdb::unlock_row() {
@@ -11361,14 +11340,10 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
       break;
     }
 
-    if (m_skip_scan_it_next_call) {
-      m_skip_scan_it_next_call = false;
+    if (move_forward) {
+      m_scan_it->Next(); /* this call cannot fail */
     } else {
-      if (move_forward) {
-        m_scan_it->Next(); /* this call cannot fail */
-      } else {
-        m_scan_it->Prev(); /* this call cannot fail */
-      }
+      m_scan_it->Prev(); /* this call cannot fail */
     }
 
     if (!is_valid_iterator(m_scan_it)) {
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index ba62521fa79..b5f5ad23d90 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -280,8 +280,6 @@ class ha_rocksdb : public my_core::handler {
   /* We only iterate but don't need to decode anything */
   bool m_iteration_only;
 
-  bool m_skip_scan_it_next_call;
-
   bool m_rnd_scan_started;
 
   /*
@@ -795,6 +793,10 @@ class ha_rocksdb : public my_core::handler {
                            rocksdb::Iterator *const iter,
                            bool seek_backward);
 
+  int index_read_intern(uchar *const buf, const uchar *const key,
+                        key_part_map keypart_map,
+                        enum ha_rkey_function find_flag)
+      MY_ATTRIBUTE((__warn_unused_result__));
   int index_read_intern(uchar *buf, bool first)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
 
@@ -854,8 +856,7 @@ class ha_rocksdb : public my_core::handler {
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
   int position_to_correct_key(const Rdb_key_def &kd,
                               const enum ha_rkey_function &find_flag,
-                              const bool full_key_match, const uchar *const key,
-                              const key_part_map &keypart_map,
+                              const bool full_key_match,
                               const rocksdb::Slice &key_slice,
                               bool *const move_forward,
                               const int64_t ttl_filter_ts)
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 7b31356c7f2..f6917836f23 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -299,64 +299,6 @@ class Rdb_key_def {
     *size = INDEX_ID_SIZE;
   }
 
-  /*
-    Get the first key that you need to position at to start iterating.
-    Stores into *key a "supremum" or "infimum" key value for the index.
-    @parameters key    OUT  Big Endian, value is m_index_id or
-                            m_index_id (index_num + 1)
-    @parameters size   OUT  key size, value is INDEX_ID_SIZE
-    @return Number of bytes in the key that are usable for bloom filter use.
-  */
-  inline int get_first_key(uchar *const key, uint *const size) const {
-    if (m_is_reverse_cf) {
-      get_supremum_key(key, size);
-      /* Find out how many bytes of infimum are the same as m_index_id */
-      uchar unmodified_key[INDEX_ID_SIZE];
-      rdb_netbuf_store_index_id(unmodified_key, m_index_id);
-      int i;
-      for (i = 0; i < INDEX_ID_SIZE; i++) {
-        if (key[i] != unmodified_key[i]) {
-          break;
-        }
-      }
-      return i;
-    } else {
-      get_infimum_key(key, size);
-      // For infimum key, its value will be m_index_id
-      // Thus return its own size instead.
-      return INDEX_ID_SIZE;
-    }
-  }
-
-  /*
-    The same as get_first_key, but get the key for the last entry in the index
-    @parameters key    OUT  Big Endian, value is m_index_id or
-                            m_index_id (index_num + 1)
-    @parameters size   OUT  key size, value is INDEX_NUMBER_SIZE
-
-    @return Number of bytes in the key that are usable for bloom filter use.
-  */
-  inline int get_last_key(uchar *const key, uint *const size) const {
-    if (m_is_reverse_cf) {
-      get_infimum_key(key, size);
-      // For infimum key, its value will be m_index_id
-      // Thus return its own size instead.
-      return INDEX_ID_SIZE;
-    } else {
-      get_supremum_key(key, size);
-      /* Find out how many bytes are the same as m_index_id */
-      uchar unmodified_key[INDEX_ID_SIZE];
-      rdb_netbuf_store_index_id(unmodified_key, m_index_id);
-      int i;
-      for (i = 0; i < INDEX_ID_SIZE; i++) {
-        if (key[i] != unmodified_key[i]) {
-          break;
-        }
-      }
-      return i;
-    }
-  }
-
   /* Make a key that is right after the given key. */
   static int successor(uchar *const packed_tuple, const uint len);
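For readers unfamiliar with keypart_map: it is a bitmask with one bit per
used key part, and the new `count` out-parameter on `calculate_key_len()`
simply reports how many parts the map covered, which the patch then feeds
into `make_prev_keypart_map()`. A minimal self-contained sketch of the same
loop shape, with invented lengths and simplified types rather than the
server's TABLE/KEY structures:

    #include <cstdint>
    #include <cstdio>

    // Simplified model of calculate_key_len(): walk the keypart bitmap,
    // sum the per-part store lengths, and optionally report how many
    // parts were consumed. Lengths are illustrative, not server data.
    using key_part_map = std::uint64_t;

    std::uint32_t calculate_key_len(const std::uint32_t *store_lengths,
                                    key_part_map keypart_map,
                                    std::uint32_t *count = nullptr) {
      if (count) *count = 0;
      std::uint32_t length = 0;
      for (const std::uint32_t *part = store_lengths; keypart_map; ++part) {
        length += *part;
        keypart_map >>= 1;
        if (count) (*count)++;
      }
      return length;
    }

    int main() {
      const std::uint32_t lengths[] = {4, 8, 2};  // three key parts
      std::uint32_t used = 0;
      // 0b011 is a prefix map covering the first two key parts.
      std::uint32_t len = calculate_key_len(lengths, 0b011, &used);
      std::printf("len=%u parts=%u\n", len, used);  // prints len=12 parts=2
      return 0;
    }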
[Commits] 6bb06db4fcb: Apply patch: Consolidate rnd_init/rnd_next/rnd_end into index_*
by psergey 17 May '21
revision-id: 6bb06db4fcb091dcafb0f896a962b7321078b44e (percona-202102-45-g6bb06db4fcb)
parent(s): dea55321f0f03df1a29130aa6a2d38a0f9b6e39e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-05-17 16:48:30 +0300
message:

Apply patch: Consolidate rnd_init/rnd_next/rnd_end into index_*

Summary:
Instead of maintaining separate code paths for full table scans, merge
them into the full index scan code paths. Full table scans are just full
index scans on the primary key anyway.

Test Plan: mtr

Reviewers: luqun, herman, yzha

Subscribers: pgl, vinaybhat

Differential Revision: https://phabricator.intern.facebook.com/D23358422
---
 storage/rocksdb/ha_rocksdb.cc | 157 +++++++++++++++++++-----------------------
 storage/rocksdb/ha_rocksdb.h  |  11 ++-
 2 files changed, 78 insertions(+), 90 deletions(-)

diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 7f7c6a79a2e..077b5116a5e 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -7009,7 +7009,7 @@ void ha_rocksdb::update_auto_incr_val_from_field() {
 int ha_rocksdb::load_hidden_pk_value() {
   const int save_active_index = active_index;
-  active_index = m_tbl_def->m_key_count - 1;
+  active_index = MAX_KEY;
   const uint8 save_table_status = table->m_status;
 
   Rdb_transaction *const tx = get_or_create_tx(table->in_use);
@@ -9218,7 +9218,7 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
   }
 
   ha_statistic_increment(&System_status_var::ha_read_key_count);
-  const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+  const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
   const uint actual_key_parts = kd.get_key_parts();
   bool using_full_key = is_using_full_key(keypart_map, actual_key_parts);
@@ -9415,7 +9415,7 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
 int ha_rocksdb::find_icp_matching_index_rec(const bool move_forward,
                                             uchar *const buf) {
   if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) {
-    const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+    const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
     THD *thd = ha_thd();
 
     while (1) {
@@ -9835,19 +9835,10 @@ int ha_rocksdb::records_from_index(ha_rows *num_rows, uint index) {
 */
 int ha_rocksdb::index_next(uchar *const buf) {
   DBUG_ENTER_FUNC();
-
   check_build_decoder();
 
-  bool moves_forward = true;
   ha_statistic_increment(&System_status_var::ha_read_next_count);
-  if (m_key_descr_arr[active_index]->m_is_reverse_cf) {
-    moves_forward = false;
-  }
-
-  int rc = index_next_with_direction(buf, moves_forward);
-  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
-
-  DBUG_RETURN(rc);
+  DBUG_RETURN(index_next_intern(buf));
 }
 
 /**
@@ -9857,19 +9848,10 @@ int ha_rocksdb::index_next(uchar *const buf) {
 */
 int ha_rocksdb::index_prev(uchar *const buf) {
   DBUG_ENTER_FUNC();
-
   check_build_decoder();
 
-  bool moves_forward = false;
   ha_statistic_increment(&System_status_var::ha_read_prev_count);
-  if (m_key_descr_arr[active_index]->m_is_reverse_cf) {
-    moves_forward = true;
-  }
-
-  int rc = index_next_with_direction(buf, moves_forward);
-  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
-
-  DBUG_RETURN(rc);
+  DBUG_RETURN(index_prev_intern(buf));
 }
 
 int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
   DBUG_ENTER_FUNC();
 
   int rc;
 
-  if (active_index == pk_index(table, m_tbl_def)) {
+  if (active_index == table->s->primary_key) {
     rc = rnd_next_with_direction(buf, move_forward);
   } else {
     THD *thd = ha_thd();
@@ -9896,7 +9878,7 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
         m_scan_it->Prev();
       }
     }
-    rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index],
+    rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index_pos()],
                                       m_scan_it, !move_forward);
     if (rc != HA_EXIT_SUCCESS) {
       break;
@@ -10017,7 +9999,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, bool first) {
   uint key_size;
   int rc;
 
-  if (is_pk(active_index, table, m_tbl_def)) {
+  if (active_index == table->s->primary_key) {
     key = m_pk_packed_tuple;
   } else {
     key = m_sk_packed_tuple;
@@ -10025,7 +10007,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, bool first) {
 
   DBUG_ASSERT(key != nullptr);
 
-  const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+  const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
   bool backwards =
       (first && kd.m_is_reverse_cf) || (!first && !kd.m_is_reverse_cf);
@@ -10166,6 +10148,11 @@ uint ha_rocksdb::pk_index(const TABLE *const table_arg,
              : table_arg->s->primary_key;
 }
 
+/* Returns the index into m_key_descr_arr array based on active_index */
+uint ha_rocksdb::active_index_pos() {
+  return active_index == MAX_KEY ? m_tbl_def->m_key_count - 1 : active_index;
+}
+
 /* Returns true if given index number is a primary key */
 bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg,
                        const Rdb_tbl_def *const tbl_def_arg) {
@@ -11292,50 +11279,21 @@ void ha_rocksdb::release_scan_iterator() {
   }
 }
 
-void ha_rocksdb::setup_iterator_for_rnd_scan() {
-  uint key_size;
-
-  int key_start_matching_bytes =
-      m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
-
-  rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
-
-  setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes);
-  m_scan_it->Seek(table_key);
-  m_skip_scan_it_next_call = true;
-}
-
 /**
   @return
     HA_EXIT_SUCCESS  OK
     other            HA_ERR error code (can be SE-specific)
 */
-int ha_rocksdb::rnd_init(bool scan) {
+int ha_rocksdb::rnd_init(bool) {
   DBUG_ENTER_FUNC();
 
   m_need_build_decoder = true;
   active_index = table->s->primary_key;
 
-  THD *thd = ha_thd();
-  if (thd && thd->killed) {
-    DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
-  }
-
-  Rdb_transaction *const tx = get_or_create_tx(table->in_use);
-
-  if (scan) {
-    m_rnd_scan_is_new_snapshot = !tx->has_snapshot();
-    setup_iterator_for_rnd_scan();
-  } else {
-    /* We don't need any preparations for rnd_pos() calls. */
-  }
-
-  // If m_lock_rows is on then we will be doing a get_for_update when accessing
-  // the index, so don't acquire the snapshot right away.  Otherwise acquire
-  // the snapshot immediately.
-  tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE);
-
-  DBUG_RETURN(HA_EXIT_SUCCESS);
+  m_rnd_scan_started = false;
+  DBUG_RETURN(
+      index_init(has_hidden_pk(table) ? MAX_KEY : pk_index(table, m_tbl_def),
+                 false /* sorted */));
 }
 
 /**
@@ -11350,20 +11308,22 @@ int ha_rocksdb::rnd_next(uchar *const buf) {
   int rc;
   ha_statistic_increment(&System_status_var::ha_read_rnd_next_count);
 
-  for (;;) {
-    rc = rnd_next_with_direction(buf, true);
-    if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) {
-      break; /* exit the loop */
+  /*
+    Since order does not matter, the scan will occur go with natural index
+    order.
+  */
+  bool is_reverse_cf = m_key_descr_arr[active_index_pos()]->m_is_reverse_cf;
+  if (!m_rnd_scan_started) {
+    rc = index_read_intern(buf, !is_reverse_cf /* first */);
+    m_rnd_scan_started = true;
+  } else {
+    if (is_reverse_cf) {
+      rc = index_prev_intern(buf);
+    } else {
+      rc = index_next_intern(buf);
     }
-    // release the snapshot and iterator and then regenerate them
-    Rdb_transaction *tx = get_or_create_tx(table->in_use);
-    tx->release_snapshot();
-    release_scan_iterator();
-    setup_iterator_for_rnd_scan();
   }
-
-  m_rnd_scan_is_new_snapshot = false;
-
   if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
 
   DBUG_RETURN(rc);
@@ -11492,12 +11452,9 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
 */
 int ha_rocksdb::rnd_end() {
   DBUG_ENTER_FUNC();
-
+  DBUG_RETURN(index_end());
   m_need_build_decoder = false;
-
-  release_scan_iterator();
-
-  DBUG_RETURN(HA_EXIT_SUCCESS);
 }
 
 void ha_rocksdb::build_decoder() {
@@ -11561,6 +11518,34 @@ int ha_rocksdb::index_end() {
   DBUG_RETURN(HA_EXIT_SUCCESS);
 }
 
+int ha_rocksdb::index_next_intern(uchar *const buf) {
+  DBUG_ENTER_FUNC();
+
+  bool moves_forward = true;
+  if (m_key_descr_arr[active_index_pos()]->m_is_reverse_cf) {
+    moves_forward = false;
+  }
+
+  int rc = index_next_with_direction(buf, moves_forward);
+  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+  DBUG_RETURN(rc);
+}
+
+int ha_rocksdb::index_prev_intern(uchar *const buf) {
+  DBUG_ENTER_FUNC();
+
+  bool moves_forward = false;
+  if (m_key_descr_arr[active_index_pos()]->m_is_reverse_cf) {
+    moves_forward = true;
+  }
+
+  int rc = index_next_with_direction(buf, moves_forward);
+  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+  DBUG_RETURN(rc);
+}
+
 /**
   Called by the partition manager for truncating tables.
@@ -14025,23 +14010,21 @@ int ha_rocksdb::inplace_populate_sk(
   }
 
   /*
-    Note: We pass in the currently existing table + tbl_def object here,
+    Note: We use the currently existing table + tbl_def object here,
     as the pk index position may have changed in the case of hidden primary
     keys.
   */
-  const uint pk = pk_index(table, m_tbl_def);
-  res = ha_index_init(pk, true);
-  if (res) DBUG_RETURN(res);
+  ha_rnd_init(true /* scan */);
 
   /* Scan each record in the primary key in order */
-  for (res = index_first(table->record[0]); res == 0;
-       res = index_next(table->record[0])) {
+  for (res = ha_rnd_next(table->record[0]); res == 0;
+       res = ha_rnd_next(table->record[0])) {
     longlong hidden_pk_id = 0;
     if (hidden_pk_exists &&
         (res = read_hidden_pk_id_from_rowkey(&hidden_pk_id))) {
       // NO_LINT_DEBUG
       sql_print_error("Error retrieving hidden pk id.");
-      ha_index_end();
+      ha_rnd_end();
       DBUG_RETURN(res);
     }
@@ -14062,7 +14045,7 @@ int ha_rocksdb::inplace_populate_sk(
       disk in sorted chunks.
     */
     if ((res = rdb_merge.add(key, val))) {
-      ha_index_end();
+      ha_rnd_end();
       DBUG_RETURN(res);
     }
   }
@@ -14070,11 +14053,11 @@ int ha_rocksdb::inplace_populate_sk(
   if (res != HA_ERR_END_OF_FILE) {
     // NO_LINT_DEBUG
     sql_print_error("Error retrieving index entry from primary key.");
-    ha_index_end();
+    ha_rnd_end();
     DBUG_RETURN(res);
   }
 
-  ha_index_end();
+  ha_rnd_end();
 
   /*
     Perform an n-way merge of n sorted buffers on disk, then writes all
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 18c40fc1ca9..ba62521fa79 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -282,8 +282,7 @@ class ha_rocksdb : public my_core::handler {
 
   bool m_skip_scan_it_next_call;
 
-  /* true means we are accessing the first row after a snapshot was created */
-  bool m_rnd_scan_is_new_snapshot;
+  bool m_rnd_scan_started;
 
   /*
     true means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by
@@ -331,7 +330,6 @@ class ha_rocksdb : public my_core::handler {
       MY_ATTRIBUTE((__nonnull__(2, 3), __warn_unused_result__));
   int secondary_index_read(const int keyno, uchar *const buf)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
-  void setup_iterator_for_rnd_scan();
   static void setup_iterator_bounds(const Rdb_key_def &kd,
                                     const rocksdb::Slice &eq_cond,
                                     size_t bound_len, uchar *const lower_bound,
@@ -539,6 +537,8 @@ class ha_rocksdb : public my_core::handler {
                           const Rdb_tbl_def *const tbl_def_arg)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
 
+  uint active_index_pos() MY_ATTRIBUTE((__warn_unused_result__));
+
   static bool is_pk(const uint index, const TABLE *table_arg,
                     const Rdb_tbl_def *tbl_def_arg)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -644,6 +644,11 @@ class ha_rocksdb : public my_core::handler {
   int index_last(uchar *const buf) override
       MY_ATTRIBUTE((__warn_unused_result__));
 
+  int index_next_intern(uchar *const buf)
+      MY_ATTRIBUTE((__warn_unused_result__));
+  int index_prev_intern(uchar *const buf)
+      MY_ATTRIBUTE((__warn_unused_result__));
+
   class Item *idx_cond_push(uint keyno, class Item *const idx_cond) override;
   /*
     Default implementation from cancel_pushed_idx_cond() suits us
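The consolidation works because a full table scan is just a full scan of the
primary key; the remaining subtlety is direction, since an index stored in a
reverse column family must be walked backward physically to produce forward
logical order. A tiny sketch of that rule as encoded by the new
index_next_intern()/index_prev_intern() pair (illustrative, not the server
code):

    #include <cassert>

    // Logical next/prev versus physical iterator movement: the direction
    // flips exactly when the index lives in a reverse-ordered column family.
    bool physical_move_forward(bool logical_next, bool is_reverse_cf) {
      return logical_next != is_reverse_cf;
    }

    int main() {
      assert(physical_move_forward(true, false));    // next on a normal CF
      assert(!physical_move_forward(true, true));    // next on a reverse CF
      assert(!physical_move_forward(false, false));  // prev on a normal CF
      assert(physical_move_forward(false, true));    // prev on a reverse CF
      return 0;
    }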
[Commits] 31a395578da: Apply patch: Consolidate rnd_init/rnd_next/rnd_end into index_*
by psergey 17 May '21
revision-id: 31a395578da893e201ca58a4e20cd43596c9c295 (percona-202102-45-g31a395578da)
parent(s): dea55321f0f03df1a29130aa6a2d38a0f9b6e39e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-05-17 16:40:35 +0300
message:

Apply patch: Consolidate rnd_init/rnd_next/rnd_end into index_*

Summary:
Instead of maintaining separate code paths for full table scans, merge
them into the full index scan code paths. Full table scans are just full
index scans on the primary key anyway.

Test Plan: mtr

Reviewers: luqun, herman, yzha

Subscribers: pgl, vinaybhat

Differential Revision: https://phabricator.intern.facebook.com/D23358422
---
 storage/rocksdb/ha_rocksdb.cc | 153 +++++++++++++++++++-----------------------
 storage/rocksdb/ha_rocksdb.h  |  11 ++-
 2 files changed, 77 insertions(+), 87 deletions(-)

diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 7f7c6a79a2e..a5afc625139 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -7009,7 +7009,7 @@ void ha_rocksdb::update_auto_incr_val_from_field() {
 int ha_rocksdb::load_hidden_pk_value() {
   const int save_active_index = active_index;
-  active_index = m_tbl_def->m_key_count - 1;
+  active_index = MAX_KEY;
   const uint8 save_table_status = table->m_status;
 
   Rdb_transaction *const tx = get_or_create_tx(table->in_use);
@@ -9218,7 +9218,7 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
   }
 
   ha_statistic_increment(&System_status_var::ha_read_key_count);
-  const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+  const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
   const uint actual_key_parts = kd.get_key_parts();
   bool using_full_key = is_using_full_key(keypart_map, actual_key_parts);
@@ -9415,7 +9415,7 @@ int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key,
 int ha_rocksdb::find_icp_matching_index_rec(const bool move_forward,
                                             uchar *const buf) {
   if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) {
-    const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+    const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
     THD *thd = ha_thd();
 
     while (1) {
@@ -9835,19 +9835,10 @@ int ha_rocksdb::records_from_index(ha_rows *num_rows, uint index) {
 */
 int ha_rocksdb::index_next(uchar *const buf) {
   DBUG_ENTER_FUNC();
-
   check_build_decoder();
 
-  bool moves_forward = true;
   ha_statistic_increment(&System_status_var::ha_read_next_count);
-  if (m_key_descr_arr[active_index]->m_is_reverse_cf) {
-    moves_forward = false;
-  }
-
-  int rc = index_next_with_direction(buf, moves_forward);
-  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
-
-  DBUG_RETURN(rc);
+  DBUG_RETURN(index_next_intern(buf));
 }
 
 /**
@@ -9857,19 +9848,10 @@ int ha_rocksdb::index_next(uchar *const buf) {
 */
 int ha_rocksdb::index_prev(uchar *const buf) {
   DBUG_ENTER_FUNC();
-
   check_build_decoder();
 
-  bool moves_forward = false;
   ha_statistic_increment(&System_status_var::ha_read_prev_count);
-  if (m_key_descr_arr[active_index]->m_is_reverse_cf) {
-    moves_forward = true;
-  }
-
-  int rc = index_next_with_direction(buf, moves_forward);
-  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
-
-  DBUG_RETURN(rc);
+  DBUG_RETURN(index_prev_intern(buf));
 }
 
 int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
   DBUG_ENTER_FUNC();
 
   int rc;
 
-  if (active_index == pk_index(table, m_tbl_def)) {
+  if (active_index == table->s->primary_key) {
     rc = rnd_next_with_direction(buf, move_forward);
   } else {
     THD *thd = ha_thd();
@@ -9896,7 +9878,7 @@ int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) {
         m_scan_it->Prev();
       }
     }
-    rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index],
+    rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index_pos()],
                                       m_scan_it, !move_forward);
    if (rc != HA_EXIT_SUCCESS) {
      break;
@@ -10017,7 +9999,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, bool first) {
   uint key_size;
   int rc;
 
-  if (is_pk(active_index, table, m_tbl_def)) {
+  if (active_index == table->s->primary_key) {
     key = m_pk_packed_tuple;
   } else {
     key = m_sk_packed_tuple;
@@ -10025,7 +10007,7 @@ int ha_rocksdb::index_read_intern(uchar *const buf, bool first) {
 
   DBUG_ASSERT(key != nullptr);
 
-  const Rdb_key_def &kd = *m_key_descr_arr[active_index];
+  const Rdb_key_def &kd = *m_key_descr_arr[active_index_pos()];
   bool backwards =
       (first && kd.m_is_reverse_cf) || (!first && !kd.m_is_reverse_cf);
@@ -10166,6 +10148,11 @@ uint ha_rocksdb::pk_index(const TABLE *const table_arg,
              : table_arg->s->primary_key;
 }
 
+/* Returns the index into m_key_descr_arr array based on active_index */
+uint ha_rocksdb::active_index_pos() {
+  return active_index == MAX_KEY ? m_tbl_def->m_key_count - 1 : active_index;
+}
+
 /* Returns true if given index number is a primary key */
 bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg,
                        const Rdb_tbl_def *const tbl_def_arg) {
@@ -11292,50 +11279,21 @@ void ha_rocksdb::release_scan_iterator() {
   }
 }
 
-void ha_rocksdb::setup_iterator_for_rnd_scan() {
-  uint key_size;
-
-  int key_start_matching_bytes =
-      m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size);
-
-  rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
-
-  setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes);
-  m_scan_it->Seek(table_key);
-  m_skip_scan_it_next_call = true;
-}
-
 /**
   @return
    HA_EXIT_SUCCESS  OK
    other            HA_ERR error code (can be SE-specific)
 */
-int ha_rocksdb::rnd_init(bool scan) {
+int ha_rocksdb::rnd_init(bool) {
   DBUG_ENTER_FUNC();
 
   m_need_build_decoder = true;
   active_index = table->s->primary_key;
 
-  THD *thd = ha_thd();
-  if (thd && thd->killed) {
-    DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED);
-  }
-
-  Rdb_transaction *const tx = get_or_create_tx(table->in_use);
-
-  if (scan) {
-    m_rnd_scan_is_new_snapshot = !tx->has_snapshot();
-    setup_iterator_for_rnd_scan();
-  } else {
-    /* We don't need any preparations for rnd_pos() calls. */
-  }
-
-  // If m_lock_rows is on then we will be doing a get_for_update when accessing
-  // the index, so don't acquire the snapshot right away.  Otherwise acquire
-  // the snapshot immediately.
-  tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE);
-
-  DBUG_RETURN(HA_EXIT_SUCCESS);
+  m_rnd_scan_started = false;
+  DBUG_RETURN(
+      index_init(has_hidden_pk(table) ? MAX_KEY : pk_index(table, m_tbl_def),
+                 false /* sorted */));
 }
 
 /**
@@ -11350,20 +11308,22 @@ int ha_rocksdb::rnd_next(uchar *const buf) {
   int rc;
   ha_statistic_increment(&System_status_var::ha_read_rnd_next_count);
 
-  for (;;) {
-    rc = rnd_next_with_direction(buf, true);
-    if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) {
-      break; /* exit the loop */
+  /*
+    Since order does not matter, the scan will occur go with natural index
+    order.
+  */
+  bool is_reverse_cf = m_key_descr_arr[active_index_pos()]->m_is_reverse_cf;
+  if (!m_rnd_scan_started) {
+    rc = index_read_intern(buf, !is_reverse_cf /* first */);
+    m_rnd_scan_started = true;
+  } else {
+    if (is_reverse_cf) {
+      rc = index_prev_intern(buf);
+    } else {
+      rc = index_next_intern(buf);
    }
-    // release the snapshot and iterator and then regenerate them
-    Rdb_transaction *tx = get_or_create_tx(table->in_use);
-    tx->release_snapshot();
-    release_scan_iterator();
-    setup_iterator_for_rnd_scan();
   }
-
-  m_rnd_scan_is_new_snapshot = false;
-
   if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
 
   DBUG_RETURN(rc);
@@ -11492,12 +11452,9 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
 */
 int ha_rocksdb::rnd_end() {
   DBUG_ENTER_FUNC();
-
+  DBUG_RETURN(index_end());
   m_need_build_decoder = false;
-
-  release_scan_iterator();
-
-  DBUG_RETURN(HA_EXIT_SUCCESS);
 }
 
 void ha_rocksdb::build_decoder() {
@@ -11561,6 +11518,34 @@ int ha_rocksdb::index_end() {
   DBUG_RETURN(HA_EXIT_SUCCESS);
 }
 
+int ha_rocksdb::index_next_intern(uchar *const buf) {
+  DBUG_ENTER_FUNC();
+
+  bool moves_forward = true;
+  if (m_key_descr_arr[active_index_pos()]->m_is_reverse_cf) {
+    moves_forward = false;
+  }
+
+  int rc = index_next_with_direction(buf, moves_forward);
+  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+  DBUG_RETURN(rc);
+}
+
+int ha_rocksdb::index_prev_intern(uchar *const buf) {
+  DBUG_ENTER_FUNC();
+
+  bool moves_forward = false;
+  if (m_key_descr_arr[active_index_pos()]->m_is_reverse_cf) {
+    moves_forward = true;
+  }
+
+  int rc = index_next_with_direction(buf, moves_forward);
+  if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE;
+
+  DBUG_RETURN(rc);
+}
+
 /**
   Called by the partition manager for truncating tables.
@@ -14025,7 +14010,7 @@ int ha_rocksdb::inplace_populate_sk(
   }
 
   /*
-    Note: We pass in the currently existing table + tbl_def object here,
+    Note: We use the currently existing table + tbl_def object here,
     as the pk index position may have changed in the case of hidden primary
     keys.
  */
@@ -14034,14 +14019,14 @@ int ha_rocksdb::inplace_populate_sk(
   if (res) DBUG_RETURN(res);
 
   /* Scan each record in the primary key in order */
-  for (res = index_first(table->record[0]); res == 0;
-       res = index_next(table->record[0])) {
+  for (res = ha_rnd_next(table->record[0]); res == 0;
+       res = ha_rnd_next(table->record[0])) {
     longlong hidden_pk_id = 0;
     if (hidden_pk_exists &&
        (res = read_hidden_pk_id_from_rowkey(&hidden_pk_id))) {
      // NO_LINT_DEBUG
      sql_print_error("Error retrieving hidden pk id.");
-      ha_index_end();
+      ha_rnd_end();
      DBUG_RETURN(res);
    }
@@ -14062,7 +14047,7 @@ int ha_rocksdb::inplace_populate_sk(
       disk in sorted chunks.
     */
     if ((res = rdb_merge.add(key, val))) {
-      ha_index_end();
+      ha_rnd_end();
       DBUG_RETURN(res);
     }
   }
@@ -14070,11 +14055,11 @@ int ha_rocksdb::inplace_populate_sk(
   if (res != HA_ERR_END_OF_FILE) {
     // NO_LINT_DEBUG
     sql_print_error("Error retrieving index entry from primary key.");
-    ha_index_end();
+    ha_rnd_end();
     DBUG_RETURN(res);
   }
 
-  ha_index_end();
+  ha_rnd_end();
 
   /*
     Perform an n-way merge of n sorted buffers on disk, then writes all
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 18c40fc1ca9..ba62521fa79 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -282,8 +282,7 @@ class ha_rocksdb : public my_core::handler {
 
   bool m_skip_scan_it_next_call;
 
-  /* true means we are accessing the first row after a snapshot was created */
-  bool m_rnd_scan_is_new_snapshot;
+  bool m_rnd_scan_started;
 
   /*
     true means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by
@@ -331,7 +330,6 @@ class ha_rocksdb : public my_core::handler {
       MY_ATTRIBUTE((__nonnull__(2, 3), __warn_unused_result__));
   int secondary_index_read(const int keyno, uchar *const buf)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
-  void setup_iterator_for_rnd_scan();
   static void setup_iterator_bounds(const Rdb_key_def &kd,
                                     const rocksdb::Slice &eq_cond,
                                     size_t bound_len, uchar *const lower_bound,
@@ -539,6 +537,8 @@ class ha_rocksdb : public my_core::handler {
                           const Rdb_tbl_def *const tbl_def_arg)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
 
+  uint active_index_pos() MY_ATTRIBUTE((__warn_unused_result__));
+
   static bool is_pk(const uint index, const TABLE *table_arg,
                     const Rdb_tbl_def *tbl_def_arg)
       MY_ATTRIBUTE((__nonnull__, __warn_unused_result__));
@@ -644,6 +644,11 @@ class ha_rocksdb : public my_core::handler {
   int index_last(uchar *const buf) override
       MY_ATTRIBUTE((__warn_unused_result__));
 
+  int index_next_intern(uchar *const buf)
+      MY_ATTRIBUTE((__warn_unused_result__));
+  int index_prev_intern(uchar *const buf)
+      MY_ATTRIBUTE((__warn_unused_result__));
+
   class Item *idx_cond_push(uint keyno, class Item *const idx_cond) override;
   /*
     Default implementation from cancel_pushed_idx_cond() suits us
[Commits] 9317dc96f03: MDEV-25631: Crash in st_select_lex::mark_as_dependent with VIEW, aggregate and subquery
by psergey 16 May '21
revision-id: 9317dc96f03f558dc20d41f7dcf003e6ebf6a2b4 (mariadb-10.6.0-52-g9317dc96f03)
parent(s): bee1bb056dd5350c967dda65efb75e3a171e649a
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-05-16 12:30:40 +0300
message:

MDEV-25631: Crash in st_select_lex::mark_as_dependent with VIEW, aggregate and subquery

Name resolution code has checks like this one:

  if (thd->lex->in_sum_func &&
      thd->lex->in_sum_func->nest_level >= select->nest_level)
    ...

This fails to take into account the fact that SELECT_LEX::nest_level is
local to each VIEW.

Adjust the check so that it only succeeds when the select and the
aggregate function being considered are from the same VIEW (or both from
the top-level query), by adding this condition:

  + thd->lex->in_sum_func->nest_level_base == select->nest_level_base &&

Note: this patch only modifies one such check. There are many; should
they all be adjusted in the same way?
---
 mysql-test/main/subselect4.result | 11 +++++++++++
 mysql-test/main/subselect4.test   | 15 +++++++++++++++
 sql/item.cc                       |  3 +++
 sql/item_sum.cc                   |  1 +
 sql/item_sum.h                    |  1 +
 5 files changed, 31 insertions(+)

diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result
index 3bfb755120b..182235c039b 100644
--- a/mysql-test/main/subselect4.result
+++ b/mysql-test/main/subselect4.result
@@ -2898,3 +2898,14 @@ id select_type table type possible_keys key key_len ref rows Extra
 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 100
 drop table t0, t1, t2;
 # End of 10.4 tests
+#
+# MDEV-25631: Crash in st_select_lex::mark_as_dependent with VIEW, aggregate and subquery
+#
+CREATE TABLE t1 (i1 int);
+insert into t1 values (1),(2),(3);
+CREATE VIEW v1 AS
+SELECT t1.i1 FROM (t1 a JOIN t1 ON (t1.i1 = (SELECT t1.i1 FROM t1 b)));
+SELECT 1 FROM (SELECT count(((SELECT i1 FROM v1))) FROM v1) dt ;
+ERROR 21000: Subquery returns more than 1 row
+DROP VIEW v1;
+DROP TABLE t1;
diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test
index a1a4108de37..bd18ec5f5c9 100644
--- a/mysql-test/main/subselect4.test
+++ b/mysql-test/main/subselect4.test
@@ -2398,3 +2398,18 @@ select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
 drop table t0, t1, t2;
 
 --echo # End of 10.4 tests
+
+--echo #
+--echo # MDEV-25631: Crash in st_select_lex::mark_as_dependent with VIEW, aggregate and subquery
+--echo #
+
+CREATE TABLE t1 (i1 int);
+insert into t1 values (1),(2),(3); #not important
+CREATE VIEW v1 AS
+SELECT t1.i1 FROM (t1 a JOIN t1 ON (t1.i1 = (SELECT t1.i1 FROM t1 b)));
+
+--error ER_SUBQUERY_NO_1_ROW
+SELECT 1 FROM (SELECT count(((SELECT i1 FROM v1))) FROM v1) dt ;
+
+DROP VIEW v1;
+DROP TABLE t1;
diff --git a/sql/item.cc b/sql/item.cc
index 5cdbf52e829..a4c16c53e5e 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -5608,9 +5608,12 @@ Item_field::fix_outer_field(THD *thd, Field **from_field, Item **reference)
         max_arg_level for the function if it's needed.
       */
       if (thd->lex->in_sum_func &&
+          thd->lex->in_sum_func->nest_level_base == select->nest_level_base &&
           thd->lex->in_sum_func->nest_level >= select->nest_level)
       {
         Item::Type ref_type= (*reference)->type();
+        // psergey-todo: check if in_sum_func "has" the same
+        // nest_level_base as we do..
         set_if_bigger(thd->lex->in_sum_func->max_arg_level,
                       select->nest_level);
         set_field(*from_field);
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 537eaaf8dcd..23b6f739333 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -92,6 +92,7 @@ bool Item_sum::init_sum_func_check(THD *thd)
   /* Save a pointer to object to be used in items for nested set functions */
   thd->lex->in_sum_func= this;
   nest_level= thd->lex->current_select->nest_level;
+  nest_level_base= thd->lex->current_select->nest_level_base;
   ref_by= 0;
   aggr_level= -1;
   aggr_sel= NULL;
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 118f78ec5c1..10aa658c5e2 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -364,6 +364,7 @@ class Item_sum :public Item_func_or_sum
   Item_sum *in_sum_func;  /* embedding set function if any */
   st_select_lex * aggr_sel; /* select where the function is aggregated */
   int8 nest_level;        /* number of the nesting level of the set function */
+  st_select_lex_unit *nest_level_base;
   int8 aggr_level;        /* nesting level of the aggregating subquery */
   int8 max_arg_level;     /* max level of unbound column references */
   int8 max_sum_func_level;/* max level of aggregation for embedded functions */
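The core idea is that a nest_level is only comparable when both sides were
numbered against the same base (the same VIEW or the top-level query). A
minimal self-contained sketch of the adjusted predicate, with invented
struct names standing in for st_select_lex/Item_sum (not server code):

    #include <cassert>

    // Levels are per-base; compare them only when the bases match,
    // mirroring the nest_level_base check added by the patch.
    struct SelectLex {
      int nest_level;
      const void *nest_level_base;  // stands in for st_select_lex_unit*
    };

    struct SumFunc {
      int nest_level;
      const void *nest_level_base;
    };

    bool sum_func_covers_select(const SumFunc &f, const SelectLex &s) {
      return f.nest_level_base == s.nest_level_base &&  // same view/query?
             f.nest_level >= s.nest_level;
    }

    int main() {
      int top_base = 0, view_base = 0;  // two distinct numbering bases
      SumFunc f{1, &top_base};
      SelectLex same_base{1, &top_base}, inside_view{1, &view_base};
      assert(sum_func_covers_select(f, same_base));
      // Before the fix this comparison could falsely succeed:
      assert(!sum_func_covers_select(f, inside_view));
      return 0;
    }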
[Commits] 677f1ef: MDEV-25682 Explain shows an execution plan different from actually executed
by IgorBabaev 14 May '21
revision-id: 677f1ef6f00793b3ad2a42b4e6f0fcbb7cd0e39d (mariadb-10.2.31-950-g677f1ef)
parent(s): e607f3398c69147299884d3814cf063d2e7516ce
author: Igor Babaev
committer: Igor Babaev
timestamp: 2021-05-14 16:43:36 -0700
message:

MDEV-25682 Explain shows an execution plan different from actually executed

If a select query contained an ORDER BY clause that followed a LIMIT
clause, an ORDER BY clause, or an ORDER BY with LIMIT, the EXPLAIN output
for the query showed an execution plan different from the one that was
actually executed.

Approved by Roman Nozdrin <roman.nozdrin(a)mariadb.com>
---
 mysql-test/r/order_by.result | 25 +++++++++++++++++++++++++
 mysql-test/t/order_by.test   | 16 ++++++++++++++++
 sql/sql_select.cc            |  2 +-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result
index b144101..39b4e25 100644
--- a/mysql-test/r/order_by.result
+++ b/mysql-test/r/order_by.result
@@ -3460,4 +3460,29 @@ SET max_length_for_sort_data=@save_max_length_for_sort_data;
 SET max_sort_length= @save_max_sort_length;
 SET sql_select_limit= @save_sql_select_limit;
 DROP TABLE t1;
+#
+# MDEV-25682: EXPLAIN for SELECT with ORDER BY after [ORDER BY] LIMIT
+#
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+explain (select a from t1 limit 2) order by a desc;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	
+NULL	UNION RESULT	<union1>	ALL	NULL	NULL	NULL	NULL	NULL	Using filesort
+(select a from t1 limit 2) order by a desc;
+a
+7
+3
+create table t2 (a int, b int);
+insert into t2 values (3,70), (7,10), (1,40), (4,30);
+explain (select b,a from t2 order by a limit 3) order by b desc;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	4	Using filesort
+NULL	UNION RESULT	<union1>	ALL	NULL	NULL	NULL	NULL	NULL	Using filesort
+(select b,a from t2 order by a limit 3) order by b desc;
+b	a
+70	3
+40	1
+30	4
+drop table t1,t2;
 # End of 10.2 tests
diff --git a/mysql-test/t/order_by.test b/mysql-test/t/order_by.test
index 36c25ed..4e50fc5 100644
--- a/mysql-test/t/order_by.test
+++ b/mysql-test/t/order_by.test
@@ -2293,4 +2293,20 @@ SET max_sort_length= @save_max_sort_length;
 SET sql_select_limit= @save_sql_select_limit;
 DROP TABLE t1;
 
+--echo #
+--echo # MDEV-25682: EXPLAIN for SELECT with ORDER BY after [ORDER BY] LIMIT
+--echo #
+
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+explain (select a from t1 limit 2) order by a desc;
+(select a from t1 limit 2) order by a desc;
+
+create table t2 (a int, b int);
+insert into t2 values (3,70), (7,10), (1,40), (4,30);
+explain (select b,a from t2 order by a limit 3) order by b desc;
+(select b,a from t2 order by a limit 3) order by b desc;
+
+drop table t1,t2;
+
 --echo # End of 10.2 tests
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b85bd31..ce70620 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -25332,7 +25332,7 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
     sl->options|= SELECT_DESCRIBE;
   }
 
-  if (unit->is_union())
+  if (unit->is_union() || unit->fake_select_lex)
   {
     if (unit->union_needs_tmp_table() && unit->fake_select_lex)
     {
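The one-line fix says: a unit needs the union-style EXPLAIN path not only
when it is a real UNION, but also when it carries a fake_select_lex (the
level that executes the trailing ORDER BY/LIMIT of a parenthesized SELECT).
A tiny self-contained sketch of that condition, with a simplified struct in
place of SELECT_LEX_UNIT (illustrative only):

    #include <cassert>

    // Minimal model of the patched condition in mysql_explain_union().
    struct SelectLexUnit {
      bool is_union_flag;
      const void *fake_select_lex;  // non-null => trailing ORDER BY/LIMIT level
      bool is_union() const { return is_union_flag; }
    };

    bool explain_as_union(const SelectLexUnit &unit) {
      return unit.is_union() || unit.fake_select_lex;  // was: is_union() only
    }

    int main() {
      int fake = 0;
      SelectLexUnit plain_select{false, nullptr};
      SelectLexUnit ordered_select{false, &fake};  // (SELECT ... LIMIT 2) ORDER BY ...
      assert(!explain_as_union(plain_select));
      assert(explain_as_union(ordered_select));
      return 0;
    }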
[Commits] 77dfdf839c6: MDEV-25629: Crash in get_sort_by_table() in subquery with order by having outer ref
by psergey 14 May '21
revision-id: 77dfdf839c61d84e913710da7b3647cd02e4ab58 (mariadb-10.2.31-950-g77dfdf839c6)
parent(s): e607f3398c69147299884d3814cf063d2e7516ce
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-05-14 20:43:21 +0300
message:

MDEV-25629: Crash in get_sort_by_table() in subquery with order by having outer ref

In Item_field::fix_fields(): when the item was resolved to an Item_field
in the SELECT's select_list, copy the Item_field's "depended_from" field.

Failure to do so caused the item to have incorrect attributes: it pointed
to a Field in an upper select but used_tables() didn't return
OUTER_REF_TABLE_BIT.
---
 mysql-test/r/subselect4.result | 10 ++++++++++
 mysql-test/t/subselect4.test   | 14 ++++++++++++++
 sql/item.cc                    |  1 +
 3 files changed, 25 insertions(+)

diff --git a/mysql-test/r/subselect4.result b/mysql-test/r/subselect4.result
index 4021f717964..2a691799be5 100644
--- a/mysql-test/r/subselect4.result
+++ b/mysql-test/r/subselect4.result
@@ -2783,3 +2783,13 @@ INSERT INTO t2 VALUES (3),(4);
 SELECT 1 IN (SELECT (SELECT a FROM t1) AS x FROM t2 GROUP BY x);
 ERROR 21000: Subquery returns more than 1 row
 drop table t1,t2;
+#
+# MDEV-25629: Crash in get_sort_by_table() in subquery with order by having outer ref
+#
+CREATE TABLE t1 (i1 int);
+insert into t1 values (1),(2);
+SELECT 1
+FROM (t1 JOIN t1 AS ref_t1 ON
+(t1.i1 > (SELECT ref_t1.i1 AS c0 FROM t1 b ORDER BY -c0)));
+ERROR 21000: Subquery returns more than 1 row
+DROP TABLE t1;
diff --git a/mysql-test/t/subselect4.test b/mysql-test/t/subselect4.test
index e218e3aab18..58aa7868815 100644
--- a/mysql-test/t/subselect4.test
+++ b/mysql-test/t/subselect4.test
@@ -2282,3 +2282,17 @@ INSERT INTO t2 VALUES (3),(4);
 # Optional, fails either way
 --error ER_SUBQUERY_NO_1_ROW
 SELECT 1 IN (SELECT (SELECT a FROM t1) AS x FROM t2 GROUP BY x);
 drop table t1,t2;
+
+--echo #
+--echo # MDEV-25629: Crash in get_sort_by_table() in subquery with order by having outer ref
+--echo #
+CREATE TABLE t1 (i1 int);
+insert into t1 values (1),(2);
+
+--error ER_SUBQUERY_NO_1_ROW
+SELECT 1
+FROM (t1 JOIN t1 AS ref_t1 ON
+      (t1.i1 > (SELECT ref_t1.i1 AS c0 FROM t1 b ORDER BY -c0)));
+
+DROP TABLE t1;
+
diff --git a/sql/item.cc b/sql/item.cc
index 42272fe0148..be64edca9a1 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -5513,6 +5513,7 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
         */
         set_max_sum_func_level(thd, select);
         set_field(new_field);
+        depended_from= (*((Item_field**)res))->depended_from;
         return 0;
       }
       else
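The essence of the fix is attribute propagation: when an item resolves to a
select-list alias, the marker saying "this refers to an outer select"
(depended_from) must be copied along, or used_tables() under-reports
OUTER_REF_TABLE_BIT. A minimal self-contained sketch of that invariant,
with invented simplified types rather than the server's Item classes:

    #include <cassert>
    #include <cstdint>

    // Simplified model: depended_from marks an outer reference, and
    // used_tables() must reflect it, as in the patched fix_fields().
    constexpr std::uint64_t OUTER_REF_TABLE_BIT = 1ULL << 63;

    struct ItemField {
      const void *depended_from = nullptr;  // outer select this refers to
      std::uint64_t used_tables() const {
        return depended_from ? OUTER_REF_TABLE_BIT : 0;
      }
    };

    int main() {
      int outer_select = 0;
      ItemField select_list_alias;             // "ref_t1.i1 AS c0"
      select_list_alias.depended_from = &outer_select;

      ItemField order_by_ref;                  // "-c0" resolving to the alias
      // The fix: propagate depended_from on resolution.
      order_by_ref.depended_from = select_list_alias.depended_from;

      assert(order_by_ref.used_tables() & OUTER_REF_TABLE_BIT);
      return 0;
    }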
[Commits] f90be8dab06: MDEV-25078: ALTER INDEX is inconsistent with ADD/DROP/RENAME index
by psergey 14 May '21

revision-id: f90be8dab063dd730a3a8fa3c2ac51ee8549a87e (mariadb-10.6.0-43-gf90be8dab06) parent(s): c67d69abb9b6d05a1c837dc92e1faad770741f55 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-05-14 15:42:05 +0300 message: MDEV-25078: ALTER INDEX is inconsistent with ADD/DROP/RENAME index Support IF EXISTS in the command that alters index visibility: ALTER TABLE ALTER (KEY|INDEX) [IF EXISTS] index_name [NOT] IGNORED --- mysql-test/main/ignored_index.result | 49 ++++++++++++++++++++++++++++++++++++ mysql-test/main/ignored_index.test | 20 +++++++++++++++ sql/sql_class.h | 6 +++-- sql/sql_table.cc | 28 ++++++++++++++++++++- sql/sql_yacc.yy | 4 +-- 5 files changed, 102 insertions(+), 5 deletions(-) diff --git a/mysql-test/main/ignored_index.result b/mysql-test/main/ignored_index.result index 733e44a3afa..84263dddd4d 100644 --- a/mysql-test/main/ignored_index.result +++ b/mysql-test/main/ignored_index.result @@ -479,3 +479,52 @@ t1 CREATE TABLE `t1` ( KEY `a` (`a`) IGNORED ) ENGINE=MyISAM DEFAULT CHARSET=latin1 DROP TABLE t1;
+#
+# MDEV-25078, part #2: allow IF EXISTS
+#
+create table t1 (a int, b int, c int, key(a), key(b), key(c));
+alter table t1 alter key if exists no_such_key ignored;
+Warnings:
+Note 1176 Key 'no_such_key' doesn't exist in table 't1'
+alter table t1 alter key if exists a ignored;
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ KEY `a` (`a`) IGNORED,
+ KEY `b` (`b`),
+ KEY `c` (`c`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+alter table t1
+alter key if exists no_such_key ignored,
+alter key if exists c ignored ;
+Warnings:
+Note 1176 Key 'no_such_key' doesn't exist in table 't1'
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ KEY `a` (`a`) IGNORED,
+ KEY `b` (`b`),
+ KEY `c` (`c`) IGNORED
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+alter table t1
+alter key if exists no_such_key not ignored,
+alter key if exists c not ignored ;
+Warnings:
+Note 1176 Key 'no_such_key' doesn't exist in table 't1'
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `a` int(11) DEFAULT NULL,
+ `b` int(11) DEFAULT NULL,
+ `c` int(11) DEFAULT NULL,
+ KEY `a` (`a`) IGNORED,
+ KEY `b` (`b`),
+ KEY `c` (`c`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+drop table t1; diff --git a/mysql-test/main/ignored_index.test b/mysql-test/main/ignored_index.test index a1084f3eb9c..a3d46fe6046 100644 --- a/mysql-test/main/ignored_index.test +++ b/mysql-test/main/ignored_index.test @@ -442,3 +442,23 @@ CREATE TABLE t1 (a INT, KEY (a)); ALTER TABLE t1 ALTER KEY a IGNORED; SHOW CREATE TABLE t1; DROP TABLE t1;
+
+--echo #
+--echo # MDEV-25078, part #2: allow IF EXISTS
+--echo #
+
+create table t1 (a int, b int, c int, key(a), key(b), key(c));
+alter table t1 alter key if exists no_such_key ignored;
+alter table t1 alter key if exists a ignored;
+show create table t1;
+alter table t1
+ alter key if exists no_such_key ignored,
+ alter key if exists c ignored ;
+show create table t1;
+alter table t1
+ alter key if exists no_such_key not ignored,
+ alter key if exists c not ignored ;
+show create table t1;
+drop table t1;
+
+ diff --git a/sql/sql_class.h b/sql/sql_class.h index 031fff71ec8..09faeb10ab4 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -386,13 +386,14 @@ class Alter_rename_key : public Sql_alloc class Alter_index_ignorability: public Sql_alloc { public: -
Alter_index_ignorability(const char *name, bool is_ignored) : - m_name(name), m_is_ignored(is_ignored) + Alter_index_ignorability(const char *name, bool is_ignored, bool if_exists) : + m_name(name), m_is_ignored(is_ignored), m_if_exists(if_exists) { assert(name != NULL); } const char *name() const { return m_name; } + bool if_exists() const { return m_if_exists; } /* The ignorability after the operation is performed. */ bool is_ignored() const { return m_is_ignored; } @@ -402,6 +403,7 @@ class Alter_index_ignorability: public Sql_alloc private: const char *m_name; bool m_is_ignored; + bool m_if_exists; }; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 209c799dc59..e8fde33191f 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -6695,7 +6695,33 @@ handle_if_exists_options(THD *thd, TABLE *table, Alter_info *alter_info, rename_key_it.remove(); } } - + /* Handle ALTER KEY IF EXISTS. */ + { + List_iterator<Alter_index_ignorability> ignor_it(alter_info->alter_index_ignorability_list); + Alter_index_ignorability *aii; + while ((aii= ignor_it++)) + { + if (!aii->if_exists()) + continue; + bool exists= false; + for (uint n_key= 0; n_key < table->s->keys; n_key++) + { + if (my_strcasecmp(system_charset_info, aii->name(), + table->key_info[n_key].name.str) == 0) + { + exists= true; + break; + } + } + if (exists) + continue; + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_KEY_DOES_NOT_EXISTS, + ER_THD(thd, ER_KEY_DOES_NOT_EXISTS), + aii->name(), table->s->table_name.str); + ignor_it.remove(); + } + } /* ALTER TABLE ADD KEY IF NOT EXISTS */ /* ALTER TABLE ADD FOREIGN KEY IF NOT EXISTS */ { diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 92769dc01f1..c585ff4403c 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -7824,11 +7824,11 @@ alter_list_item: if (unlikely(Lex->add_alter_list($4, $7, $3))) MYSQL_YYABORT; } - | ALTER key_or_index ident ignorability + | ALTER key_or_index opt_if_exists_table_element ident ignorability { LEX *lex= Lex; Alter_index_ignorability *ac= new (thd->mem_root) - Alter_index_ignorability($3.str, $4); + Alter_index_ignorability($4.str, $5, $3); if (ac == NULL) MYSQL_YYABORT; lex->alter_info.alter_index_ignorability_list.push_back(ac);
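The extended syntax at a glance (a sketch; the table and index names here are hypothetical):

  ALTER TABLE t1 ALTER KEY IF EXISTS idx_maybe IGNORED;
  ALTER TABLE t1 ALTER INDEX IF EXISTS idx_maybe NOT IGNORED;

As the new test shows, when the named index does not exist the clause is dropped with a Note (ER_KEY_DOES_NOT_EXISTS) instead of failing the whole ALTER, matching how ADD/DROP/RENAME handle IF [NOT] EXISTS.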
[Commits] 4547c6f2833: MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe ...
by psergey 13 May '21

revision-id: 4547c6f28338afb06a19aab19b4609de14b8a05f (mariadb-10.6.0-46-g4547c6f2833) parent(s): 916c28c9e5ff334f48adff26c74d774a379d96e0 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-05-13 15:34:25 +0300 message: MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe ... Mark the JSON_TABLE function as SBR-unsafe. It is not unsafe in the current implementation, but we still mark it as such in order to be future-proof and to keep it possible to change the JSON data representation in the future. --- mysql-test/suite/json/r/json_table_binlog.result | 26 ++++++++++++++++++++++++ mysql-test/suite/json/t/json_table_binlog.test | 25 +++++++++++++++++++++++ sql/json_table.h | 16 +++++++++++++++ sql/sql_yacc.yy | 2 ++ 4 files changed, 69 insertions(+) diff --git a/mysql-test/suite/json/r/json_table_binlog.result b/mysql-test/suite/json/r/json_table_binlog.result new file mode 100644 index 00000000000..472f7395648 --- /dev/null +++ b/mysql-test/suite/json/r/json_table_binlog.result @@ -0,0 +1,26 @@
+#
+# MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe for statement binlog and should be marked as such
+#
+create table t1 (a int);
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+set binlog_format='statement';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave
+set binlog_format='mixed';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+# This must show Annotate_rows, Write_rows_v1 events. Not the statement event
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
+master-bin.000001 # Query # # COMMIT
+drop table t1; diff --git a/mysql-test/suite/json/t/json_table_binlog.test b/mysql-test/suite/json/t/json_table_binlog.test new file mode 100644 index 00000000000..dcc05fb855d --- /dev/null +++ b/mysql-test/suite/json/t/json_table_binlog.test @@ -0,0 +1,25 @@
+--source include/have_binlog_format_mixed.inc
+
+--echo #
+--echo # MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe for statement binlog and should be marked as such
+--echo #
+
+create table t1 (a int);
+
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+set binlog_format='statement';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+
+set binlog_format='mixed';
+let $binlog_start= query_get_value(SHOW MASTER STATUS, Position, 1);
+let $binlog_file= LAST;
+
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+
+--echo # This must show Annotate_rows, Write_rows_v1 events. Not the statement event
+--source include/show_binlog_events.inc
+drop table t1; diff --git a/sql/json_table.h b/sql/json_table.h index beae5405d25..3560b4ca137 100644 --- a/sql/json_table.h +++ b/sql/json_table.h @@ -183,6 +183,22 @@ class Json_table_column : public Sql_alloc into the TABLE_LIST::table_function. Then the ha_json_table instance is created based on it in the create_table_for_function(). +
+ == Replication: whether JSON_TABLE is deterministic ==
+
+ In sql_yacc.yy, we set BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION whenever
+ JSON_TABLE is used. The reasoning behind this is as follows:
+
+ In the current MariaDB code, evaluation of JSON_TABLE is deterministic,
+ that is, for a given input string JSON_TABLE will always produce the same
+ set of rows in the same order. However one can think of JSON documents
+ that one can consider identical which will produce different output.
+ In order to be future-proof and withstand changes like:
+ - sorting JSON object members by name (like MySQL does)
+ - changing the way duplicate object members are handled
+ we mark the function as SBR-unsafe.
+ (If there is ever an issue with this, marking the function as SBR-safe
+ is a non-intrusive change we will always be able to make) */ class Table_function_json_table : public Sql_alloc diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index fe6112e9592..92769dc01f1 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -11675,6 +11675,8 @@ table_function: new (thd->mem_root) Table_function_json_table($4); if (unlikely(!jt)) MYSQL_YYABORT; + /* See comment for class Table_function_json_table: */ + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); Lex->json_table= jt; Select->parsing_place= NO_MATTER;
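Why ordinality specifically is the concern (an illustrative sketch, not from the patch, and assuming the server accepts a wildcard member path here): FOR ORDINALITY numbers rows in the order JSON_TABLE produces them, so any future change to member ordering or duplicate-member handling would renumber rows for a document users would consider the same:

  -- hypothetical illustration: today this returns rows in document order,
  -- (1,10),(2,20); a server that sorted object members by name (as MySQL
  -- does) would return (1,20),(2,10) for the same statement
  select * from json_table('{"b":10, "a":20}', '$.*'
                           columns (n for ordinality, v int path '$')) as T;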
[Commits] a97b533aebb: MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe ...
by psergey 13 May '21

revision-id: a97b533aebbdbdae72c1ada4161c894de01ec549 (mariadb-10.6.0-42-ga97b533aebb) parent(s): 370b310b1d67ad42df96b75c3876fdcf67a8694f author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-05-13 15:33:57 +0300 message: MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe ... Mark the JSON_TABLE function as SBR-unsafe. It is not unsafe in the current implementation, but we still mark it as such in order to be future-proof and to keep it possible to change the JSON data representation in the future. --- mysql-test/suite/json/r/json_table_binlog.result | 26 ++++++++++++++++++++++++ mysql-test/suite/json/t/json_table_binlog.test | 25 +++++++++++++++++++++++ sql/json_table.h | 16 +++++++++++++++ sql/sql_yacc.yy | 2 ++ 4 files changed, 69 insertions(+) diff --git a/mysql-test/suite/json/r/json_table_binlog.result b/mysql-test/suite/json/r/json_table_binlog.result new file mode 100644 index 00000000000..472f7395648 --- /dev/null +++ b/mysql-test/suite/json/r/json_table_binlog.result @@ -0,0 +1,26 @@
+#
+# MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe for statement binlog and should be marked as such
+#
+create table t1 (a int);
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+set binlog_format='statement';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave
+set binlog_format='mixed';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+# This must show Annotate_rows, Write_rows_v1 events. Not the statement event
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
+master-bin.000001 # Query # # COMMIT
+drop table t1; diff --git a/mysql-test/suite/json/t/json_table_binlog.test b/mysql-test/suite/json/t/json_table_binlog.test new file mode 100644 index 00000000000..dcc05fb855d --- /dev/null +++ b/mysql-test/suite/json/t/json_table_binlog.test @@ -0,0 +1,25 @@
+--source include/have_binlog_format_mixed.inc
+
+--echo #
+--echo # MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe for statement binlog and should be marked as such
+--echo #
+
+create table t1 (a int);
+
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+set binlog_format='statement';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+
+set binlog_format='mixed';
+let $binlog_start= query_get_value(SHOW MASTER STATUS, Position, 1);
+let $binlog_file= LAST;
+
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+
+--echo # This must show Annotate_rows, Write_rows_v1 events. Not the statement event
+--source include/show_binlog_events.inc
+drop table t1; diff --git a/sql/json_table.h b/sql/json_table.h index beae5405d25..3560b4ca137 100644 --- a/sql/json_table.h +++ b/sql/json_table.h @@ -183,6 +183,22 @@ class Json_table_column : public Sql_alloc into the TABLE_LIST::table_function. Then the ha_json_table instance is created based on it in the create_table_for_function(). +
+ == Replication: whether JSON_TABLE is deterministic ==
+
+ In sql_yacc.yy, we set BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION whenever
+ JSON_TABLE is used. The reasoning behind this is as follows:
+
+ In the current MariaDB code, evaluation of JSON_TABLE is deterministic,
+ that is, for a given input string JSON_TABLE will always produce the same
+ set of rows in the same order. However one can think of JSON documents
+ that one can consider identical which will produce different output.
+ In order to be future-proof and withstand changes like:
+ - sorting JSON object members by name (like MySQL does)
+ - changing the way duplicate object members are handled
+ we mark the function as SBR-unsafe.
+ (If there is ever an issue with this, marking the function as SBR-safe
+ is a non-intrusive change we will always be able to make) */ class Table_function_json_table : public Sql_alloc diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index fe6112e9592..92769dc01f1 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -11675,6 +11675,8 @@ table_function: new (thd->mem_root) Table_function_json_table($4); if (unlikely(!jt)) MYSQL_YYABORT; + /* See comment for class Table_function_json_table: */ + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); Lex->json_table= jt; Select->parsing_place= NO_MATTER;
[Commits] c67d69abb9b: MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe ...
by psergey 13 May '21

revision-id: c67d69abb9b6d05a1c837dc92e1faad770741f55 (mariadb-10.6.0-42-gc67d69abb9b) parent(s): 370b310b1d67ad42df96b75c3876fdcf67a8694f author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-05-13 12:25:01 +0300 message: MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe ... Mark the JSON_TABLE function as SBR-unsafe. It is not unsafe in the current implementation, but we still mark it as such in order to be future-proof and to keep it possible to change the JSON data representation in the future. --- mysql-test/suite/json/r/json_table_binlog.result | 26 ++++++++++++++++++++++++ mysql-test/suite/json/t/json_table_binlog.test | 25 +++++++++++++++++++++++ sql/json_table.h | 16 +++++++++++++++ sql/sql_yacc.yy | 2 ++ 4 files changed, 69 insertions(+) diff --git a/mysql-test/suite/json/r/json_table_binlog.result b/mysql-test/suite/json/r/json_table_binlog.result new file mode 100644 index 00000000000..472f7395648 --- /dev/null +++ b/mysql-test/suite/json/r/json_table_binlog.result @@ -0,0 +1,26 @@
+#
+# MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe for statement binlog and should be marked as such
+#
+create table t1 (a int);
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+set binlog_format='statement';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+Warnings:
+Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave
+set binlog_format='mixed';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+# This must show Annotate_rows, Write_rows_v1 events. Not the statement event
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Annotate_rows # # insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
+master-bin.000001 # Query # # COMMIT
+drop table t1; diff --git a/mysql-test/suite/json/t/json_table_binlog.test b/mysql-test/suite/json/t/json_table_binlog.test new file mode 100644 index 00000000000..dcc05fb855d --- /dev/null +++ b/mysql-test/suite/json/t/json_table_binlog.test @@ -0,0 +1,25 @@
+--source include/have_binlog_format_mixed.inc
+
+--echo #
+--echo # MDEV-25154: JSON_TABLE: Queries involving ordinality columns are unsafe for statement binlog and should be marked as such
+--echo #
+
+create table t1 (a int);
+
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+set binlog_format='statement';
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+
+set binlog_format='mixed';
+let $binlog_start= query_get_value(SHOW MASTER STATUS, Position, 1);
+let $binlog_file= LAST;
+
+insert into t1
+select *
+from json_table('[1,2,3]', '$[*]' columns (a for ordinality)) as T ;
+
+--echo # This must show Annotate_rows, Write_rows_v1 events. Not the statement event
+--source include/show_binlog_events.inc
+drop table t1; diff --git a/sql/json_table.h b/sql/json_table.h index beae5405d25..3560b4ca137 100644 --- a/sql/json_table.h +++ b/sql/json_table.h @@ -183,6 +183,22 @@ class Json_table_column : public Sql_alloc into the TABLE_LIST::table_function. Then the ha_json_table instance is created based on it in the create_table_for_function(). +
+ == Replication: whether JSON_TABLE is deterministic ==
+
+ In sql_yacc.yy, we set BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION whenever
+ JSON_TABLE is used. The reasoning behind this is as follows:
+
+ In the current MariaDB code, evaluation of JSON_TABLE is deterministic,
+ that is, for a given input string JSON_TABLE will always produce the same
+ set of rows in the same order. However one can think of JSON documents
+ that one can consider identical which will produce different output.
+ In order to be future-proof and withstand changes like:
+ - sorting JSON object members by name (like MySQL does)
+ - changing the way duplicate object members are handled
+ we mark the function as SBR-unsafe.
+ (If there is ever an issue with this, marking the function as SBR-safe
+ is a non-intrusive change we will always be able to make) */ class Table_function_json_table : public Sql_alloc diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index fe6112e9592..92769dc01f1 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -11675,6 +11675,8 @@ table_function: new (thd->mem_root) Table_function_json_table($4); if (unlikely(!jt)) MYSQL_YYABORT; + /* See comment for class Table_function_json_table: */ + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); Lex->json_table= jt; Select->parsing_place= NO_MATTER;