December 2018
- 10 participants
- 24 discussions
Re: [Maria-developers] d282f5c5560: MDEV-10963 Fragmented BINLOG query
by Sergei Golubchik 18 Dec '18
Hi, Andrei!
Looks better!
There are no major problems, but see comments below. There are a few
suggestions on how to simplify the code.
On Nov 05, Andrei Elkin wrote:
> revision-id: d282f5c55609469cd74d7390f70c7d922c778711 (mariadb-10.1.35-93-gd282f5c5560)
> parent(s): 2a576f71c5d3c7aacef564e5b1251f83bde48f51
> author: Andrei Elkin <andrei.elkin(a)mariadb.com>
> committer: Andrei Elkin <andrei.elkin(a)mariadb.com>
> timestamp: 2018-10-21 23:42:00 +0300
> message:
>
> MDEV-10963 Fragmented BINLOG query
>
> diff --git a/mysql-test/suite/binlog/t/binlog_mysqlbinlog_row_frag.test b/mysql-test/suite/binlog/t/binlog_mysqlbinlog_row_frag.test
> new file mode 100644
> index 00000000000..bdf41c94c76
> --- /dev/null
> +++ b/mysql-test/suite/binlog/t/binlog_mysqlbinlog_row_frag.test
> @@ -0,0 +1,50 @@
> +--source include/have_debug.inc
> +--source include/have_log_bin.inc
> +--source include/have_binlog_format_row.inc
you don't need to include have_log_bin, if you include
have_binlog_format_row.
> +
> +--let $MYSQLD_DATADIR= `select @@datadir`
> +--let $max_size=1024
> +
> +CREATE TABLE t (a TEXT);
> +# events of interest are guaranteed to stay in 000001 log
> +RESET MASTER;
> +--eval INSERT INTO t SET a=repeat('a', $max_size)
eh? why did you do it with let/eval instead of a simple sql statement?
you don't use $max_size anywhere else.
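I.e., just inline the value (a sketch, using the $max_size value set above):

INSERT INTO t SET a=repeat('a', 1024);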
> +SELECT a from t into @a;
> +FLUSH LOGS;
> +DELETE FROM t;
> +
> +--exec $MYSQL_BINLOG --debug-binlog-row-event-max-encoded-size=256 $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/mysqlbinlog.sql
> +
> +--let $assert_text= BINLOG is fragmented
> +--let $assert_select= BINLOG @binlog_fragment_0, @binlog_fragment_1
> +--let $assert_count= 1
> +--let $assert_file= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.sql
> +--source include/assert_grep.inc
no, please, use search_pattern_in_file.inc instead.
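Something like this (a sketch; search_pattern_in_file.inc reads SEARCH_FILE
and SEARCH_PATTERN):

--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/mysqlbinlog.sql
--let SEARCH_PATTERN= BINLOG @binlog_fragment_0, @binlog_fragment_1
--source include/search_pattern_in_file.inc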
> +
> +--exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/mysqlbinlog.sql
> +
> +SELECT a LIKE @a as 'true' FROM t;
> +SELECT @binlog_fragment_0, @binlog_fragment_1 as 'NULL';
that makes no sense, @binlog_fragment_0 and _1 were set in a separate
client session. You cannot test whether they were cleared or not
by looking at the values here.
> +
> +# improper syntax error
> +--echo BINLOG number-of-fragments must be exactly two
> +--error ER_PARSE_ERROR
> +BINLOG @binlog_fragment;
> +--error ER_PARSE_ERROR
> +BINLOG @binlog_fragment, @binlog_fragment, @binlog_fragment;
> +
> +# corrupted fragments error check (to the expected error code notice,
> +# the same error code occurs in a similar unfragmented case)
> +SET @binlog_fragment_0='012345';
> +SET @binlog_fragment_1='012345';
> +--error ER_SYNTAX_ERROR
> +BINLOG @binlog_fragment_0, @binlog_fragment_1;
> +
> +# Not existing fragment is not allowed
> +SET @binlog_fragment_0='012345';
> +--error ER_WRONG_TYPE_FOR_VAR
> +BINLOG @binlog_fragment_0, @binlog_fragment_not_exist;
> +
> +--echo # Cleanup
> +--remove_file $MYSQLTEST_VARDIR/tmp/mysqlbinlog.sql
> +DROP TABLE t;
> diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
> --- a/mysys/mf_iocache2.c
> +++ b/mysys/mf_iocache2.c
> @@ -22,52 +22,53 @@
> #include <stdarg.h>
> #include <m_ctype.h>
>
> -/*
> - Copy contents of an IO_CACHE to a file.
> -
> - SYNOPSIS
> - my_b_copy_to_file()
> - cache IO_CACHE to copy from
> - file File to copy to
> -
> - DESCRIPTION
> - Copy the contents of the cache to the file. The cache will be
> - re-inited to a read cache and will read from the beginning of the
> - cache.
> -
> - If a failure to write fully occurs, the cache is only copied
> - partially.
> +/**
> + Copy the cache to the file. Copying can be constrained to @c count
> + number of bytes when the parameter is less than SIZE_T_MAX. The
> + cache will be optionally re-inited to a read cache and will read
> + from the beginning of the cache. If a failure to write fully
> + occurs, the cache is only copied partially.
>
> TODO
> - Make this function solid by handling partial reads from the cache
> - in a correct manner: it should be atomic.
> -
> - RETURN VALUE
> - 0 All OK
> - 1 An error occurred
> + Make this function solid by handling partial reads from the cache
> + in a correct manner: it should be atomic.
> +
> + @param cache IO_CACHE to copy from
> + @param file File to copy to
> + @param do_reinit whether to turn the cache to read mode
> + @param count the copied size or the max of the type
> + when the whole cache is to be copied.
> + @return
> + 0 All OK
> + 1 An error occurred
> */
> int
> -my_b_copy_to_file(IO_CACHE *cache, FILE *file)
> +my_b_copy_to_file(IO_CACHE *cache, FILE *file,
> + my_bool do_reinit,
> + size_t count)
> {
> - size_t bytes_in_cache;
> + size_t curr_write, bytes_in_cache;
> DBUG_ENTER("my_b_copy_to_file");
>
> /* Reinit the cache to read from the beginning of the cache */
> - if (reinit_io_cache(cache, READ_CACHE, 0L, FALSE, FALSE))
> + if (do_reinit && reinit_io_cache(cache, READ_CACHE, 0L, FALSE, FALSE))
generally, when there's a function that is always called with a
constant (compile-time) argument, I prefer to split the code
compile-time too, if it isn't too much trouble. In this case it would
mean a new function like
int my_b_copy_all_to_file(IO_CACHE *cache, FILE *file)
{
  if (reinit_io_cache(cache, READ_CACHE, 0L, FALSE, FALSE))
    return 1;
  return my_b_copy_to_file(cache, file, SIZE_T_MAX);
}
and all old code will be changed to use my_b_copy_all_to_file().
Old my_b_copy_to_file() won't need to do reinit_io_cache() anymore and
your code will use it directly.
> DBUG_RETURN(1);
> bytes_in_cache= my_b_bytes_in_cache(cache);
> do
> {
> - if (my_fwrite(file, cache->read_pos, bytes_in_cache,
> + curr_write= MY_MIN(bytes_in_cache, count);
> + if (my_fwrite(file, cache->read_pos, curr_write,
> MYF(MY_WME | MY_NABP)) == (size_t) -1)
> DBUG_RETURN(1);
> - } while ((bytes_in_cache= my_b_fill(cache)));
> +
> + cache->read_pos += curr_write;
> + count -= curr_write;
> + } while (count && (bytes_in_cache= my_b_fill(cache)));
> if(cache->error == -1)
> DBUG_RETURN(1);
> DBUG_RETURN(0);
> }
>
> -
> my_off_t my_b_append_tell(IO_CACHE* info)
> {
> /*
> diff --git a/sql/log_event.cc b/sql/log_event.cc
> index e07b7002398..aeca794f0cd 100644
> --- a/sql/log_event.cc
> +++ b/sql/log_event.cc
> @@ -10474,12 +10488,151 @@ void Rows_log_event::pack_info(Protocol *protocol)
> #endif
>
> #ifdef MYSQL_CLIENT
> +/**
> + Print an event "body" cache to @c file possibly in multiple fragements.
> + Each fragement is optionally per @c do_wrap to procude an SQL statement.
> +
> + @param file a file to print to
> + @param body the "body" IO_CACHE of event
> + @param do_wrap whether to wrap base64-encoded strings with
> + SQL cover.
> + The function signals on any error through setting @c body->error to -1.
> +*/
> +void copy_cache_to_file_wrapped(FILE *file,
> + IO_CACHE *body,
> + bool do_wrap,
> + const char *delimiter)
> +{
> + uint n_frag= 1;
> + const char* before_frag= NULL;
> + char* after_frag= NULL;
> + char* after_last= NULL;
> + /*
> + 2 fragments can always represent near 1GB row-based
> + base64-encoded event as two strings each of size less than
> + max(max_allowed_packet). Greater number of fragments does not
> + save from potential need to tweak (increase) @@max_allowed_packet
> + before to process the fragments. So 2 is safe and enough.
> + */
> + const char fmt_last_frag2[]=
> + "\nBINLOG @binlog_fragment_0, @binlog_fragment_1%s\n";
> + const char fmt_before_frag[]= "\nSET /* ONE_SHOT */ @binlog_fragment_%d ='\n";
this ONE_SHOT is confusing, even if in a comment. Better not to do it :)
> + /*
> + Buffer to hold computed formatted strings according to specifiers.
> + The sizes may depend on an actual fragment number size in terms of decimal
> + signs so its maximum is estimated (not precisely yet safely) below.
> + */
> + char buf[sizeof(fmt_before_frag) + sizeof(fmt_last_frag2)
> + + ((sizeof(n_frag) * 8)/3 + 1) // max of decimal index
> + + sizeof(PRINT_EVENT_INFO::max_delimiter_len) + 3]; // delim, \n and 0
sizeof(max_delimiter_len) ? it's sizeof(uint), right? Did you mean
sizeof(PRINT_EVENT_INFO::delimiter)
or simply
PRINT_EVENT_INFO::max_delimiter_len
without sizeof?
> +
> + if (do_wrap)
> + {
> + after_frag= (char*) my_malloc(sizeof(buf), MYF(MY_WME));
> + sprintf(after_frag, "'%s\n", delimiter);
> + if (my_b_tell(body) > opt_binlog_rows_event_max_encoded_size)
> + n_frag= 2;
> + if (n_frag > 1)
> + {
> + before_frag= fmt_before_frag;
> + after_last= (char*) my_malloc(sizeof(buf), MYF(MY_WME));
> + sprintf(after_last, fmt_last_frag2, (char*) delimiter);
> + }
> + else
> + {
> + before_frag= "\nBINLOG '\n"; // single "fragment"
> + }
> + }
> +
> + size_t total_size= my_b_tell(body), total_written= 0;
> + size_t frag_size= total_size / n_frag + 1, curr_size;
> +
> + if (reinit_io_cache(body, READ_CACHE, 0L, FALSE, FALSE))
> + {
> + body->error= -1;
> + goto err;
> + }
> +
> + for (uint i= 0; i < n_frag; i++, total_written += curr_size)
> + {
> + curr_size= i < n_frag - 1 ? frag_size : total_size - total_written;
> +
> + DBUG_ASSERT(i < n_frag - 1 || curr_size <= frag_size);
> +
> + if (before_frag)
> + {
> + sprintf(buf, before_frag, i);
> + my_fwrite(file, (uchar*) buf, strlen(buf), MYF(MY_WME | MY_NABP));
> + }
> + if (my_b_copy_to_file(body, file, FALSE, curr_size))
> + {
> + body->error= -1;
> + goto err;
> + }
> + if (after_frag)
> + {
> + sprintf(buf, after_frag, NULL);
> + my_fwrite(file, (uchar*) buf, strlen(buf), MYF(MY_WME | MY_NABP));
> + }
> + }
Hmm, dunno. I suspect you can do it three times shorter and five times
easier to read if you wouldn't try to generalize it for an arbitrary
number of fragments with arbitrary prefixes and suffixes. Just
if (my_b_tell(body) < opt_binlog_rows_event_max_encoded_size - margin)
{
  my_fprintf(file, "BINLOG '");
  my_b_copy_to_file(body, file);
  my_fprintf(file, "'%s\n", delimiter);
}
else
{
  my_fprintf(file, "SET @binlog_fragment_0='");
  my_b_copy_to_file(body, file, opt_binlog_rows_event_max_encoded_size);
  my_fprintf(file, "'%s\nSET @binlog_fragment_1='", delimiter);
  my_b_copy_to_file(body, file, SIZE_T_MAX);
  my_fprintf(file, "'%s\nBINLOG @binlog_fragment_0, @binlog_fragment_1%s\n",
             delimiter, delimiter);
}
See?
> +
> + if (after_last)
> + {
> + sprintf(buf, after_last, n_frag);
> + my_fwrite(file, (uchar*) buf, strlen(buf), MYF(MY_WME | MY_NABP));
> + }
> + reinit_io_cache(body, WRITE_CACHE, 0, FALSE, TRUE);
> +
> +err:
> + my_free(after_frag);
> + my_free(after_last);
> +}
> +
> +/**
> + The function invokes base64 encoder to run on the current
> + event string and store the result into two caches.
> + When the event ends the current statement the caches are is copied into
> + the argument file.
> + Copying is also concerned how to wrap the event, specifically to produce
> + a valid SQL syntax.
> + When the encoded data size is within max(MAX_ALLOWED_PACKET)
> + a regular BINLOG query is composed. Otherwise it is build as fragmented
> +
> + SET @binlog_fragment_0='...';
> + SET @binlog_fragment_1='...';
> + BINLOG DEFRAGMENT(@binlog_fragment_0, @binlog_fragment_1);
> +
> + where fragments are represented by a sequence of "indexed" user
> + variables.
> + Two more statements are composed as well
> +
> + SET @binlog_fragment_0=NULL;
> + SET @binlog_fragment_1=NULL;
> +
> + to promptly release memory.
No, they aren't
> +
> + NOTE.
@note
> + If any changes made don't forget to duplicate them to
> + Old_rows_log_event as long as it's supported.
> +
> + @param file pointer to IO_CACHE
> + @param print_event_info pointer to print_event_info specializing
> + what out of and how to print the event
> + @param name the name of a table that the event operates on
> +
> + The function signals on any error of cache access through setting
> + that cache's @c error to -1.
> +*/
> void Rows_log_event::print_helper(FILE *file,
> PRINT_EVENT_INFO *print_event_info,
> char const *const name)
> {
> IO_CACHE *const head= &print_event_info->head_cache;
> IO_CACHE *const body= &print_event_info->body_cache;
> + bool do_print_encoded=
> + print_event_info->base64_output_mode != BASE64_OUTPUT_DECODE_ROWS &&
> + !print_event_info->short_form;
> +
> if (!print_event_info->short_form)
> {
> bool const last_stmt_event= get_flags(STMT_END_F);
> diff --git a/sql/log_event.h b/sql/log_event.h
> index 90900f63533..28277e659d2 100644
> --- a/sql/log_event.h
> +++ b/sql/log_event.h
> @@ -749,6 +749,7 @@ typedef struct st_print_event_info
> that was printed. We cache these so that we don't have to print
> them if they are unchanged.
> */
> + static const uint max_delimiter_len= 16;
why did you introduce this max_delimiter_len, if all you use
is sizeof(delimiter) anyway? (and even that is not needed)
> // TODO: have the last catalog here ??
> char db[FN_REFLEN+1]; // TODO: make this a LEX_STRING when thd->db is
> bool flags2_inited;
> @@ -798,7 +799,7 @@ typedef struct st_print_event_info
> bool printed_fd_event;
> my_off_t hexdump_from;
> uint8 common_header_len;
> - char delimiter[16];
> + char delimiter[max_delimiter_len];
>
> uint verbose;
> table_mapping m_table_map;
> diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
> index 91cf038907e..b4e3342d8f3 100644
> --- a/sql/sql_binlog.cc
> +++ b/sql/sql_binlog.cc
> @@ -28,6 +28,70 @@
> // START_EVENT_V3,
> // Log_event_type,
> // Log_event
> +
> +/**
> + Copy fragments into the standard placeholder thd->lex->comment.str.
> +
> + Compute the size of the (still) encoded total,
> + allocate and then copy fragments one after another.
> + The size can exceed max(max_allowed_packet) which is not a
> + problem as no String instance is created off this char array.
> +
> + @param thd THD handle
> + @return
> + 0 at success,
> + -1 otherwise.
> +*/
> +int binlog_defragment(THD *thd)
> +{
> + user_var_entry *entry[2];
> + LEX_STRING name[2]= { thd->lex->comment, thd->lex->ident };
> +
> + /* compute the total size */
> + thd->lex->comment.str= NULL;
> + thd->lex->comment.length= 0;
> + for (uint k= 0; k < 2; k++)
> + {
> + entry[k]=
> + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name[k].str,
> + name[k].length);
> + if (!entry[k] || entry[k]->type != STRING_RESULT)
> + {
> + my_error(ER_WRONG_TYPE_FOR_VAR, MYF(0), name[k].str);
> + return -1;
> + }
> + thd->lex->comment.length += entry[k]->length;
> + }
> +
> + thd->lex->comment.str= // to be freed by the caller
> + (char *) my_malloc(thd->lex->comment.length, MYF(MY_WME));
> + if (!thd->lex->comment.str)
> + {
> + my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR), 1);
> + return -1;
> + }
> +
> + /* fragments are merged into allocated buf while the user var:s get reset */
> + size_t gathered_length= 0;
> + for (uint k=0; k < 2; k++)
> + {
> + memcpy(thd->lex->comment.str + gathered_length, entry[k]->value, entry[k]->length);
> + gathered_length += entry[k]->length;
> + if (update_hash(entry[k], true, NULL, 0, STRING_RESULT, &my_charset_bin, 0))
> + {
> + my_printf_error(ER_WRONG_TYPE_FOR_VAR,
> + "%s: BINLOG fragment user "
> + "variable '%s' could not be unset", MYF(0),
> + ER_THD(thd, ER_WRONG_TYPE_FOR_VAR), entry[k]->value);
> + }
I don't see how update_hash(entry[k], true, ...) can ever fail, so
there's no need to pretend that it can.
> + }
> +
> + DBUG_ASSERT(gathered_length == thd->lex->comment.length);
> +
> + return 0;
> +}
> +
> +
> /**
> Execute a BINLOG statement.
>
> @@ -119,6 +175,23 @@ void mysql_client_binlog_statement(THD* thd)
> rli->sql_driver_thd= thd;
> rli->no_storage= TRUE;
>
> + if (unlikely(is_fragmented= thd->lex->comment.str && thd->lex->ident.str))
> + if (binlog_defragment(thd))
> + goto end;
> +
> + if (!(coded_len= thd->lex->comment.length))
> + {
> + my_error(ER_SYNTAX_ERROR, MYF(0));
> + goto end;
> + }
> +
> + decoded_len= base64_needed_decoded_length(coded_len);
> + if (!(buf= (char *) my_malloc(decoded_len, MYF(MY_WME))))
> + {
> + my_error(ER_OUTOFMEMORY, MYF(ME_FATALERROR), 1);
> + goto end;
> + }
> +
Technically, it should be possible to decode base64 in-place and avoid
allocating a second 3GB buffer. But let's not do it in this MDEV :)
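For the record, a rough sketch of why in-place decoding works: every 4
encoded characters produce at most 3 decoded bytes, so the write position
can never overtake the read position. The helper below is hypothetical,
not the server's base64 API:

#include <string.h>

static int base64_decode_in_place(char *buf, size_t coded_len,
                                  size_t *decoded_len)
{
  static const char b64[]=
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  size_t r, w= 0;
  unsigned v= 0;   /* bit accumulator; only its low, unconsumed bits are read */
  int bits= 0;     /* number of unconsumed bits in v */
  for (r= 0; r < coded_len; r++)
  {
    const char *p;
    char c= buf[r];
    if (c == '=' || c == '\n' || c == '\r')
      continue;                          /* skip padding and line breaks */
    if (!c || !(p= strchr(b64, c)))
      return 1;                          /* invalid character */
    v= (v << 6) | (unsigned) (p - b64);
    if ((bits+= 6) >= 8)
    {
      bits-= 8;
      buf[w++]= (char) ((v >> bits) & 0xff);   /* w <= r always holds */
    }
  }
  *decoded_len= w;
  return 0;
}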
> for (char const *strptr= thd->lex->comment.str ;
> strptr < thd->lex->comment.str + thd->lex->comment.length ; )
> {
Regards,
Sergei
Chief Architect MariaDB
and security(a)mariadb.org
[Maria-developers] New Foundation Buildbot and Github build status push
by Teodor Mircea Ionita 18 Dec '18
Hi all,
For the past 6 months we have been working on a new Buildbot deployment at:
https://buildbot.mariadb.org
Currently we have docker x64 builders for all supported Linux platforms (aptly named quick builders, since they all finish a build + default MTR under 40 minutes):
https://buildbot.mariadb.org/#/builders
We are working on adding builders for all the other supported targets in the near future, as well as acquiring more building power to accommodate --big and more extensive/complex tests. We are open to donations in that regard.
Saved package artifacts can be accessed from any individual build log via the "Download" link or directly from here:
https://ci.mariadb.org
The configuration and associated documentation can be found in the tools repo:
https://github.com/MariaDB/mariadb.org-tools/tree/master/buildbot.mariadb.o…
We encourage collaboration and are open to pull requests: you can add new builders, fix build steps, or adjust the docker images available in the dockerfiles/ sub-directory, etc.
Currently we consider this deployment in beta phase and have enabled Github status push for the main repo for direct code pushes as well as pull requests. The build status shows up in the same Github widget as Travis does and you can follow any individual report to the actual buildbot log via the links. Sample report for a push:
https://github.com/MariaDB/server/commits/bb-10.4-statustest
Sample PR:
https://github.com/MariaDB/server/pull/1034
Reproducing Linux failures is facilitated by the Dockerfiles available in the tools repo, which are used by the Buildbot master to create the build environment on the docker-enabled workers:
https://github.com/MariaDB/mariadb.org-tools/tree/master/buildbot.mariadb.o…
Ideally, any developer should pay attention to these build failures on Github and attempt to fix any issues before merging into the main branches. While in beta, we are trying to weed out environmental or configuration failures too, so please report those as you come across them so we can fix them.
The end goal is to enable protected branches, where any individual change has to pass a certain selection of robust tests before merging, which would ultimately ensure that the MariaDB code is in a releasable state after any individual commit.
We welcome feedback, improvement suggestions and any particular questions you might have about the current setup and future plans.
We will check-in from time to time as we have more goodness to show.
Best regards,
Teodor
--
Developer, Infra
MariaDB Foundation
https://mariadb.org
Re: [Maria-developers] [Commits] e1b0a8f9622: MDEV-6453: Assertion `inited==NONE || (inited==RND && scan)' failed in handler::ha_rnd_init(bool)
by Sergey Petrunia 15 Dec '18
Hi Varun,
On Mon, Jan 29, 2018 at 12:27:35PM +0530, Varun wrote:
> revision-id: e1b0a8f9622ab8c2bab988cb71225f992fec320a (mariadb-10.0.30-286-ge1b0a8f9622)
> parent(s): d01dbe66a8bf9cb6031f95159c49100f9299a768
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2018-01-29 12:23:31 +0530
> message:
>
> MDEV-6453: Assertion `inited==NONE || (inited==RND && scan)' failed in handler::ha_rnd_init(bool)
> with InnoDB, joins, AND/OR conditions
>
> The inited parameter handler is not initialised when we do a quick_select after a table scan.
>
As far as I understand, the problem only shows with "Range Checked for each record".
The failure scenario is as follows:
- a range-checked-for-each-record plan is constructed
- the first check picks a full table scan.
- the second check picks a QUICK_ROR_INTERSECT_SELECT scan
- QUICK_ROR_INTERSECT_SELECT starts to initialize the quick select.
- and it hits an assertion, because the handler object is already initialized
- index merge finds the handler not to be initialized correctly.
That is, the cause of the handler object not being correctly initialized is
the "range checked for each record" feature.
Because of that, I think it should be fixed in that feature as well. A more
suitable location would be in range-checked-for-each-record's code, in
test_if_quick_select():
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index db3ed8a1aa9..6634554ee6a 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -19637,6 +19637,10 @@ test_if_quick_select(JOIN_TAB *tab)
delete tab->select->quick;
tab->select->quick=0;
+
+ if (tab->table->file->inited != handler::NONE)
+ tab->table->file->ha_index_or_rnd_end();
+
int res= tab->select->test_quick_select(tab->join->thd, tab->keys,
(table_map) 0, HA_POS_ERROR, 0,
FALSE, /*remove where parts*/FALSE);
What do you think? (If you agree, let's use the above variant)
> ---
> mysql-test/r/range_innodb.result | 18 ++++++++++++++++++
> mysql-test/t/range_innodb.test | 17 +++++++++++++++++
> sql/opt_range.cc | 2 ++
> 3 files changed, 37 insertions(+)
>
> diff --git a/mysql-test/r/range_innodb.result b/mysql-test/r/range_innodb.result
> index 794e6c7b3cc..8bb1c833a56 100644
> --- a/mysql-test/r/range_innodb.result
> +++ b/mysql-test/r/range_innodb.result
> @@ -37,3 +37,21 @@ id select_type table type possible_keys key key_len ref rows Extra
> 1 SIMPLE t0 ALL NULL NULL NULL NULL 10
> 1 SIMPLE t2 range a,b b 5 NULL 201 Using where; Using join buffer (flat, BNL join)
> drop table t0,t1,t2;
> +CREATE TABLE t1 (
> +pk INT PRIMARY KEY, f1 INT, f2 CHAR(1), f3 CHAR(1),
> +KEY(f1), KEY(f2)
> +) ENGINE=InnoDB;
> +INSERT INTO t1 VALUES
> +(1,4,'v',NULL),(2,6,'v',NULL),(3,7,'c',NULL),(4,1,'e',NULL),(5,0,'x',NULL),
> +(6,7,'i',NULL),(7,7,'e',NULL),(8,1,'p',NULL),(9,7,'s',NULL),(10,1,'j',NULL),
> +(11,5,'z',NULL),(12,2,'c',NULL),(13,0,'a',NULL),(14,1,'q',NULL),(15,8,'y',NULL),
> +(16,1,'m',NULL),(17,1,'r',NULL),(18,9,'v',NULL),(19,1,'n',NULL);
> +CREATE TABLE t2 (f4 INT, f5 CHAR(1)) ENGINE=InnoDB;
> +INSERT INTO t2 VALUES (4,'q'),(NULL,'j');
> +SELECT * FROM t1 AS t1_1, t1 AS t1_2, t2
> +WHERE f5 = t1_2.f2 AND ( t1_1.f1 = 103 AND t1_1.f2 = 'o' OR t1_1.pk < f4 );
> +pk f1 f2 f3 pk f1 f2 f3 f4 f5
> +1 4 v NULL 14 1 q NULL 4 q
> +2 6 v NULL 14 1 q NULL 4 q
> +3 7 c NULL 14 1 q NULL 4 q
> +drop table t1,t2;
> diff --git a/mysql-test/t/range_innodb.test b/mysql-test/t/range_innodb.test
> index f76794814ef..605006587cc 100644
> --- a/mysql-test/t/range_innodb.test
> +++ b/mysql-test/t/range_innodb.test
> @@ -45,3 +45,20 @@ explain select * from t0 left join t2 on t2.a <t0.a and t2.b between 50 and 250;
>
> drop table t0,t1,t2;
>
> +CREATE TABLE t1 (
> + pk INT PRIMARY KEY, f1 INT, f2 CHAR(1), f3 CHAR(1),
> + KEY(f1), KEY(f2)
> +) ENGINE=InnoDB;
> +
> +INSERT INTO t1 VALUES
> +(1,4,'v',NULL),(2,6,'v',NULL),(3,7,'c',NULL),(4,1,'e',NULL),(5,0,'x',NULL),
> +(6,7,'i',NULL),(7,7,'e',NULL),(8,1,'p',NULL),(9,7,'s',NULL),(10,1,'j',NULL),
> +(11,5,'z',NULL),(12,2,'c',NULL),(13,0,'a',NULL),(14,1,'q',NULL),(15,8,'y',NULL),
> +(16,1,'m',NULL),(17,1,'r',NULL),(18,9,'v',NULL),(19,1,'n',NULL);
> +
> +CREATE TABLE t2 (f4 INT, f5 CHAR(1)) ENGINE=InnoDB;
> +INSERT INTO t2 VALUES (4,'q'),(NULL,'j');
> +
> +SELECT * FROM t1 AS t1_1, t1 AS t1_2, t2
> +WHERE f5 = t1_2.f2 AND ( t1_1.f1 = 103 AND t1_1.f2 = 'o' OR t1_1.pk < f4 );
> +drop table t1,t2;
> diff --git a/sql/opt_range.cc b/sql/opt_range.cc
> index f1d84e5c623..30b7f43ef28 100644
> --- a/sql/opt_range.cc
> +++ b/sql/opt_range.cc
> @@ -3003,6 +3003,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
> quick=0;
> needed_reg.clear_all();
> quick_keys.clear_all();
> + if (head->file->inited != handler::NONE)
> + head->file->ha_index_or_rnd_end();
> DBUG_ASSERT(!head->is_filled_at_execution());
> if (keys_to_use.is_clear_all() || head->is_filled_at_execution())
> DBUG_RETURN(0);
> _______________________________________________
> commits mailing list
> commits(a)mariadb.org
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
--
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
Re: [Maria-developers] 7c782988845: MDEV-16240: Assertion `0' failed in row_sel_convert_mysql_key_to_innobase
by Sergei Golubchik 14 Dec '18
Hi, Oleksandr!
On Dec 13, Oleksandr Byelkin wrote:
> revision-id: 7c782988845dec9f0f26a539911f66ed8cecdf83 (mariadb-10.2.19-51-g7c782988845)
> parent(s): ad3346dddf419aed3e5d16066471fd5022af1795
> author: Oleksandr Byelkin
> committer: Oleksandr Byelkin
> timestamp: 2018-12-13 15:29:52 +0100
> message:
>
> MDEV-16240: Assertion `0' failed in row_sel_convert_mysql_key_to_innobase
>
> Set table in row ID position mode before using this function.
>
> ---
> mysql-test/r/multi_update_innodb.result | 40 +++++++++++++++++++++++++++
> mysql-test/t/multi_update_innodb.test | 49 +++++++++++++++++++++++++++++++++
> sql/sql_update.cc | 4 +++
> 3 files changed, 93 insertions(+)
>
> diff --git a/mysql-test/r/multi_update_innodb.result b/mysql-test/r/multi_update_innodb.result
> index 5890fd24f5f..535c5a41d9f 100644
> --- a/mysql-test/r/multi_update_innodb.result
> +++ b/mysql-test/r/multi_update_innodb.result
> @@ -151,3 +151,43 @@ create table t2 like t1;
> insert into t2 select * from t1;
> delete t1,t2 from t2,t1 where t1.a<'B' and t2.b=t1.b;
> drop table t1,t2;
> +#
> +# MDEV-16240: Assertion `0' failed in
> +# row_sel_convert_mysql_key_to_innobase
> +#
> +SET @save_sql_mode=@@sql_mode;
> +set sql_mode='';
why does sql_mode matter here?
> +CREATE TABLE `t3` (
> +`col_varchar_nokey` TIMESTAMP NOT NULL DEFAULT '0000-00-00 00:00:00' ON UPDATE current_timestamp(),
> +`col_varchar_key` datetime DEFAULT '2000-01-01 00:00:00' ON UPDATE current_timestamp(),
> +`col_int_nokey` TIMESTAMP NULL DEFAULT '2000-01-01 00:00:00',
> +`pk` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
> +`col_int_key` datetime DEFAULT current_timestamp(),
> +PRIMARY KEY (`pk`),
> +UNIQUE KEY `col_varchar_key` (`col_varchar_key`),
> +KEY `col_int_key` (`col_int_key`)
> +) ENGINE=InnoDB;
> +INSERT INTO `t3` VALUES ('2018-05-18 15:08:07','2018-05-18 17:08:07','0000-00-00 00:00:00','0000-00-00 00:00:00','0000-00-00 00:00:00'),('0000-00-00 00:00:00','0000-00-00 00:00:00','1999-12-31 23:00:00','2002-07-03 23:04:40','0000-00-00 00:00:00');
> +CREATE VIEW `v1` AS
> +SELECT `t3`.`pk` AS `pk`,
> +`t3`.`col_int_nokey` AS `col_int_nokey`,
> +`t3`.`col_int_key` AS `col_int_key`,
> +`t3`.`col_varchar_key` AS `col_varchar_key`,
> +`t3`.`col_varchar_nokey` AS `col_varchar_nokey`
> +FROM `t3`;
> +CREATE TABLE `t4` (
> +`col_varchar_nokey` datetime DEFAULT current_timestamp() ON UPDATE current_timestamp(),
> +`col_int_nokey` timestamp NULL DEFAULT NULL,
> +`col_varchar_key` timestamp NULL DEFAULT '1999-12-31 23:00:00' ON UPDATE current_timestamp(),
> +`pk` int(11) NOT NULL,
> +`col_int_key` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp(),
> +PRIMARY KEY (`pk`)
> +) ENGINE=InnoDB;
> +INSERT INTO `t4` VALUES ('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,1,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,2,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,3,'2018-05-18 15:08:06'),('0000-00-00 00:00:00','0000-00-00 00:00:00',NULL,1976,'0000-00-00 00:00:00'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,2000,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,2001,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,2002,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,2003,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00',NULL,2004,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00','2018-05-18 15:08:06',2005,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00','2018-05-18 15:08:06',2018,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00','2018-05-18 15:08:06',2019,'2018-05-1
> 8 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00','2018-05-18 15:08:06',2024,'2018-05-18 15:08:06'),('2018-05-18 17:08:06','0000-00-00 00:00:00','1999-12-31 23:00:00',2025,'2018-05-18 15:08:06'),('0000-00-00 00:00:00',NULL,'2018-05-18 15:08:06',2026,'2018-05-18 15:08:06'),('2018-05-18 17:08:07','0000-00-00 00:00:00','0000-00-00 00:00:00',2027,'0000-00-00 00:00:00');
> +UPDATE `v1` t1, `t4` t2
> +SET t1.`col_varchar_key` = 6452736 WHERE t1.`col_int_key` = 6272000;
> +ERROR 23000: Duplicate entry '0000-00-00 00:00:00' for key 'col_varchar_key'
please, rename all columns in the test to have unique names. Which
col_varchar_key is meant in the error message?
> +DROP VIEW v1;
> +DROP TABLE t3,t4;
> +SET @@sql_mode=@save_sql_mode;
> +# End of 10.2 tests
> diff --git a/sql/sql_update.cc b/sql/sql_update.cc
> index 11ffa684216..6d4c11d494a 100644
> --- a/sql/sql_update.cc
> +++ b/sql/sql_update.cc
> @@ -211,6 +211,10 @@ static void prepare_record_for_error_message(int error, TABLE *table)
> bitmap_union(table->read_set, &unique_map);
> /* Tell the engine about the new set. */
> table->file->column_bitmaps_signal();
> + /* Prepare table for random positioning (importent for innodb) */
This definitely needs a more detailed comment. I suspect (remembering
your questions on slack) that rnd_pos() in InnoDB ends up using
whatever active_index was at the moment, not the primary key. I'm not
sure, though.
And you have a typo in the word "important".
By the way, why did using rnd_pos() after index_init() not trigger an
assert?.. Okay, I see why :) please fix it too.
Hmm, with the assert you won't need a detailed comment in sql_update.cc
anymore, assert will be a sufficient explanation.
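(A one-line sketch of what such an assert in handler::ha_rnd_pos() could
look like, assuming inited tracks the scan type as elsewhere in handler.h:

  DBUG_ASSERT(inited == RND);

so that calling rnd_pos() while an index scan is still inited fails loudly.)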
> + if (table->file->ha_index_or_rnd_end() ||
> + table->file->ha_rnd_init(0))
> + DBUG_VOID_RETURN;
> /* Read record that is identified by table->file->ref. */
> (void) table->file->ha_rnd_pos(table->record[1], table->file->ref);
> /* Copy the newly read columns into the new record. */
Regards,
Sergei
Chief Architect MariaDB
and security(a)mariadb.org
Re: [Maria-developers] c16ad313bc4: MDEV-16849 Extending indexed VARCHAR column should be instantaneous
by Sergei Golubchik 13 Dec '18
Hi, Thirunarayanan!
On Dec 13, Thirunarayanan Balathandayuthapani wrote:
> revision-id: c16ad313bc4 (mariadb-10.2.19-4-gc16ad313bc4)
> parent(s): 7e756437789
> author: Thirunarayanan Balathandayuthapani <thiru(a)mariadb.com>
> committer: Thirunarayanan Balathandayuthapani <thiru(a)mariadb.com>
> timestamp: 2018-11-15 15:14:02 +0530
> message:
>
> MDEV-16849 Extending indexed VARCHAR column should be instantaneous
...
> diff --git a/mysql-test/suite/innodb/r/alter_varchar_change.result b/mysql-test/suite/innodb/r/alter_varchar_change.result
> new file mode 100644
> index 00000000000..5d2c6403aab
> --- /dev/null
> +++ b/mysql-test/suite/innodb/r/alter_varchar_change.result
> @@ -0,0 +1,335 @@
...
> +CREATE TABLE t1(f1 INT NOT NULL,
> +f2 VARCHAR(100),
> +INDEX idx(f2(10)))ENGINE=InnoDB;
> +CALL get_table_id("test/t1", @tbl_id);
> +CALL get_index_id(@tbl_id, "idx", @idx_id);
> +ALTER TABLE t1 MODIFY f2 VARCHAR(200), DROP INDEX idx, ADD INDEX idx(f2(50));
> +CALL get_table_id("test/t1", @tbl1_id);
> +CALL get_index_id(@tbl1_id, "idx", @idx1_id);
> +SELECT @tbl1_id = @tbl_id;
> +@tbl1_id = @tbl_id
> +1
> +SELECT @idx1_id = @idx_id;
> +@idx1_id = @idx_id
> +1
Is that right? Old index would sort values "aaaaaaaaaa1", "aaaaaaaaaa2",
"aaaaaaaaaa3" as equal, so they could be in the index in any order.
But in the new index they aren't equal.
> +CREATE TABLE t1(f1 INT NOT NULL,
> +f2 VARCHAR(100), FULLTEXT idx(f2))ENGINE=InnoDB;
> +CALL get_table_id("test/t1", @tbl_id);
> +CALL get_index_id(@tbl_id, "idx", @idx_id);
> +ALTER TABLE t1 MODIFY f2 VARCHAR(200);
please, add a case where there's a FULLTEXT index, as above,
but ALTER TABLE makes a column shorter.
The index must be rebuilt.
> +CREATE TABLE t1(f1 INT NOT NULL,
> +f2 VARCHAR(100),
> +INDEX idx(f2(10)),
> +INDEX idx1(f1))ENGINE=InnoDB;
> +CALL get_table_id("test/t1", @tbl_id);
> +CALL get_index_id(@tbl_id, "idx", @idx_id);
> +ALTER TABLE t1 MODIFY f2 VARCHAR(200), DROP INDEX idx1;
Please also add a test case where VARCHAR is made shorter, but still
longer than a prefix:
ALTER TABLE t1 MODIFY f2 VARCHAR(50)
The index should not be rebuilt.
And shorter than a prefix:
ALTER TABLE t1 MODIFY f2 VARCHAR(5)
The index must be rebuilt.
Also add a test where ALTER TABLE changes the column's charset, or the
column type (CHAR/VARCHAR/TEXT). And a test where the column's length is
changed across the 255 boundary, e.g. from 200 to 300.
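(With a single-byte charset the stored length prefix grows from one byte
to two once the column can exceed 255 bytes, so e.g.

ALTER TABLE t1 MODIFY f2 VARCHAR(300);

presumably cannot keep the IS_EQUAL_PACK_LENGTH fast path.)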
> diff --git a/sql/sql_table.cc b/sql/sql_table.cc
> --- a/sql/sql_table.cc
> +++ b/sql/sql_table.cc
> @@ -6687,18 +6687,28 @@ static bool fill_alter_inplace_info(THD *thd,
> key_part < end;
> key_part++, new_part++)
> {
> + new_field= get_field_by_index(alter_info, new_part->fieldnr);
> + old_field= table->field[key_part->fieldnr - 1];
> /*
> + If there is a change in index length due to column expansion
> + like varchar(X) changed to varchar(X + N) and has a compatible
> + packed data representation, we mark it for fast/INPLACE change
> + in index definition. InnoDB supports INPLACE for this cases
> +
> Key definition has changed if we are using a different field or
> - if the used key part length is different. It makes sense to
> - check lengths first as in case when fields differ it is likely
> - that lengths differ too and checking fields is more expensive
> - in general case.
> + if the user key part length is different.
> */
> - if (key_part->length != new_part->length)
> + if (key_part->length <= new_part->length &&
> + old_field->pack_length() < new_field->pack_length &&
I still don't understand this condition. Why do you care whether a field
length has changed, when you should be interested in whether the index
length has changed? Field length changes are completely
irrelevant here.
again, the only possible ALTER_COLUMN_INDEX_LENGTH case:
* key part length was the same as column length, and it was not decreased.
So, your condition should, probably, be something like
if (key_part->length == old_field->pack_length() &&
key_part->length < new_part->length)
> + (key_part->field->is_equal((Create_field*) new_field)
> + == IS_EQUAL_PACK_LENGTH))
> + {
> + ha_alter_info->handler_flags |=
> + Alter_inplace_info::ALTER_COLUMN_INDEX_LENGTH;
> + }
> + else if (key_part->length != new_part->length)
> goto index_changed;
>
> - new_field= get_field_by_index(alter_info, new_part->fieldnr);
> -
> /*
> For prefix keys KEY_PART_INFO::field points to cloned Field
> object with adjusted length. So below we have to check field
Regards,
Sergei
Chief Architect MariaDB
and security(a)mariadb.org
Re: [Maria-developers] [Commits] 667d178: MDEV-14576 Include full name of object in message about incorrect value for column.
by Sergei Golubchik 12 Dec '18
Hi, Alexey!
Looks ok. I didn't expect make_truncated_value_warning to be used that
much :(
Just one comment below:
On Nov 13, Alexey Botchkov wrote:
> revision-id: 667d1786a96a40ee5074c9f1b493c7cf05a85cca (mariadb-10.2.18-80-g667d178)
> parent(s): 59b87e75d04f0ed75256724c78f7fd7af2f96e9b
> committer: Alexey Botchkov
> timestamp: 2018-11-13 00:11:32 +0400
> message:
>
> MDEV-14576 Include full name of object in message about incorrect value for column.
>
> The error message modified.
> Then the TABLE_SHARE::error_table_name() implementation taken from 10.3,
> to be used as a name of the table in this message.
>
> diff --git a/sql/field.cc b/sql/field.cc
> index caa84dc..4cb6eb2 100644
> --- a/sql/field.cc
> +++ b/sql/field.cc
> @@ -2079,7 +2079,7 @@ bool Field_num::get_date(MYSQL_TIME *ltime,ulonglong fuzzydate)
> longlong nr= val_int();
> bool neg= !(flags & UNSIGNED_FLAG) && nr < 0;
> return int_to_datetime_with_warn(neg, neg ? -nr : nr, ltime, fuzzydate,
> - field_name);
> + table->s, field_name);
couldn't you just pass this instead of this->table->s and this->field_name ?
> }
>
>
> diff --git a/sql/sql_class.cc b/sql/sql_class.cc
> index 71d5b80..8f8607d 100644
> --- a/sql/sql_class.cc
> +++ b/sql/sql_class.cc
> @@ -3009,6 +3009,10 @@ int select_export::send_data(List<Item> &items)
> error_pos= copier.most_important_error_pos();
> if (error_pos)
> {
> + /*
> + TODO:
> + add new error message that will show user this printable_buff
> +
> char printable_buff[32];
> convert_to_printable(printable_buff, sizeof(printable_buff),
> error_pos, res->ptr() + res->length() - error_pos,
> @@ -3018,6 +3022,11 @@ int select_export::send_data(List<Item> &items)
> ER_THD(thd, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
> "string", printable_buff,
> item->name, static_cast<long>(row_count));
> + */
> + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> + ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
> + ER_THD(thd, WARN_DATA_TRUNCATED),
> + item->name, static_cast<long>(row_count));
Why not ER_TRUNCATED_WRONG_VALUE "Truncated incorrect %-.32s value: '%-.128s'" ?
Ah, it has no row number...
> }
> else if (copier.source_end_pos() < res->ptr() + res->length())
> {
Regards,
Sergei
Chief Architect MariaDB
and security(a)mariadb.org
[Maria-developers] Extending storage engine API for random-row extraction for histogram collection (and others)
by Vicențiu Ciorbaru 11 Dec '18
Hi!
Here is my proposal on extending the storage engine API to provide
functionality for retrieving random rows from tables (those that have
indexes). The storage engines for which I plan to implement this are
MyISAM, Aria, and InnoDB, possibly also RocksDB and TokuDB.
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -2927,7 +2927,7 @@ class handler :public Sql_alloc
/** Length of ref (1-8 or the clustered key length) */
uint ref_length;
FT_INFO *ft_handler;
- enum init_stat { NONE=0, INDEX, RND };
+ enum init_stat { NONE=0, INDEX, RND, RANDOM };
init_stat inited, pre_inited;
........
+ virtual int ha_random_sample_init() __attribute__((warn_unused_result))
+ {
+ DBUG_ENTER("ha_random_sample_init");
+ inited= RANDOM;
+ DBUG_RETURN(random_sample_init());
+ }
+ virtual int ha_random_sample(uint inx,
+ key_range *min_key,
+ key_range *max_key)
+ __attribute__((warn_unused_result))
+ {
+ DBUG_ENTER("ha_random_sample");
+ DBUG_ASSERT(inited == RANDOM);
+ DBUG_RETURN(random_sample(inx, min_key, max_key));
+ }
+ virtual int ha_random_sample_end() __attribute__((warn_unused_result))
+ {
+ DBUG_ENTER("ha_random_sample_end");
+ inited= NONE;
+ DBUG_RETURN(random_sample_end());
+ }
+
This is the default implementation for a storage engine which does not
support it:
+ virtual int random_sample_init() { return 0; }
+ virtual int random_sample(uint idx, key_range *min_key, key_range *max_key)
+ {
+   return HA_ERR_WRONG_COMMAND;
+ }
+ virtual int random_sample_end() { return 0; }
Alternative ideas: random_sample_init() takes the idx as a parameter and
random_sample just fetches a row from the range using the index previously
specified. The range can be left unspecified with nulls to provide a fetch
from the full table range.
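A rough sketch of that variant (the signatures are only indicative; buf is
the record buffer, as in rnd_next()):

virtual int random_sample_init(uint idx,
                               key_range *min_key,  /* NULL: unbounded */
                               key_range *max_key)  /* NULL: unbounded */
{ return 0; }
virtual int random_sample(uchar *buf)  /* one random row into buf */
{ return HA_ERR_WRONG_COMMAND; }
virtual int random_sample_end() { return 0; }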
I don't know enough about storage engine internals to know if an index
declaration within the init function instead of within the "sample"
function is better. Maybe I am complicating it too much and a simple
random_sample() function is sufficient, kind of like how
ha_records_in_range does it.
Thoughts?
Vicențiu
Re: [Maria-developers] [Commits] 07f5d03: MDEV-5313 Improving merge audit api.
by Sergei Golubchik 10 Dec '18
Hi, Alexey!
On Dec 03, Alexey Botchkov wrote:
> revision-id: 07f5d036fbb5bdcb011a84f5c882a062d9e609e7 (mariadb-10.4.0-54-g07f5d03)
> parent(s): 3afae13b548c903d86a55530d59fbf7d8666281f
> committer: Alexey Botchkov
> timestamp: 2018-12-03 02:35:52 +0400
> message:
>
> MDEV-5313 Improving merge audit api.
"improving audit api" ?
we don't merge it anymore, right?
> service_json and service_cfg_table interfaces added.
>
> diff --git a/include/mysql/service_cfg_table.h b/include/mysql/service_cfg_table.h
> new file mode 100644
> index 0000000..36660ff
> --- /dev/null
> +++ b/include/mysql/service_cfg_table.h
> @@ -0,0 +1,109 @@
> +/* Copyright (C) 2018 MariaDB Corporation
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; version 2 of the License.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program; if not, write to the Free Software
> + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
> +
> +#ifndef MYSQL_SERVICE_CFG_TABLE
> +#define MYSQL_SERVICE_CFG_TABLE
> +
> +/**
> + @file
> + cfg table service
> +
> + Reading plugin settings from the server table.
> +
> + This service lets the plugin to read it's settings
> + from the server table.
> + This 'cfg' table has two VARCHAR fields per row
> + which are
> + 'id' - the name of the setting
> + 'value' - the value of the respective 'id' setting
> + The structure 'cfg_table_row' reflects this format for
> + the data interchange with the service.
> + Fuctions of the service:
> + cfg_table_check_exists
> + checks if the table exists.
> + cfg_table_create - creates the table
> + parameers:
> + table_name - the name of the table
> + int id_len, int value_len - the length of the 'id' and 'value'
> + fields.
> + const struct cfg_table_row *defaults - table will be filled
> + with these default rows.
> + The list of rows is ended by {0,0} row
> +
> + cfg_table_set - replaces (inserts if not exists) the setting
> + parameters:
> + table_name - the name of the table
> + const char *id, const char *value - the 'id' and the new value
> +
> + cfg_table_get - reads the cft tale content
> + It return the number of rows in the table or -1 if error.
> + parameters:
> + table_name,
> + struct cfg_table_row *data_buf - buffer to store the records
> + int n_buf_row - the size of the buffer
> +*/
This is, of course, a very much overdue functionality.
I'm not sure it should be limited to key/value pairs.
On the other hand, if it's plugin config only, loaded once on startup -
then it should be fine. Just need to make clear it's not for run-time
data.
In the implementation, make sure that when a table is opened, there are
no other opened/locked tables. This will avoid deadlocks and prevent
plugins from using it from inside another statement.
Also,
* I'd put the table name into pluginname namespace. Not quite sure how
to do it, though.
* Why do you need cfg_table_check_exists and cfg_table_create? A plugin
only needs set and get methods. The table can be created automatically on
the first access; a plugin can always assume it exists.
* maybe even just one table, plugin_config, with two columns,
plugin_name and json_config? The plugin gets the json on startup and
that's all. No way for a plugin to access the table directly.
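Something like this, just as a sketch:

CREATE TABLE mysql.plugin_config (
  plugin_name VARCHAR(64) NOT NULL PRIMARY KEY,
  json_config TEXT
);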
> diff --git a/include/mysql/service_json.h b/include/mysql/service_json.h
> new file mode 100644
> index 0000000..5d0e260
> --- /dev/null
> +++ b/include/mysql/service_json.h
...
> +int js_type(const char *js, const char *js_end,
> + enum js_value_types *vt,
> + const char **v, int *vlen);
> +int js_get_array_item(const char *js, const char *js_end, int n_item,
> + enum js_value_types *vt,
> + const char **v, int *vlen);
> +int js_get_object_key(const char *js, const char *js_end, const char *key,
> + enum js_value_types *vt,
> + const char **v, int *vlen);
> +int js_get_object_nkey(const char *js,const char *js_end, int nkey,
> + const char **keyname, const char **keyname_end,
> + enum js_value_types *vt,
> + const char **v, int *vlen);
I wouldn't introduce ten different APIs to access json. Just
json_get(json, path) and json_set(json, path, value). Same path syntax
everywhere, like on the SQL level.
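That is, roughly (a sketch only; the out-parameter style follows the js_*
prototypes above):

int json_get(const char *js, const char *js_end, const char *path,
             enum js_value_types *vt, const char **v, int *vlen);
int json_set(const char *js, const char *js_end, const char *path,
             const char *value, int vlen,
             char *out, int out_size, int *out_len);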
Regards,
Sergei
Chief Architect MariaDB
and security(a)mariadb.org
Hi, Alexey!
It's important for USER_TABLE to be last. And there can be only one last
value in the enum: if I add TABLES_MAX, someone will eventually put a new
table just before TABLES_MAX (tests will catch it, but it might take
hours to track down the actual reason).
So, it's safer to have just one last element in the enum, USER_TABLE.
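Sketched (the names before USER_TABLE are placeholders):

enum { DB_TABLE, TABLES_PRIV_TABLE, /* ..., */ USER_TABLE /* always last */ };
static LEX_CSTRING MYSQL_TABLE_NAME[USER_TABLE+1]= { /* ... */ };

so a table added before USER_TABLE keeps the array size correct.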
On Dec 09, Alexey Botchkov wrote:
> Hello, Sergei!
>
> I haven't found anything serious to improve there in your changes.
> sql_acl.cc gets nicer with this patch. So ok to push.
>
> Only question left -
> - TABLES_MAX // <== always the last
> + USER_TABLE // <== always the last
>
> Why not preserve the TABLES_MAX? Seems more self-explaining.
> +static LEX_CSTRING MYSQL_TABLE_NAME[USER_TABLE+1]=
> could be
> +static LEX_CSTRING MYSQL_TABLE_NAME[TABLES_MAX]=
> and a couple of similar places.
>
> Best regards.
> HF
Regards,
Sergei
Chief Architect MariaDB
and security(a)mariadb.org
Re: [Maria-developers] [Commits] 8bfb140d5dc: Move deletion of old GTID rows to slave background thread
by andrei.elkin@pp.inet.fi 07 Dec '18
Kristian, hello.
The patch is great and instructive in many ways.
Thanks!
There is something to improve in the test organization, like basing the
two tests
> storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
on a common parent.
I thought for a second about placing it in mysql-test/include/,
but the parent file is so specific that I had to stop.
This apparently can wait until a third engine shows up and requires the
same coverage.
Cheers,
Andrei
> revision-id: 8bfb140d5dc247c183787b8a0a1799cf375845bd (mariadb-10.3.10-25-g8bfb140d5dc)
> parent(s): 74387028a06c557f36a0fd1bbde347f1551c8fb7
> author: Kristian Nielsen
> committer: Kristian Nielsen
> timestamp: 2018-11-25 19:38:33 +0100
> message:
>
> Move deletion of old GTID rows to slave background thread
>
> This patch changes how old rows in mysql.gtid_slave_pos* tables are deleted.
> Instead of doing it as part of every replicated transaction in
> record_gtid(), it is done periodically (every @@gtid_cleanup_batch_size
> transaction) in the slave background thread.
>
> This removes the deletion step from the replication process in SQL or worker
> threads, which could speed up replication with many small transactions. It
> also decreases contention on the global mutex LOCK_slave_state. And it
> simplifies the logic, eg. when a replicated transaction fails after having
> deleted old rows.
>
> With this patch, the deletion of old GTID rows happens asynchroneously and
> slightly non-deterministic. Thus the number of old rows in
> mysql.gtid_slave_pos can temporarily exceed @@gtid_cleanup_batch_size. But
> all old rows will be deleted eventually after sufficiently many new GTIDs
> have been replicated.
>
> ---
> mysql-test/main/mysqld--help.result | 10 +
> mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result | 40 +-
> mysql-test/suite/rpl/r/rpl_gtid_stop_start.result | 8 +-
> .../suite/rpl/r/rpl_parallel_optimistic.result | 14 +-
> mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test | 68 +++-
> .../suite/rpl/t/rpl_parallel_optimistic.test | 42 ++-
> .../sys_vars/r/sysvars_server_notembedded.result | 14 +
> sql/log_event.cc | 6 +-
> sql/mysqld.cc | 1 +
> sql/mysqld.h | 1 +
> sql/rpl_gtid.cc | 413 +++++++++++++--------
> sql/rpl_gtid.h | 12 +-
> sql/rpl_rli.cc | 87 +----
> sql/rpl_rli.h | 11 -
> sql/slave.cc | 35 +-
> sql/slave.h | 1 +
> sql/sys_vars.cc | 13 +
> .../mysql-test/rocksdb_rpl/r/mdev12179.result | 18 +
> .../mysql-test/rocksdb_rpl/t/mdev12179.test | 85 +++++
> .../mysql-test/tokudb_rpl/r/mdev12179.result | 18 +
> .../tokudb/mysql-test/tokudb_rpl/t/mdev12179.test | 85 +++++
> 21 files changed, 675 insertions(+), 307 deletions(-)
>
> diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
> index 5a7153f32d3..4f801ec5275 100644
> --- a/mysql-test/main/mysqld--help.result
> +++ b/mysql-test/main/mysqld--help.result
> @@ -294,6 +294,15 @@ The following specify which files/extra groups are read (specified before remain
> --group-concat-max-len=#
> The maximum length of the result of function
> GROUP_CONCAT()
> + --gtid-cleanup-batch-size=#
> + Normally does not need tuning. How many old rows must
> + accumulate in the mysql.gtid_slave_pos table before a
> + background job will be run to delete them. Can be
> + increased to reduce number of commits if using many
> + different engines with --gtid_pos_auto_engines, or to
> + reduce CPU overhead if using a huge number of different
> + gtid_domain_ids. Can be decreased to reduce number of old
> + rows in the table.
> --gtid-domain-id=# Used with global transaction ID to identify logically
> independent replication streams. When events can
> propagate through multiple parallel paths (for example
> @@ -1425,6 +1434,7 @@ gdb FALSE
> general-log FALSE
> getopt-prefix-matching FALSE
> group-concat-max-len 1048576
> +gtid-cleanup-batch-size 64
> gtid-domain-id 0
> gtid-ignore-duplicates FALSE
> gtid-pos-auto-engines
> diff --git a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
> index aaeb0c8f119..55d2831dcf4 100644
> --- a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
> +++ b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
> @@ -16,36 +16,32 @@ INSERT INTO t1 VALUES (1);
> connection slave;
> connection slave;
> include/stop_slave.inc
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 2;
> SET @old_dbug= @@GLOBAL.debug_dbug;
> SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
> SET sql_log_bin= 0;
> -CALL mtr.add_suppression("Can't find file");
> +CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
> SET sql_log_bin= 1;
> include/start_slave.inc
> connection master;
> -INSERT INTO t1 VALUES (2);
> -connection slave;
> -include/wait_for_slave_sql_error.inc [errno=1942]
> -STOP SLAVE IO_THREAD;
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> -ORDER BY domain_id, sub_id DESC LIMIT 1;
> -domain_id server_id seq_no
> -0 1 3
> +connection slave;
> +SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
> +FROM mysql.gtid_slave_pos;
> +SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
> +IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count))
> +OK
> SET GLOBAL debug_dbug= @old_dbug;
> -include/start_slave.inc
> connection master;
> -INSERT INTO t1 VALUES (3);
> -connection slave;
> -connection slave;
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> -ORDER BY domain_id, sub_id DESC LIMIT 1;
> -domain_id server_id seq_no
> -0 1 4
> -SELECT * FROM t1 ORDER BY i;
> -i
> -1
> -2
> -3
> +connection slave;
> +connection slave;
> +SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
> +FROM mysql.gtid_slave_pos
> +WHERE seq_no <= @pre_max_seq_no;
> +IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
> +OK
> connection master;
> DROP TABLE t1;
> +connection slave;
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
> include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
> index ff845794c22..b27ffed9f94 100644
> --- a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
> +++ b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
> @@ -171,7 +171,7 @@ include/start_slave.inc
> *** MDEV-4692: mysql.gtid_slave_pos accumulates values for a domain ***
> SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
> domain_id COUNT(*)
> -0 2
> +0 3
> 1 2
> connection server_1;
> INSERT INTO t1 VALUES (11);
> @@ -179,7 +179,7 @@ connection server_2;
> FLUSH NO_WRITE_TO_BINLOG TABLES;
> SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
> domain_id COUNT(*)
> -0 2
> +0 4
> 1 2
> include/start_slave.inc
> connection server_1;
> @@ -189,8 +189,8 @@ connection server_2;
> FLUSH NO_WRITE_TO_BINLOG TABLES;
> SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
> domain_id COUNT(*)
> -0 2
> -1 2
> +0 3
> +1 1
> *** MDEV-4650: show variables; ERROR 1946 (HY000): Failed to load replication slave GTID position ***
> connection server_2;
> SET sql_log_bin=0;
> diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
> index ca202a66b0e..83343e52cab 100644
> --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
> +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
> @@ -12,6 +12,8 @@ SET GLOBAL slave_parallel_threads=10;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
> SET GLOBAL slave_parallel_mode='optimistic';
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 1000000;
> connection server_1;
> INSERT INTO t1 VALUES(1,1);
> BEGIN;
> @@ -131,6 +133,11 @@ c
> 204
> 205
> 206
> +SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
> +FROM mysql.gtid_slave_pos;
> +IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
> +OK
> +SET GLOBAL gtid_cleanup_batch_size=1;
> *** Test @@skip_parallel_replication. ***
> connection server_2;
> include/stop_slave.inc
> @@ -651,9 +658,10 @@ DROP TABLE t1, t2, t3;
> include/save_master_gtid.inc
> connection server_2;
> include/sync_with_master_gtid.inc
> -Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
> -select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
> -count(4) <= 4
> +SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
> +FROM mysql.gtid_slave_pos;
> +COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
> 1
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
> connection server_1;
> include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
> index e1f5696f5a1..a28bff3d27a 100644
> --- a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
> +++ b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
> @@ -28,37 +28,79 @@ INSERT INTO t1 VALUES (1);
> # Inject an artificial error deleting entries, and check that the error handling code works.
> --connection slave
> --source include/stop_slave.inc
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 2;
> SET @old_dbug= @@GLOBAL.debug_dbug;
> SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
> SET sql_log_bin= 0;
> -CALL mtr.add_suppression("Can't find file");
> +CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
> SET sql_log_bin= 1;
> --source include/start_slave.inc
>
> --connection master
> -INSERT INTO t1 VALUES (2);
> +--disable_query_log
> +let $i = 20;
> +while ($i) {
> + eval INSERT INTO t1 VALUES ($i+10);
> + dec $i;
> +}
> +--enable_query_log
> +--save_master_pos
>
> --connection slave
> ---let $slave_sql_errno= 1942
> ---source include/wait_for_slave_sql_error.inc
> -STOP SLAVE IO_THREAD;
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> - ORDER BY domain_id, sub_id DESC LIMIT 1;
> +--sync_with_master
> +
> +# Now wait for the slave background thread to try to delete old rows and
> +# hit the error injection.
> +--let _TEST_MYSQLD_ERROR_LOG=$MYSQLTEST_VARDIR/log/mysqld.2.err
> +--perl
> + open F, '<', $ENV{'_TEST_MYSQLD_ERROR_LOG'} or die;
> + outer: while (1) {
> + inner: while (<F>) {
> + last outer if /<DEBUG> Error deleting old GTID row/;
> + }
> + # Easy way to do sub-second sleep without extra modules.
> + select(undef, undef, undef, 0.1);
> + }
> +EOF
> +
> +# Since we injected error in the cleanup code, the rows should remain in
> +# mysql.gtid_slave_pos. Check that we have at least 20 (more robust against
> +# non-deterministic cleanup and future changes than checking for an exact number).
> +SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
> + FROM mysql.gtid_slave_pos;
> +SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
> SET GLOBAL debug_dbug= @old_dbug;
> ---source include/start_slave.inc
>
> --connection master
> -INSERT INTO t1 VALUES (3);
> +--disable_query_log
> +let $i = 20;
> +while ($i) {
> + eval INSERT INTO t1 VALUES ($i+40);
> + dec $i;
> +}
> +--enable_query_log
> --sync_slave_with_master
>
> --connection slave
> -SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
> - ORDER BY domain_id, sub_id DESC LIMIT 1;
> -SELECT * FROM t1 ORDER BY i;
> -
> +# Now check that 1) rows are being deleted again after removing error
> +# injection, and 2) old rows are left that failed their delete while errors
> +# were injected (again compensating for non-deterministic deletion).
> +# Deletion is async and slightly non-deterministic, so we wait for at
> +# least 10 of the 20 new rows to be deleted.
> +let $wait_condition=
> + SELECT COUNT(*) <= 20-10
> + FROM mysql.gtid_slave_pos
> + WHERE seq_no > @pre_max_seq_no;
> +--source include/wait_condition.inc
> +SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
> + FROM mysql.gtid_slave_pos
> + WHERE seq_no <= @pre_max_seq_no;
>
> # Clean up
> --connection master
> DROP TABLE t1;
> +--connection slave
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
>
> --source include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
> index e08472d5f51..0060cf4416c 100644
> --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
> +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
> @@ -21,6 +21,10 @@ SET GLOBAL slave_parallel_threads=10;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
> SET GLOBAL slave_parallel_mode='optimistic';
> +# Run the first part of the test with high batch size and see that
> +# old rows remain in the table.
> +SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
> +SET GLOBAL gtid_cleanup_batch_size= 1000000;
>
>
> --connection server_1
> @@ -108,7 +112,12 @@ SELECT * FROM t3 ORDER BY c;
> SELECT * FROM t1 ORDER BY a;
> SELECT * FROM t2 ORDER BY a;
> SELECT * FROM t3 ORDER BY c;
> -#SHOW STATUS LIKE 'Slave_retried_transactions';
> +# Check that we have a bunch of old rows left-over - they were not deleted
> +# due to high @@gtid_cleanup_batch_size. Then set a low
> +# @@gtid_cleanup_batch_size so we can test that rows start being deleted.
> +SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
> + FROM mysql.gtid_slave_pos;
> +SET GLOBAL gtid_cleanup_batch_size=1;
>
>
> --echo *** Test @@skip_parallel_replication. ***
> @@ -557,25 +566,18 @@ DROP TABLE t1, t2, t3;
>
> --connection server_2
> --source include/sync_with_master_gtid.inc
> -# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147).
> -#
> -# There was a bug when a transaction got a conflict and was rolled back. It
> -# might have also handled deletion of some old rows, and these deletions would
> -# then also be rolled back. And since the deletes were never re-tried, old no
> -# longer needed rows would accumulate in the table without limit.
> -#
> -# The earlier part of this test file have plenty of transactions being rolled
> -# back. But the last DROP TABLE statement runs on its own and should never
> -# conflict, thus at this point the mysql.gtid_slave_pos table should be clean.
> -#
> -# To support @@gtid_pos_auto_engines, when a row is inserted in the table, it
> -# is associated with the engine of the table at insertion time, and it will
> -# only be deleted during record_gtid from a table of the same engine. Since we
> -# alter the table from MyISAM to InnoDB at the start of this test, we should
> -# end up with 4 rows: two left-over from when the table was MyISAM, and two
> -# left-over from the InnoDB part.
> ---echo Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
> -select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
> +# Check that old rows are deleted from mysql.gtid_slave_pos.
> +# Deletion is asynchronous, so use wait_condition.inc.
> +# Also, there is a small amount of non-determinism in the deletion of old
> +# rows, so it is not guaranteed that there can never be more than
> +# @@gtid_cleanup_batch_size rows in the table; so allow a bit of slack
> +# here.
> +let $wait_condition=
> + SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
> + FROM mysql.gtid_slave_pos;
> +--source include/wait_condition.inc
> +eval $wait_condition;
> +SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
>
> --connection server_1
> --source include/rpl_end.inc
> diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> index e8e4d671eb9..5c5ca8b66b2 100644
> --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
> @@ -1202,6 +1202,20 @@ NUMERIC_BLOCK_SIZE NULL
> ENUM_VALUE_LIST NULL
> READ_ONLY NO
> COMMAND_LINE_ARGUMENT NULL
> +VARIABLE_NAME GTID_CLEANUP_BATCH_SIZE
> +SESSION_VALUE NULL
> +GLOBAL_VALUE 64
> +GLOBAL_VALUE_ORIGIN COMPILE-TIME
> +DEFAULT_VALUE 64
> +VARIABLE_SCOPE GLOBAL
> +VARIABLE_TYPE INT UNSIGNED
> +VARIABLE_COMMENT Normally does not need tuning. How many old rows must accumulate in the mysql.gtid_slave_pos table before a background job will be run to delete them. Can be increased to reduce number of commits if using many different engines with --gtid_pos_auto_engines, or to reduce CPU overhead if using a huge number of different gtid_domain_ids. Can be decreased to reduce number of old rows in the table.
> +NUMERIC_MIN_VALUE 0
> +NUMERIC_MAX_VALUE 2147483647
> +NUMERIC_BLOCK_SIZE 1
> +ENUM_VALUE_LIST NULL
> +READ_ONLY NO
> +COMMAND_LINE_ARGUMENT REQUIRED
> VARIABLE_NAME GTID_CURRENT_POS
> SESSION_VALUE NULL
> GLOBAL_VALUE
> diff --git a/sql/log_event.cc b/sql/log_event.cc
> index 8813d20578e..e10480fb015 100644
> --- a/sql/log_event.cc
> +++ b/sql/log_event.cc
> @@ -5565,7 +5565,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
> gtid= rgi->current_gtid;
> if (unlikely(rpl_global_gtid_slave_state->record_gtid(thd, >id,
> sub_id,
> - rgi, false,
> + true, false,
> &hton)))
> {
> int errcode= thd->get_stmt_da()->sql_errno();
> @@ -8362,7 +8362,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi)
> {
> if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i],
> sub_id_list[i],
> - NULL, false, &hton)))
> + false, false, &hton)))
> return ret;
> rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i],
> hton, NULL);
> @@ -8899,7 +8899,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
> rgi->gtid_pending= false;
>
> gtid= rgi->current_gtid;
> - err= rpl_global_gtid_slave_state->record_gtid(thd, >id, sub_id, rgi,
> + err= rpl_global_gtid_slave_state->record_gtid(thd, >id, sub_id, true,
> false, &hton);
> if (unlikely(err))
> {
> diff --git a/sql/mysqld.cc b/sql/mysqld.cc
> index afef4a5f52c..07bdd66f74c 100644
> --- a/sql/mysqld.cc
> +++ b/sql/mysqld.cc
> @@ -580,6 +580,7 @@ ulong opt_binlog_commit_wait_count= 0;
> ulong opt_binlog_commit_wait_usec= 0;
> ulong opt_slave_parallel_max_queued= 131072;
> my_bool opt_gtid_ignore_duplicates= FALSE;
> +uint opt_gtid_cleanup_batch_size= 64;
>
> const double log_10[] = {
> 1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
> diff --git a/sql/mysqld.h b/sql/mysqld.h
> index d5cabd790b2..261748372f9 100644
> --- a/sql/mysqld.h
> +++ b/sql/mysqld.h
> @@ -258,6 +258,7 @@ extern ulong opt_slave_parallel_mode;
> extern ulong opt_binlog_commit_wait_count;
> extern ulong opt_binlog_commit_wait_usec;
> extern my_bool opt_gtid_ignore_duplicates;
> +extern uint opt_gtid_cleanup_batch_size;
> extern ulong back_log;
> extern ulong executed_events;
> extern char language[FN_REFLEN];
> diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc
> index fabd09adaa7..196c2fe3d16 100644
> --- a/sql/rpl_gtid.cc
> +++ b/sql/rpl_gtid.cc
> @@ -79,7 +79,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi)
> rgi->gtid_pending= false;
> if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE)
> {
> - if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false, &hton))
> + if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false, &hton))
> DBUG_RETURN(1);
> update_state_hash(sub_id, &rgi->current_gtid, hton, rgi);
> }
> @@ -244,7 +244,7 @@ rpl_slave_state_free_element(void *arg)
>
>
> rpl_slave_state::rpl_slave_state()
> - : last_sub_id(0), gtid_pos_tables(0), loaded(false)
> + : pending_gtid_count(0), last_sub_id(0), gtid_pos_tables(0), loaded(false)
> {
> mysql_mutex_init(key_LOCK_slave_state, &LOCK_slave_state,
> MY_MUTEX_INIT_SLOW);
> @@ -331,14 +331,11 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
> }
> }
> rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL;
> -
> -#ifdef HAVE_REPLICATION
> - rgi->pending_gtid_deletes_clear();
> -#endif
> }
>
> if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME))))
> return 1;
> + list_elem->domain_id= domain_id;
> list_elem->server_id= server_id;
> list_elem->sub_id= sub_id;
> list_elem->seq_no= seq_no;
> @@ -348,6 +345,15 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
> if (last_sub_id < sub_id)
> last_sub_id= sub_id;
>
> +#ifdef HAVE_REPLICATION
> + ++pending_gtid_count;
> + if (pending_gtid_count >= opt_gtid_cleanup_batch_size)
> + {
> + pending_gtid_count = 0;
> + slave_background_gtid_pending_delete_request();
> + }
> +#endif
> +
> return 0;
> }
>
> @@ -382,20 +388,22 @@ rpl_slave_state::get_element(uint32 domain_id)
>
>
> int
> -rpl_slave_state::put_back_list(uint32 domain_id, list_element *list)
> +rpl_slave_state::put_back_list(list_element *list)
> {
> - element *e;
> + element *e= NULL;
> int err= 0;
>
> mysql_mutex_lock(&LOCK_slave_state);
> - if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0)))
> - {
> - err= 1;
> - goto end;
> - }
> while (list)
> {
> list_element *next= list->next;
> +
> + if ((!e || e->domain_id != list->domain_id) &&
> + !(e= (element *)my_hash_search(&hash, (const uchar *)&list->domain_id, 0)))
> + {
> + err= 1;
> + goto end;
> + }
> e->add(list);
> list= next;
> }
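
A side note on the rewritten put_back_list(): since the list handed back is
no longer per-domain, it caches the last hash hit and repeats the lookup only
when domain_id changes between consecutive nodes. A minimal standalone sketch
of that caching idiom, with hypothetical container types (the server uses its
own HASH, not std::unordered_map):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    struct Node { uint32_t domain_id; uint64_t sub_id; };
    struct Bucket { std::vector<Node> nodes; };

    void put_back(std::unordered_map<uint32_t, Bucket> &hash,
                  const std::vector<Node> &list)
    {
      Bucket *b= nullptr;
      uint32_t cached_domain= 0;
      for (const Node &n : list)
      {
        /* Consecutive nodes usually share a domain, so reuse the last hit. */
        if (!b || cached_domain != n.domain_id)
        {
          b= &hash[n.domain_id];       /* NB: creates a bucket if missing;  */
          cached_domain= n.domain_id;  /* the real code errors out instead. */
        }
        b->nodes.push_back(n);
      }
    }
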
> @@ -572,12 +580,12 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename)
> /*
> Write a gtid to the replication slave state table.
>
> + Do it as part of the transaction, to get slave crash safety, or as a separate
> + transaction if !in_transaction (eg. MyISAM or DDL).
> +
> gtid The global transaction id for this event group.
> sub_id Value allocated within the sub_id when the event group was
> read (sub_id must be consistent with commit order in master binlog).
> - rgi rpl_group_info context, if we are recording the gtid transactionally
> - as part of replicating a transactional event. NULL if called from
> - outside of a replicated transaction.
>
> Note that caller must later ensure that the new gtid and sub_id is inserted
> into the appropriate HASH element with rpl_slave_state.add(), so that it can
> @@ -585,16 +593,13 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename)
> */
> int
> rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> - rpl_group_info *rgi, bool in_statement,
> + bool in_transaction, bool in_statement,
> void **out_hton)
> {
> TABLE_LIST tlist;
> int err= 0, not_sql_thread;
> bool table_opened= false;
> TABLE *table;
> - list_element *delete_list= 0, *next, *cur, **next_ptr_ptr, **best_ptr_ptr;
> - uint64 best_sub_id;
> - element *elem;
> ulonglong thd_saved_option= thd->variables.option_bits;
> Query_tables_list lex_backup;
> wait_for_commit* suspended_wfc;
> @@ -684,7 +689,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> thd->wsrep_ignore_table= true;
> #endif
>
> - if (!rgi)
> + if (!in_transaction)
> {
> DBUG_PRINT("info", ("resetting OPTION_BEGIN"));
> thd->variables.option_bits&=
> @@ -716,168 +721,280 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> my_error(ER_OUT_OF_RESOURCES, MYF(0));
> goto end;
> }
> +end:
>
> - mysql_mutex_lock(&LOCK_slave_state);
> - if ((elem= get_element(gtid->domain_id)) == NULL)
> +#ifdef WITH_WSREP
> + thd->wsrep_ignore_table= false;
> +#endif
> +
> + if (table_opened)
> {
> - mysql_mutex_unlock(&LOCK_slave_state);
> - my_error(ER_OUT_OF_RESOURCES, MYF(0));
> - err= 1;
> - goto end;
> + if (err || (err= ha_commit_trans(thd, FALSE)))
> + ha_rollback_trans(thd, FALSE);
> +
> + close_thread_tables(thd);
> + if (in_transaction)
> + thd->mdl_context.release_statement_locks();
> + else
> + thd->mdl_context.release_transactional_locks();
> }
> + thd->lex->restore_backup_query_tables_list(&lex_backup);
> + thd->variables.option_bits= thd_saved_option;
> + thd->resume_subsequent_commits(suspended_wfc);
> + DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
> + {
> + if (gtid->server_id == 100)
> + my_sleep(500000);
> + });
> + DBUG_RETURN(err);
> +}
>
> - /* Now pull out all GTIDs that were recorded in this engine. */
> - delete_list = NULL;
> - next_ptr_ptr= &elem->list;
> - cur= elem->list;
> - best_sub_id= 0;
> - best_ptr_ptr= NULL;
> - while (cur)
> +
> +/*
> + Return a list of all old GTIDs in any mysql.gtid_slave_pos* table that are
> + no longer needed and can be deleted from the table.
> +
> + Within each domain, we need to keep around the latest GTID (the one with the
> + highest sub_id), but any others in that domain can be deleted.
> +*/
> +rpl_slave_state::list_element *
> +rpl_slave_state::gtid_grab_pending_delete_list()
> +{
> + uint32 i;
> + list_element *full_list;
> +
> + mysql_mutex_lock(&LOCK_slave_state);
> + full_list= NULL;
> + for (i= 0; i < hash.records; ++i)
> {
> - list_element *next= cur->next;
> - if (cur->hton == hton)
> - {
> - /* Belongs to same engine, so move it to the delete list. */
> - cur->next= delete_list;
> - delete_list= cur;
> - if (cur->sub_id > best_sub_id)
> + element *elem= (element *)my_hash_element(&hash, i);
> + list_element *elist= elem->list;
> + list_element *last_elem, **best_ptr_ptr, *cur, *next;
> + uint64 best_sub_id;
> +
> + if (!elist)
> + continue; /* Nothing here */
> +
> + /* Delete any old stuff, but keep around the most recent one. */
> + cur= elist;
> + best_sub_id= cur->sub_id;
> + best_ptr_ptr= &elist;
> + last_elem= cur;
> + while ((next= cur->next)) {
> + last_elem= next;
> + if (next->sub_id > best_sub_id)
> {
> - best_sub_id= cur->sub_id;
> - best_ptr_ptr= &delete_list;
> - }
> - else if (best_ptr_ptr == &delete_list)
> + best_sub_id= next->sub_id;
> best_ptr_ptr= &cur->next;
> - }
> - else
> - {
> - /* Another engine, leave it in the list. */
> - if (cur->sub_id > best_sub_id)
> - {
> - best_sub_id= cur->sub_id;
> - /* Current best is not on the delete list. */
> - best_ptr_ptr= NULL;
> }
> - *next_ptr_ptr= cur;
> - next_ptr_ptr= &cur->next;
> + cur= next;
> }
> - cur= next;
> - }
> - *next_ptr_ptr= NULL;
> - /*
> - If the highest sub_id element is on the delete list, put it back on the
> - original list, to preserve the highest sub_id element in the table for
> - GTID position recovery.
> - */
> - if (best_ptr_ptr)
> - {
> + /*
> + Append the new elements to the full list. Note the order is important;
> + we do it here so that we do not break the list if best_sub_id is the
> + last of the new elements.
> + */
> + last_elem->next= full_list;
> + /*
> + Delete the highest sub_id element from the old list, and put it back as
> + the single-element new list.
> + */
> cur= *best_ptr_ptr;
> *best_ptr_ptr= cur->next;
> - cur->next= elem->list;
> + cur->next= NULL;
> elem->list= cur;
> +
> + /*
> + Collect the full list so far here. Note that elist may have moved if we
> + deleted the first element, so order is again important.
> + */
> + full_list= elist;
> }
> mysql_mutex_unlock(&LOCK_slave_state);
>
> - if (!delete_list)
> - goto end;
> + return full_list;
> +}
> +
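
To make the keep-one-per-domain invariant concrete: if a domain's list holds
sub_ids {1, 5, 3}, the grabbed delete list receives {1, 3} and the hash
element keeps only {5}. A throwaway sketch of the same single-domain
partitioning step, with hypothetical types:

    #include <cstdint>

    struct Node { Node *next; uint64_t sub_id; };

    /* Detach every node except the one with the highest sub_id; return the
       detached chain and leave *head as a single-node list (the survivor). */
    Node *grab_old(Node **head)
    {
      if (!*head)
        return nullptr;
      Node **best= head;
      for (Node **pp= &(*head)->next; *pp; pp= &(*pp)->next)
        if ((*pp)->sub_id > (*best)->sub_id)
          best= pp;
      Node *keep= *best;
      *best= keep->next;     /* splice the survivor out of the chain */
      Node *old= *head;      /* whatever is left is deletable        */
      keep->next= nullptr;
      *head= keep;
      return old;
    }
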
>
> - /* Now delete any already committed GTIDs. */
> - bitmap_set_bit(table->read_set, table->field[0]->field_index);
> - bitmap_set_bit(table->read_set, table->field[1]->field_index);
> +/* Find the mysql.gtid_slave_posXXX table associated with a given hton. */
> +LEX_CSTRING *
> +rpl_slave_state::select_gtid_pos_table(void *hton)
> +{
> + struct gtid_pos_table *table_entry;
>
> - if ((err= table->file->ha_index_init(0, 0)))
> + /*
> + See comments on rpl_slave_state::gtid_pos_tables for rules around proper
> + access to the list.
> + */
> + table_entry= (struct gtid_pos_table *)
> + my_atomic_loadptr_explicit(>id_pos_tables, MY_MEMORY_ORDER_ACQUIRE);
> +
> + while (table_entry)
> {
> - table->file->print_error(err, MYF(0));
> - goto end;
> + if (table_entry->table_hton == hton)
> + {
> + if (likely(table_entry->state == GTID_POS_AVAILABLE))
> + return &table_entry->table_name;
> + }
> + table_entry= table_entry->next;
> }
> - cur = delete_list;
> - while (cur)
> - {
> - uchar key_buffer[4+8];
>
> - DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete",
> - { err= ENOENT;
> - table->file->print_error(err, MYF(0));
> - /* `break' does not work inside DBUG_EXECUTE_IF */
> - goto dbug_break; });
> + table_entry= (struct gtid_pos_table *)
> + my_atomic_loadptr_explicit(&default_gtid_pos_table, MY_MEMORY_ORDER_ACQUIRE);
> + return &table_entry->table_name;
> +}
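
The lock-free walk above is the usual publish-with-release / read-with-acquire
linked-list pattern. A generic sketch with std::atomic (this illustrates the
pattern only; the server uses its own my_atomic_loadptr_explicit, and writers
are assumed to be serialized under a lock, as LOCK_slave_state does in the
patch):

    #include <atomic>

    struct Entry { Entry *next; const void *key; /* + payload */ };

    std::atomic<Entry*> head{nullptr};

    /* Writer (serialized externally): fully initialize the node, then
       publish with release so readers that see it also see its fields. */
    void publish(Entry *e)
    {
      e->next= head.load(std::memory_order_relaxed);
      head.store(e, std::memory_order_release);
    }

    /* Reader: one acquire load of the head; nodes are immutable once
       published, so the rest of the traversal needs no synchronization. */
    Entry *find(const void *key)
    {
      for (Entry *p= head.load(std::memory_order_acquire); p; p= p->next)
        if (p->key == key)
          return p;
      return nullptr;
    }
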
>
> - next= cur->next;
>
> - table->field[1]->store(cur->sub_id, true);
> - /* domain_id is already set in table->record[0] from write_row() above. */
> - key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false);
> - if (table->file->ha_index_read_map(table->record[1], key_buffer,
> - HA_WHOLE_KEY, HA_READ_KEY_EXACT))
> - /* We cannot find the row, assume it is already deleted. */
> - ;
> - else if ((err= table->file->ha_delete_row(table->record[1])))
> - table->file->print_error(err, MYF(0));
> - /*
> - In case of error, we still discard the element from the list. We do
> - not want to endlessly error on the same element in case of table
> - corruption or such.
> - */
> - cur= next;
> - if (err)
> - break;
> - }
> -IF_DBUG(dbug_break:, )
> - table->file->ha_index_end();
> +void
> +rpl_slave_state::gtid_delete_pending(THD *thd,
> + rpl_slave_state::list_element **list_ptr)
> +{
> + int err= 0;
> + ulonglong thd_saved_option;
>
> -end:
> + if (unlikely(!loaded))
> + return;
>
> #ifdef WITH_WSREP
> - thd->wsrep_ignore_table= false;
> + /*
> + Updates in slave state table should not be appended to galera transaction
> + writeset.
> + */
> + thd->wsrep_ignore_table= true;
> #endif
>
> - if (table_opened)
> + thd_saved_option= thd->variables.option_bits;
> + thd->variables.option_bits&=
> + ~(ulonglong)(OPTION_NOT_AUTOCOMMIT |OPTION_BEGIN |OPTION_BIN_LOG |
> + OPTION_GTID_BEGIN);
> +
> + while (*list_ptr)
> {
> - if (err || (err= ha_commit_trans(thd, FALSE)))
> - {
> - /*
> - If error, we need to put any remaining delete_list back into the HASH
> - so we can do another delete attempt later.
> - */
> - if (delete_list)
> - {
> - put_back_list(gtid->domain_id, delete_list);
> - delete_list = 0;
> - }
> + LEX_CSTRING *gtid_pos_table_name, *tmp_table_name;
> + Query_tables_list lex_backup;
> + TABLE_LIST tlist;
> + TABLE *table;
> + handler::Table_flags direct_pos;
> + list_element *cur, **cur_ptr_ptr;
> + bool table_opened= false;
> + void *hton= (*list_ptr)->hton;
>
> - ha_rollback_trans(thd, FALSE);
> + thd->reset_for_next_command();
> +
> + /*
> + Only the SQL thread can call select_gtid_pos_table without a mutex.
> + Other threads need to use a mutex and take into account that the
> + result may change during execution, so we have to make a copy.
> + */
> + mysql_mutex_lock(&LOCK_slave_state);
> + tmp_table_name= select_gtid_pos_table(hton);
> + gtid_pos_table_name= thd->make_clex_string(tmp_table_name->str,
> + tmp_table_name->length);
> + mysql_mutex_unlock(&LOCK_slave_state);
> + if (!gtid_pos_table_name)
> + {
> + /* Out of memory - we can try again later. */
> + break;
> }
> - close_thread_tables(thd);
> - if (rgi)
> +
> + thd->lex->reset_n_backup_query_tables_list(&lex_backup);
> + tlist.init_one_table(&MYSQL_SCHEMA_NAME, gtid_pos_table_name, NULL, TL_WRITE);
> + if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0)))
> + goto end;
> + table_opened= true;
> + table= tlist.table;
> +
> + if ((err= gtid_check_rpl_slave_state_table(table)))
> + goto end;
> +
> + direct_pos= table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION;
> + bitmap_set_all(table->write_set);
> + table->rpl_write_set= table->write_set;
> +
> + /* Now delete any already committed GTIDs. */
> + bitmap_set_bit(table->read_set, table->field[0]->field_index);
> + bitmap_set_bit(table->read_set, table->field[1]->field_index);
> +
> + if (!direct_pos && (err= table->file->ha_index_init(0, 0)))
> {
> - thd->mdl_context.release_statement_locks();
> - /*
> - Save the list of old gtid entries we deleted. If this transaction
> - fails later for some reason and is rolled back, the deletion of those
> - entries will be rolled back as well, and we will need to put them back
> - on the to-be-deleted list so we can re-do the deletion. Otherwise
> - redundant rows in mysql.gtid_slave_pos may accumulate if transactions
> - are rolled back and retried after record_gtid().
> - */
> -#ifdef HAVE_REPLICATION
> - rgi->pending_gtid_deletes_save(gtid->domain_id, delete_list);
> -#endif
> + table->file->print_error(err, MYF(0));
> + goto end;
> }
> - else
> +
> + cur = *list_ptr;
> + cur_ptr_ptr = list_ptr;
> + do
> {
> - thd->mdl_context.release_transactional_locks();
> -#ifdef HAVE_REPLICATION
> - rpl_group_info::pending_gtid_deletes_free(delete_list);
> -#endif
> + uchar key_buffer[4+8];
> + list_element *next= cur->next;
> +
> + if (cur->hton == hton)
> + {
> + int res;
> +
> + table->field[0]->store((ulonglong)cur->domain_id, true);
> + table->field[1]->store(cur->sub_id, true);
> + if (direct_pos)
> + {
> + res= table->file->ha_rnd_pos_by_record(table->record[0]);
> + }
> + else
> + {
> + key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false);
> + res= table->file->ha_index_read_map(table->record[0], key_buffer,
> + HA_WHOLE_KEY, HA_READ_KEY_EXACT);
> + }
> + DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete",
> + { res= 1;
> + err= ENOENT;
> + sql_print_error("<DEBUG> Error deleting old GTID row");
> + });
> + if (res)
> + /* We cannot find the row, assume it is already deleted. */
> + ;
> + else if ((err= table->file->ha_delete_row(table->record[0])))
> + {
> + sql_print_error("Error deleting old GTID row: %s",
> + thd->get_stmt_da()->message());
> + /*
> + In case of error, we still discard the element from the list. We do
> + not want to endlessly error on the same element in case of table
> + corruption or such.
> + */
> + }
> + *cur_ptr_ptr= next;
> + my_free(cur);
> + }
> + else
> + {
> + /* Leave this one in the list until we get to the table for its hton. */
> + cur_ptr_ptr= &cur->next;
> + }
> + cur= next;
> + if (err)
> + break;
> + } while (cur);
> +end:
> + if (table_opened)
> + {
> + if (!direct_pos)
> + table->file->ha_index_end();
> +
> + if (err || (err= ha_commit_trans(thd, FALSE)))
> + ha_rollback_trans(thd, FALSE);
> }
> + close_thread_tables(thd);
> + thd->mdl_context.release_transactional_locks();
> + thd->lex->restore_backup_query_tables_list(&lex_backup);
> +
> + if (err)
> + break;
> }
> - thd->lex->restore_backup_query_tables_list(&lex_backup);
> thd->variables.option_bits= thd_saved_option;
> - thd->resume_subsequent_commits(suspended_wfc);
> - DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
> - {
> - if (gtid->server_id == 100)
> - my_sleep(500000);
> - });
> - DBUG_RETURN(err);
> +
> +#ifdef WITH_WSREP
> + thd->wsrep_ignore_table= false;
> +#endif
> }
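
The shape of gtid_delete_pending() is: one pass of the outer loop per distinct
engine, where each pass opens the gtid_slave_pos table for the hton at the
head of the list, unlinks and deletes every node of that hton, and leaves the
rest for later passes. Reduced to its list mechanics (hypothetical types,
row deletion stubbed out):

    #include <cstdint>
    #include <cstdio>

    struct Node { Node *next; const void *hton; uint64_t sub_id; };

    void process_by_engine(Node **list_ptr)
    {
      while (*list_ptr)                     /* one pass per distinct hton */
      {
        const void *hton= (*list_ptr)->hton;
        /* ... open the gtid_slave_pos table belonging to this hton ... */
        for (Node **pp= list_ptr; *pp; )
        {
          Node *cur= *pp;
          if (cur->hton == hton)
          {
            *pp= cur->next;                 /* unlink and consume */
            printf("delete row sub_id=%llu\n",
                   (unsigned long long) cur->sub_id);
            delete cur;
          }
          else
            pp= &cur->next;                 /* keep for a later pass */
        }
      }
    }

On error the real function just breaks out, leaving the unprocessed tail in
*list_ptr so the caller can hand it back with put_back_list().
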
>
>
> @@ -1251,7 +1368,7 @@ rpl_slave_state::load(THD *thd, const char *state_from_master, size_t len,
>
> if (gtid_parser_helper(&state_from_master, end, >id) ||
> !(sub_id= next_sub_id(gtid.domain_id)) ||
> - record_gtid(thd, >id, sub_id, NULL, in_statement, &hton) ||
> + record_gtid(thd, >id, sub_id, false, in_statement, &hton) ||
> update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, hton, NULL))
> return 1;
> if (state_from_master == end)
> diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h
> index 0fc92d5e33c..60d822f7b0d 100644
> --- a/sql/rpl_gtid.h
> +++ b/sql/rpl_gtid.h
> @@ -118,8 +118,9 @@ struct rpl_slave_state
> {
> struct list_element *next;
> uint64 sub_id;
> - uint64 seq_no;
> + uint32 domain_id;
> uint32 server_id;
> + uint64 seq_no;
> /*
> hton of mysql.gtid_slave_pos* table used to record this GTID.
> Can be NULL if the gtid table failed to load (eg. missing
> @@ -191,6 +192,8 @@ struct rpl_slave_state
>
> /* Mapping from domain_id to its element. */
> HASH hash;
> + /* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */
> + uint32 pending_gtid_count;
> /* Mutex protecting access to the state. */
> mysql_mutex_t LOCK_slave_state;
> /* Auxiliary buffer to sort gtid list. */
> @@ -233,7 +236,10 @@ struct rpl_slave_state
> int truncate_state_table(THD *thd);
> void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename);
> int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
> - rpl_group_info *rgi, bool in_statement, void **out_hton);
> + bool in_transaction, bool in_statement, void **out_hton);
> + list_element *gtid_grab_pending_delete_list();
> + LEX_CSTRING *select_gtid_pos_table(void *hton);
> + void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr);
> uint64 next_sub_id(uint32 domain_id);
> int iterate(int (*cb)(rpl_gtid *, void *), void *data,
> rpl_gtid *extra_gtids, uint32 num_extra,
> @@ -245,7 +251,7 @@ struct rpl_slave_state
> bool is_empty();
>
> element *get_element(uint32 domain_id);
> - int put_back_list(uint32 domain_id, list_element *list);
> + int put_back_list(list_element *list);
>
> void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton,
> rpl_group_info *rgi);
> diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
> index b275ad884bd..2d91620c898 100644
> --- a/sql/rpl_rli.cc
> +++ b/sql/rpl_rli.cc
> @@ -1820,6 +1820,7 @@ rpl_load_gtid_slave_state(THD *thd)
> int err= 0;
> uint32 i;
> load_gtid_state_cb_data cb_data;
> + rpl_slave_state::list_element *old_gtids_list;
> DBUG_ENTER("rpl_load_gtid_slave_state");
>
> mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
> @@ -1905,6 +1906,13 @@ rpl_load_gtid_slave_state(THD *thd)
> rpl_global_gtid_slave_state->loaded= true;
> mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
>
> + /* Clear out no longer needed elements now. */
> + old_gtids_list=
> + rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
> + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &old_gtids_list);
> + if (old_gtids_list)
> + rpl_global_gtid_slave_state->put_back_list(old_gtids_list);
> +
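
Both call sites (here and in handle_slave_background() below) follow the same
three-step protocol, which is what makes the error path safe: take ownership
of the whole pending list, delete what you can, and hand back whatever
survived. Schematically, using the patch's own functions:

    rpl_slave_state::list_element *list=
      rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
    rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
    if (list)                                /* deletion failed part-way */
      rpl_global_gtid_slave_state->put_back_list(list);
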
> end:
> if (array_inited)
> delete_dynamic(&array);
> @@ -2086,7 +2094,6 @@ rpl_group_info::reinit(Relay_log_info *rli)
> long_find_row_note_printed= false;
> did_mark_start_commit= false;
> gtid_ev_flags2= 0;
> - pending_gtid_delete_list= NULL;
> last_master_timestamp = 0;
> gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL;
> speculation= SPECULATE_NO;
> @@ -2217,12 +2224,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
> erroneously update the GTID position.
> */
> gtid_pending= false;
> -
> - /*
> - Rollback will have undone any deletions of old rows we might have made
> - in mysql.gtid_slave_pos. Put those rows back on the list to be deleted.
> - */
> - pending_gtid_deletes_put_back();
> }
> m_table_map.clear_tables();
> slave_close_thread_tables(thd);
> @@ -2448,78 +2449,6 @@ rpl_group_info::unmark_start_commit()
> }
>
>
> -/*
> - When record_gtid() has deleted any old rows from the table
> - mysql.gtid_slave_pos as part of a replicated transaction, save the list of
> - rows deleted here.
> -
> - If later the transaction fails (eg. optimistic parallel replication), the
> - deletes will be undone when the transaction is rolled back. Then we can
> - put back the list of rows into the rpl_global_gtid_slave_state, so that
> - we can re-do the deletes and avoid accumulating old rows in the table.
> -*/
> -void
> -rpl_group_info::pending_gtid_deletes_save(uint32 domain_id,
> - rpl_slave_state::list_element *list)
> -{
> - /*
> - We should never get to a state where we try to save a new pending list of
> - gtid deletes while we still have an old one. But make sure we handle it
> - anyway just in case, so we avoid leaving stray entries in the
> - mysql.gtid_slave_pos table.
> - */
> - DBUG_ASSERT(!pending_gtid_delete_list);
> - if (unlikely(pending_gtid_delete_list))
> - pending_gtid_deletes_put_back();
> -
> - pending_gtid_delete_list= list;
> - pending_gtid_delete_list_domain= domain_id;
> -}
> -
> -
> -/*
> - Take the list recorded by pending_gtid_deletes_save() and put it back into
> - rpl_global_gtid_slave_state. This is needed if deletion of the rows was
> - rolled back due to transaction failure.
> -*/
> -void
> -rpl_group_info::pending_gtid_deletes_put_back()
> -{
> - if (pending_gtid_delete_list)
> - {
> - rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain,
> - pending_gtid_delete_list);
> - pending_gtid_delete_list= NULL;
> - }
> -}
> -
> -
> -/*
> - Free the list recorded by pending_gtid_deletes_save(). Done when the deletes
> - in the list have been permanently committed.
> -*/
> -void
> -rpl_group_info::pending_gtid_deletes_clear()
> -{
> - pending_gtid_deletes_free(pending_gtid_delete_list);
> - pending_gtid_delete_list= NULL;
> -}
> -
> -
> -void
> -rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list)
> -{
> - rpl_slave_state::list_element *next;
> -
> - while (list)
> - {
> - next= list->next;
> - my_free(list);
> - list= next;
> - }
> -}
> -
> -
> rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter)
> : rpl_filter(filter)
> {
> diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
> index d9f0e0e5d3b..b8b153c34be 100644
> --- a/sql/rpl_rli.h
> +++ b/sql/rpl_rli.h
> @@ -757,11 +757,6 @@ struct rpl_group_info
> /* Needs room for "Gtid D-S-N\x00". */
> char gtid_info_buf[5+10+1+10+1+20+1];
>
> - /* List of not yet committed deletions in mysql.gtid_slave_pos. */
> - rpl_slave_state::list_element *pending_gtid_delete_list;
> - /* Domain associated with pending_gtid_delete_list. */
> - uint32 pending_gtid_delete_list_domain;
> -
> /*
> The timestamp, from the master, of the commit event.
> Used to do delayed update of rli->last_master_timestamp, for getting
> @@ -903,12 +898,6 @@ struct rpl_group_info
> char *gtid_info();
> void unmark_start_commit();
>
> - static void pending_gtid_deletes_free(rpl_slave_state::list_element *list);
> - void pending_gtid_deletes_save(uint32 domain_id,
> - rpl_slave_state::list_element *list);
> - void pending_gtid_deletes_put_back();
> - void pending_gtid_deletes_clear();
> -
> longlong get_row_stmt_start_timestamp()
> {
> return row_stmt_start_timestamp;
> diff --git a/sql/slave.cc b/sql/slave.cc
> index bb1300d36e6..f8499513dd6 100644
> --- a/sql/slave.cc
> +++ b/sql/slave.cc
> @@ -465,6 +465,8 @@ static struct slave_background_gtid_pos_create_t {
> void *hton;
> } *slave_background_gtid_pos_create_list;
>
> +static volatile bool slave_background_gtid_pending_delete_flag;
> +
>
> pthread_handler_t
> handle_slave_background(void *arg __attribute__((unused)))
> @@ -499,6 +501,7 @@ handle_slave_background(void *arg __attribute__((unused)))
> {
> slave_background_kill_t *kill_list;
> slave_background_gtid_pos_create_t *create_list;
> + bool pending_deletes;
>
> thd->ENTER_COND(&COND_slave_background, &LOCK_slave_background,
> &stage_slave_background_wait_request,
> @@ -508,13 +511,15 @@ handle_slave_background(void *arg __attribute__((unused)))
> stop= abort_loop || thd->killed || slave_background_thread_stop;
> kill_list= slave_background_kill_list;
> create_list= slave_background_gtid_pos_create_list;
> - if (stop || kill_list || create_list)
> + pending_deletes= slave_background_gtid_pending_delete_flag;
> + if (stop || kill_list || create_list || pending_deletes)
> break;
> mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
> }
>
> slave_background_kill_list= NULL;
> slave_background_gtid_pos_create_list= NULL;
> + slave_background_gtid_pending_delete_flag= false;
> thd->EXIT_COND(&old_stage);
>
> while (kill_list)
> @@ -541,6 +546,17 @@ handle_slave_background(void *arg __attribute__((unused)))
> create_list= next;
> }
>
> + if (pending_deletes)
> + {
> + rpl_slave_state::list_element *list;
> +
> + slave_background_gtid_pending_delete_flag= false;
> + list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
> + rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
> + if (list)
> + rpl_global_gtid_slave_state->put_back_list(list);
> + }
> +
> mysql_mutex_lock(&LOCK_slave_background);
> } while (!stop);
>
> @@ -615,6 +631,23 @@ slave_background_gtid_pos_create_request(
>
>
> /*
> + Request the slave background thread to delete no longer used rows from the
> + mysql.gtid_slave_pos* tables.
> +
> + This is called from time-critical rpl_slave_state::update(), so we avoid
> + taking any locks here. This means we may race with the background thread
> + and occasionally lose a signal. This is not a problem; any pending rows to
> + be deleted will just be deleted a bit later as part of the next batch.
> +*/
> +void
> +slave_background_gtid_pending_delete_request(void)
> +{
> + slave_background_gtid_pending_delete_flag= true;
> + mysql_cond_signal(&COND_slave_background);
> +}
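
Since the request side takes no mutex, the signal can be lost if the
background thread is between its flag check and its cond-wait; the sticky
flag, plus the fact that requests keep arriving as GTIDs accumulate, makes
that benign, exactly as the comment says. A standalone sketch of the same
trade-off (generic std:: primitives, not the server's):

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    std::mutex bg_mutex;
    std::condition_variable bg_cond;
    std::atomic<bool> pending{false};

    /* Hot path: no lock taken. If the worker is not yet waiting, the
       notify is lost, but the flag stays set, so the next request (or
       the worker's next wakeup for another reason) picks the work up. */
    void request_cleanup()
    {
      pending.store(true, std::memory_order_relaxed);
      bg_cond.notify_one();
    }

    void worker_iteration()
    {
      std::unique_lock<std::mutex> lk(bg_mutex);
      bg_cond.wait(lk, []{ return pending.load(); });
      pending.store(false);
      lk.unlock();
      /* ... grab the pending delete list, delete, put back remainder ... */
    }

In the patch the recovery comes for free: the next update() that crosses the
batch threshold calls slave_background_gtid_pending_delete_request() again.
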
> +
> +
> +/*
> Start the slave background thread.
>
> This thread is currently used for two purposes:
> diff --git a/sql/slave.h b/sql/slave.h
> index 649d55b45b9..12d569b0333 100644
> --- a/sql/slave.h
> +++ b/sql/slave.h
> @@ -276,6 +276,7 @@ bool net_request_file(NET* net, const char* fname);
> void slave_background_kill_request(THD *to_kill);
> void slave_background_gtid_pos_create_request
> (rpl_slave_state::gtid_pos_table *table_entry);
> +void slave_background_gtid_pending_delete_request(void);
>
> extern bool volatile abort_loop;
> extern Master_info *active_mi; /* active_mi for multi-master */
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index 6d4c135683a..9348f4e5c98 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -1942,6 +1942,19 @@ Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_CSTRING *base)
> }
>
>
> +static Sys_var_uint Sys_gtid_cleanup_batch_size(
> + "gtid_cleanup_batch_size",
> + "Normally does not need tuning. How many old rows must accumulate in "
> + "the mysql.gtid_slave_pos table before a background job will be run to "
> + "delete them. Can be increased to reduce number of commits if "
> + "using many different engines with --gtid_pos_auto_engines, or to "
> + "reduce CPU overhead if using a huge number of different "
> + "gtid_domain_ids. Can be decreased to reduce number of old rows in the "
> + "table.",
> + GLOBAL_VAR(opt_gtid_cleanup_batch_size), CMD_LINE(REQUIRED_ARG),
> + VALID_RANGE(0,2147483647), DEFAULT(64), BLOCK_SIZE(1));
> +
> +
> static bool
> check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var)
> {
> diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
> index 9c20fea97ae..a1e501f78f4 100644
> --- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
> +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
> @@ -2,6 +2,7 @@ include/master-slave.inc
> [connection master]
> connection server_2;
> include/stop_slave.inc
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -41,6 +42,8 @@ a
> 1
> SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
> domain_id sub_id server_id seq_no
> +0 1 1 1
> +0 2 1 2
> 0 3 1 3
> 0 4 1 4
> SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> @@ -121,6 +124,21 @@ Transactions_multi_engine 6
> DELETE FROM t1 WHERE a >= 100;
> DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
> +connection server_1;
> +include/save_master_gtid.inc
> +connection server_2;
> +include/sync_with_master_gtid.inc
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> +UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
> +COUNT(*)>=10
> +1
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> connection server_2;
> include/stop_slave.inc
> SET sql_log_bin=0;
> diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> index e0d16e7f242..631d9ca533f 100644
> --- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> +++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
> @@ -4,6 +4,12 @@
>
> --connection server_2
> --source include/stop_slave.inc
> +
> +# Set GTID cleanup limit high enough that cleanup will not run and we
> +# can rely on consistent table output in .result.
> +--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> +
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
>
>
> +# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
> +--connection server_1
> +--disable_query_log
> +let $i=10;
> +while ($i) {
> + eval INSERT INTO t1 VALUES (300+$i);
> + eval INSERT INTO t2 VALUES (300+$i);
> + eval INSERT INTO t3 VALUES (300+$i);
> + dec $i;
> +}
> +--enable_query_log
> +--source include/save_master_gtid.inc
> +
> +--connection server_2
> +--source include/sync_with_master_gtid.inc
> +
> +# Check that we have many rows in mysql.gtid_slave_pos now (since
> +# @@gtid_cleanup_batch_size was set to a huge value). No need to check
> +# for an exact number, since that will require changing .result if
> +# anything changes prior to this point, and we just need to know that
> +# we still have some data in the tables to make the following
> +# test effective.
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
> +
> +# Check that old GTID rows will be deleted when batch delete size is
> +# set reasonably. Old row deletion is not 100% deterministic (by design), so
> +# we must wait for it to occur, but it should occur eventually.
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> +let $i=40;
> +--disable_query_log
> +--let $keep_include_silent=1
> +while ($i) {
> + let N=`SELECT 1+($i MOD 3)`;
> + --connection server_1
> + eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
> + --source include/save_master_gtid.inc
> + --connection server_2
> + --source include/sync_with_master_gtid.inc
> + let $j=50;
> + while ($j) {
> + let $is_done=`SELECT SUM(a)=1 FROM (
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
> + UNION ALL
> + SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
> + UNION ALL
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_rocksdb) outer_select`;
> + if ($is_done) {
> + let $j=0;
> + }
> + if (!$is_done) {
> + real_sleep 0.1;
> + dec $j;
> + }
> + }
> + dec $i;
> + if ($is_done) {
> + let $i=0;
> + }
> +}
> +--enable_query_log
> +--let $keep_include_silent=0
> +if (!$is_done) {
> + --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
> +}
> +
> +--disable_query_log
> +DELETE FROM t1 WHERE a >= 100;
> +DELETE FROM t2 WHERE a >= 100;
> +DELETE FROM t3 WHERE a >= 100;
> +--enable_query_log
> +
> +
> # Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
> # Have mysql.gtid_slave_pos* for myisam and innodb but not rocksdb.
> --connection server_2
> @@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
> SET sql_log_bin=0;
> DROP TABLE mysql.gtid_slave_pos_innodb;
> SET sql_log_bin=1;
> +--disable_query_log
> +eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
> +--enable_query_log
>
> --connection server_1
> DROP TABLE t1;
> diff --git a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
> index d4532eec4e2..d79e7e59aa4 100644
> --- a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
> +++ b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
> @@ -2,6 +2,7 @@ include/master-slave.inc
> [connection master]
> connection server_2;
> include/stop_slave.inc
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -41,6 +42,8 @@ a
> 1
> SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
> domain_id sub_id server_id seq_no
> +0 1 1 1
> +0 2 1 2
> 0 3 1 3
> 0 4 1 4
> SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> @@ -121,6 +124,21 @@ Transactions_multi_engine 6
> DELETE FROM t1 WHERE a >= 100;
> DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
> +connection server_1;
> +include/save_master_gtid.inc
> +connection server_2;
> +include/sync_with_master_gtid.inc
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> +UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +COUNT(*)>=10
> +1
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
> +COUNT(*)>=10
> +1
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> connection server_2;
> include/stop_slave.inc
> SET sql_log_bin=0;
> diff --git a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
> index ceb119cd0dc..1d19a25889e 100644
> --- a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
> +++ b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
> @@ -4,6 +4,12 @@
>
> --connection server_2
> --source include/stop_slave.inc
> +
> +# Set GTID cleanup limit high enough that cleanup will not run and we
> +# can rely on consistent table output in .result.
> +--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
> +SET GLOBAL gtid_cleanup_batch_size = 999999999;
> +
> CHANGE MASTER TO master_use_gtid=slave_pos;
> SET sql_log_bin=0;
> CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
> @@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
> DELETE FROM t3 WHERE a >= 100;
>
>
> +# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
> +--connection server_1
> +--disable_query_log
> +let $i=10;
> +while ($i) {
> + eval INSERT INTO t1 VALUES (300+$i);
> + eval INSERT INTO t2 VALUES (300+$i);
> + eval INSERT INTO t3 VALUES (300+$i);
> + dec $i;
> +}
> +--enable_query_log
> +--source include/save_master_gtid.inc
> +
> +--connection server_2
> +--source include/sync_with_master_gtid.inc
> +
> +# Check that we have many rows in mysql.gtid_slave_pos now (since
> +# @@gtid_cleanup_batch_size was set to a huge value). No need to check
> +# for an exact number, since that will require changing .result if
> +# anything changes prior to this point, and we just need to know that
> +# we still have some data in the tables to make the following
> +# test effective.
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
> +SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
> +SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
> +
> +# Check that old GTID rows will be deleted when batch delete size is
> +# set reasonably. Old row deletion is not 100% deterministic (by design), so
> +# we must wait for it to occur, but it should occur eventually.
> +SET GLOBAL gtid_cleanup_batch_size = 3;
> +let $i=40;
> +--disable_query_log
> +--let $keep_include_silent=1
> +while ($i) {
> + let N=`SELECT 1+($i MOD 3)`;
> + --connection server_1
> + eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
> + --source include/save_master_gtid.inc
> + --connection server_2
> + --source include/sync_with_master_gtid.inc
> + let $j=50;
> + while ($j) {
> + let $is_done=`SELECT SUM(a)=1 FROM (
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
> + UNION ALL
> + SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
> + UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
> + UNION ALL
> + SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_tokudb) outer_select`;
> + if ($is_done) {
> + let $j=0;
> + }
> + if (!$is_done) {
> + real_sleep 0.1;
> + dec $j;
> + }
> + }
> + dec $i;
> + if ($is_done) {
> + let $i=0;
> + }
> +}
> +--enable_query_log
> +--let $keep_include_silent=0
> +if (!$is_done) {
> + --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
> +}
> +
> +--disable_query_log
> +DELETE FROM t1 WHERE a >= 100;
> +DELETE FROM t2 WHERE a >= 100;
> +DELETE FROM t3 WHERE a >= 100;
> +--enable_query_log
> +
> +
> # Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
> # Have mysql.gtid_slave_pos* for myisam and innodb but not tokudb.
> --connection server_2
> @@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
> SET sql_log_bin=0;
> DROP TABLE mysql.gtid_slave_pos_innodb;
> SET sql_log_bin=1;
> +--disable_query_log
> +eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
> +--enable_query_log
>
> --connection server_1
> DROP TABLE t1;