Compute binlog checksums (when enabled) already when writing events into the statement or transaction caches, where before it was done when the caches are copied to the real binlog file. This moves the checksum computation outside of holding LOCK_log, improving scalability. At stmt/trx cache write time, the final end_log_pos values are not known, so with this patch these will be set to 0. Events that are written directly to the binlog file (not through stmt/trx cache) keep the correct end_log_pos value. The GTID and COMMIT/XID events at the start and end of event groups are written directly, so the zero end_log_pos is only for events in the middle of event groups, which does not negatively affect replication. An option --binlog-legacy-event-pos, off by default, is provided to disable this behavior to provide backwards compatibility with any external applications that might rely on end_log_pos in events in the middle of event groups. Checksums cannot be pre-computed when binlog encryption is enabled, as encryption relies on correct end_log_pos to provide part of the nonce/IV. Checksum pre-computation is also disabled for WSREP/Galera. The current --binlog-checksum configuration is saved in binlog_cache_data at transaction start and used to pre-compute checksums in cache, if applicable. When the cache is later copied to the binlog, a check is made if the saved value still matches the configured global value; if so, the events are block-copied directly into the binlog file. If --binlog-checksum was changed during the transaction, events are re-written to the binlog file one-by-one and the checksums recomputed/discarded as appropriate. 
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org> --- include/my_atomic.h | 41 ++++- include/my_sys.h | 2 + .../main/mysqlbinlog_row_compressed.result | 48 ++--- .../main/mysqlbinlog_row_minimal.result | 48 ++--- .../main/mysqlbinlog_stmt_compressed.result | 16 +- mysql-test/main/mysqld--help.result | 7 + .../suite/binlog/include/binlog_ioerr.inc | 3 + mysql-test/suite/binlog/r/binlog_ioerr.result | 2 + mysql-test/suite/binlog/t/binlog_killed.test | 2 +- .../t/binlog_table_map_optional_metadata.test | 4 +- .../binlog_encryption/binlog_ioerr.result | 2 + .../suite/rpl/r/rpl_checksum_cache.result | 43 ++++- .../suite/rpl/t/rpl_checksum_cache.test | 98 +++++++++- .../r/sysvars_server_notembedded.result | 10 + mysys/mf_iocache2.c | 34 ++++ sql/log.cc | 173 ++++++++++++++---- sql/log.h | 2 +- sql/log_event.h | 2 +- sql/log_event_server.cc | 24 +-- sql/mysqld.cc | 1 + sql/mysqld.h | 1 + sql/privilege.h | 3 + sql/sys_vars.cc | 13 ++ 23 files changed, 466 insertions(+), 113 deletions(-) diff --git a/include/my_atomic.h b/include/my_atomic.h index 270134a6caf..01e9170cb15 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -62,8 +62,8 @@ Order must be one of MY_MEMORY_ORDER_RELAXED, MY_MEMORY_ORDER_RELEASE, MY_MEMORY_ORDER_SEQ_CST. - '#' is substituted by a size suffix - 8, 16, 32, 64, or ptr - (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr). + '#' is substituted by a size suffix - 8, 16, 32, 64, ptr, or ul (for unsigned + long) (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr). The first version orders memory accesses according to MY_MEMORY_ORDER_SEQ_CST, the second version (with _explicit suffix) orders memory accesses according to @@ -153,4 +153,41 @@ #define my_atomic_casptr_strong_explicit(P, E, D, S, F) \ my_atomic_casptr((P), (E), (D)) #endif + +/* Convenience macros since ulong is 32 or 64 bit depending on platform. 
*/ +#if SIZEOF_LONG == 4 +#define my_atomic_storeul(P, D) my_atomic_store32((int32 volatile *)(P), (D)) +#define my_atomic_storeul_explicit(P, D, O) \ + my_atomic_store32_explicit((int32 volatile *)(P), (D), (O)) +#define my_atomic_loadul(P) my_atomic_load32((int32 volatile *)(P)) +#define my_atomic_loadul_explicit(P, O) \ + my_atomic_load32_explicit((int32 volatile *)(P), (O)) +#define my_atomic_fasul(P, D) my_atomic_fas32((int32 volatile *)(P), (D)) +#define my_atomic_fasul_explict(P, D, O) \ + my_atomic_fas32_explicit((int32 volatile *)(P), (D), (O)) +#define my_atomic_addul(P, A) my_atomic_add32((int32 volatile *)(P), (A)) +#define my_atomic_addul_explict(P, A, O) \ + my_atomic_add32_explicit((int32 volatile *)(P), (A), (O)) +#define my_atomic_casul(P, E, D) \ + my_atomic_cas32((int32 volatile *)(P), (E), (D)) +#define my_atomic_casul_weak_explicit(P, E, D, S, F) \ + my_atomic_cas32_weak_explicit((int32 volatile *)(P), (E), (D), (S), (F)) +#define my_atomic_casul_strong_explicit(P, E, D, S, F) \ + my_atomic_cas32_strong_explicit((int32 volatile *)(P), (E), (D), (S), (F)) +#elif SIZEOF_LONG == 8 +#define my_atomic_storeul(P, D) my_atomic_store64((P), (D)) +#define my_atomic_storeul_explicit(P, D, O) my_atomic_store64_explicit((P), (D), (O)) +#define my_atomic_loadul(P) my_atomic_load64((P)) +#define my_atomic_loadul_explicit(P, O) my_atomic_load64_explicit((P), (O)) +#define my_atomic_fasul(P, D) my_atomic_fas64((P), (D)) +#define my_atomic_fasul_explict(P, D, O) my_atomic_fas64_explicit((P), (D), (O)) +#define my_atomic_addul(P, A) my_atomic_add64((P), (A)) +#define my_atomic_addul_explict(P, A, O) my_atomic_add64_explicit((P), (A), (O)) +#define my_atomic_casul(P, E, D) my_atomic_cas64((P), (E), (D)) +#define my_atomic_casul_weak_explicit(P, E, D, S, F) \ + my_atomic_cas64_weak_explicit((P), (E), (D), (S), (F)) +#define my_atomic_casul_strong_explicit(P, E, D, S, F) \ + my_atomic_cas64_strong_explicit((P), (E), (D), (S), (F)) +#endif + #endif /* 
MY_ATOMIC_INCLUDED */ diff --git a/include/my_sys.h b/include/my_sys.h index 2d1dbb7b2bf..145d9fb2603 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -599,6 +599,8 @@ static inline size_t my_b_bytes_in_cache(const IO_CACHE *info) int my_b_copy_to_file (IO_CACHE *cache, FILE *file, size_t count); int my_b_copy_all_to_file(IO_CACHE *cache, FILE *file); +int my_b_copy_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache, size_t count); +int my_b_copy_all_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache); my_off_t my_b_append_tell(IO_CACHE* info); my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */ diff --git a/mysql-test/main/mysqlbinlog_row_compressed.result b/mysql-test/main/mysqlbinlog_row_compressed.result index 2cf652655e0..96a0ed61a71 100644 --- a/mysql-test/main/mysqlbinlog_row_compressed.result +++ b/mysql-test/main/mysqlbinlog_row_compressed.result @@ -57,11 +57,11 @@ START TRANSACTION /*!*/; # at 787 # at 861 -#<date> server id 1 end_log_pos 861 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (10, 1, 2, 3, 4, 5, 6, 7, "") -#<date> server id 1 end_log_pos 917 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 917 -#<date> server id 1 end_log_pos 985 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -86,11 +86,11 @@ START TRANSACTION /*!*/; # at 1100 # at 1176 -#<date> server id 1 end_log_pos 1176 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (11, 1, 2, 3, 4, 5, 6, 7, NULL) -#<date> server id 1 end_log_pos 1232 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: 
`test`.`t1` mapped to number num # at 1232 -#<date> server id 1 end_log_pos 1299 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=11 /* INT meta=0 nullable=0 is_null=0 */ @@ -115,11 +115,11 @@ START TRANSACTION /*!*/; # at 1414 # at 1492 -#<date> server id 1 end_log_pos 1492 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (12, 1, 2, 3, NULL, 5, 6, 7, "A") -#<date> server id 1 end_log_pos 1548 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1548 -#<date> server id 1 end_log_pos 1614 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=12 /* INT meta=0 nullable=0 is_null=0 */ @@ -144,11 +144,11 @@ START TRANSACTION /*!*/; # at 1729 # at 1804 -#<date> server id 1 end_log_pos 1804 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (13, 1, 2, 3, 0, 5, 6, 7, "A") -#<date> server id 1 end_log_pos 1860 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1860 -#<date> server id 1 end_log_pos 1927 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_compressed_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=13 /* INT meta=0 nullable=0 is_null=0 */ @@ -173,11 +173,11 @@ START TRANSACTION /*!*/; # at 2042 # at 2096 -#<date> server id 1 end_log_pos 2096 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t2 SELECT * FROM t1 
-#<date> server id 1 end_log_pos 2152 CRC32 XXX Table_map: `test`.`t2` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 2152 -#<date> server id 1 end_log_pos 2243 CRC32 XXX Write_compressed_rows: table id 33 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_compressed_rows: table id 33 flags: STMT_END_F ### INSERT INTO `test`.`t2` ### SET ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -235,11 +235,11 @@ START TRANSACTION /*!*/; # at 2358 # at 2424 -#<date> server id 1 end_log_pos 2424 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> UPDATE t2 SET f4=5 WHERE f4>0 or f4 is NULL -#<date> server id 1 end_log_pos 2480 CRC32 XXX Table_map: `test`.`t2` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 2480 -#<date> server id 1 end_log_pos 2579 CRC32 XXX Update_compressed_rows: table id 33 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Update_compressed_rows: table id 33 flags: STMT_END_F ### UPDATE `test`.`t2` ### WHERE ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -316,11 +316,11 @@ START TRANSACTION /*!*/; # at 2694 # at 2731 -#<date> server id 1 end_log_pos 2731 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> DELETE FROM t1 -#<date> server id 1 end_log_pos 2787 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 2787 -#<date> server id 1 end_log_pos 2879 CRC32 XXX Delete_compressed_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Delete_compressed_rows: table id 32 flags: STMT_END_F ### DELETE FROM `test`.`t1` ### WHERE ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -378,11 +378,11 @@ START TRANSACTION /*!*/; # at 2994 # at 3031 -#<date> server id 1 end_log_pos 3031 CRC32 XXX 
Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> DELETE FROM t2 -#<date> server id 1 end_log_pos 3087 CRC32 XXX Table_map: `test`.`t2` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 3087 -#<date> server id 1 end_log_pos 3172 CRC32 XXX Delete_compressed_rows: table id 33 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Delete_compressed_rows: table id 33 flags: STMT_END_F ### DELETE FROM `test`.`t2` ### WHERE ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ diff --git a/mysql-test/main/mysqlbinlog_row_minimal.result b/mysql-test/main/mysqlbinlog_row_minimal.result index 69aa91a8903..6871d75a985 100644 --- a/mysql-test/main/mysqlbinlog_row_minimal.result +++ b/mysql-test/main/mysqlbinlog_row_minimal.result @@ -55,11 +55,11 @@ START TRANSACTION /*!*/; # at 834 # at 908 -#<date> server id 1 end_log_pos 908 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (10, 1, 2, 3, 4, 5, 6, 7, "") -#<date> server id 1 end_log_pos 964 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 964 -#<date> server id 1 end_log_pos 1033 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -84,11 +84,11 @@ START TRANSACTION /*!*/; # at 1148 # at 1224 -#<date> server id 1 end_log_pos 1224 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (11, 1, 2, 3, 4, 5, 6, 7, NULL) -#<date> server id 1 end_log_pos 1280 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1280 -#<date> server id 1 end_log_pos 1348 CRC32 
XXX Write_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=11 /* INT meta=0 nullable=0 is_null=0 */ @@ -113,11 +113,11 @@ START TRANSACTION /*!*/; # at 1463 # at 1541 -#<date> server id 1 end_log_pos 1541 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (12, 1, 2, 3, NULL, 5, 6, 7, "A") -#<date> server id 1 end_log_pos 1597 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1597 -#<date> server id 1 end_log_pos 1664 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=12 /* INT meta=0 nullable=0 is_null=0 */ @@ -142,11 +142,11 @@ START TRANSACTION /*!*/; # at 1779 # at 1854 -#<date> server id 1 end_log_pos 1854 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t1 VALUES (13, 1, 2, 3, 0, 5, 6, 7, "A") -#<date> server id 1 end_log_pos 1910 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 1910 -#<date> server id 1 end_log_pos 1980 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_rows: table id 32 flags: STMT_END_F ### INSERT INTO `test`.`t1` ### SET ### @1=13 /* INT meta=0 nullable=0 is_null=0 */ @@ -171,11 +171,11 @@ START TRANSACTION /*!*/; # at 2095 # at 2149 -#<date> server id 1 end_log_pos 2149 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> INSERT INTO t2 SELECT * FROM t1 -#<date> server id 1 end_log_pos 2205 CRC32 XXX Table_map: `test`.`t2` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: 
`test`.`t2` mapped to number num # at 2205 -#<date> server id 1 end_log_pos 2372 CRC32 XXX Write_rows: table id 33 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Write_rows: table id 33 flags: STMT_END_F ### INSERT INTO `test`.`t2` ### SET ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -233,11 +233,11 @@ START TRANSACTION /*!*/; # at 2487 # at 2553 -#<date> server id 1 end_log_pos 2553 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> UPDATE t2 SET f4=5 WHERE f4>0 or f4 is NULL -#<date> server id 1 end_log_pos 2609 CRC32 XXX Table_map: `test`.`t2` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t2` mapped to number num # at 2609 -#<date> server id 1 end_log_pos 2675 CRC32 XXX Update_rows: table id 33 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Update_rows: table id 33 flags: STMT_END_F ### UPDATE `test`.`t2` ### WHERE ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -266,11 +266,11 @@ START TRANSACTION /*!*/; # at 2790 # at 2827 -#<date> server id 1 end_log_pos 2827 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> DELETE FROM t1 -#<date> server id 1 end_log_pos 2883 CRC32 XXX Table_map: `test`.`t1` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 XXX Table_map: `test`.`t1` mapped to number num # at 2883 -#<date> server id 1 end_log_pos 2937 CRC32 XXX Delete_rows: table id 32 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Delete_rows: table id 32 flags: STMT_END_F ### DELETE FROM `test`.`t1` ### WHERE ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ @@ -296,11 +296,11 @@ START TRANSACTION /*!*/; # at 3052 # at 3089 -#<date> server id 1 end_log_pos 3089 CRC32 XXX Annotate_rows: +#<date> server id 1 end_log_pos 0 CRC32 XXX Annotate_rows: #Q> DELETE FROM t2 -#<date> server id 1 end_log_pos 3145 CRC32 XXX Table_map: `test`.`t2` mapped to number num +#<date> server id 1 end_log_pos 0 CRC32 
XXX Table_map: `test`.`t2` mapped to number num # at 3145 -#<date> server id 1 end_log_pos 3199 CRC32 XXX Delete_rows: table id 33 flags: STMT_END_F +#<date> server id 1 end_log_pos 0 CRC32 XXX Delete_rows: table id 33 flags: STMT_END_F ### DELETE FROM `test`.`t2` ### WHERE ### @1=10 /* INT meta=0 nullable=0 is_null=0 */ diff --git a/mysql-test/main/mysqlbinlog_stmt_compressed.result b/mysql-test/main/mysqlbinlog_stmt_compressed.result index c0d26f3f9df..6321e74127f 100644 --- a/mysql-test/main/mysqlbinlog_stmt_compressed.result +++ b/mysql-test/main/mysqlbinlog_stmt_compressed.result @@ -56,7 +56,7 @@ CREATE TABLE t2 (pk INT PRIMARY KEY, f1 INT, f2 INT, f3 INT, f4 INT, f5 MEDIUMIN START TRANSACTION /*!*/; # at 787 -#<date> server id 1 end_log_pos 915 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; INSERT INTO t1 VALUES (10, 1, 2, 3, 4, 5, 6, 7, "") /*!*/; @@ -71,7 +71,7 @@ COMMIT START TRANSACTION /*!*/; # at 1030 -#<date> server id 1 end_log_pos 1158 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; INSERT INTO t1 VALUES (11, 1, 2, 3, 4, 5, 6, 7, NULL) /*!*/; @@ -86,7 +86,7 @@ COMMIT START TRANSACTION /*!*/; # at 1273 -#<date> server id 1 end_log_pos 1403 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; INSERT INTO t1 VALUES (12, 1, 2, 3, NULL, 5, 6, 7, "A") /*!*/; @@ -101,7 +101,7 @@ COMMIT START TRANSACTION /*!*/; # at 1518 -#<date> server id 1 end_log_pos 1645 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed 
thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; INSERT INTO t1 VALUES (13, 1, 2, 3, 0, 5, 6, 7, "A") /*!*/; @@ -116,7 +116,7 @@ COMMIT START TRANSACTION /*!*/; # at 1760 -#<date> server id 1 end_log_pos 1868 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; INSERT INTO t2 SELECT * FROM t1 /*!*/; @@ -131,7 +131,7 @@ COMMIT START TRANSACTION /*!*/; # at 1983 -#<date> server id 1 end_log_pos 2100 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; UPDATE t2 SET f4=5 WHERE f4>0 or f4 is NULL /*!*/; @@ -146,7 +146,7 @@ COMMIT START TRANSACTION /*!*/; # at 2215 -#<date> server id 1 end_log_pos 2306 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; DELETE FROM t1 /*!*/; @@ -161,7 +161,7 @@ COMMIT START TRANSACTION /*!*/; # at 2421 -#<date> server id 1 end_log_pos 2512 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> +#<date> server id 1 end_log_pos 0 CRC32 XXX Query_compressed thread_id=5 exec_time=x error_code=0 xid=<xid> SET TIMESTAMP=X/*!*/; DELETE FROM t2 /*!*/; diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index de0a8310ec1..999e0212d77 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -101,6 +101,12 @@ The following specify which files/extra groups are read (specified before remain --binlog-ignore-db=name Tells the master that updates to the given database should not be logged to the binary log. 
+ --binlog-legacy-event-pos + Fill in the end_log_pos field of _all_ events in the + binlog, even when doing so costs performance. Can be used + in case some old application needs it for backwards + compatibility. Setting this option can hurt binlog + scalability. --binlog-optimize-thread-scheduling Run fast part of group commit in a single thread, to optimize kernel thread scheduling. On by default. Disable @@ -1526,6 +1532,7 @@ binlog-direct-non-transactional-updates FALSE binlog-expire-logs-seconds 0 binlog-file-cache-size 16384 binlog-format MIXED +binlog-legacy-event-pos FALSE binlog-optimize-thread-scheduling TRUE binlog-row-event-max-size 8192 binlog-row-image FULL diff --git a/mysql-test/suite/binlog/include/binlog_ioerr.inc b/mysql-test/suite/binlog/include/binlog_ioerr.inc index da6fb5ac727..b710eccc64b 100644 --- a/mysql-test/suite/binlog/include/binlog_ioerr.inc +++ b/mysql-test/suite/binlog/include/binlog_ioerr.inc @@ -17,11 +17,14 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; INSERT INTO t1 VALUES(0); SET @saved_dbug = @@SESSION.debug_dbug; SET SESSION debug_dbug='+d,fail_binlog_write_1'; +# The error injection is in the "legacy" code path. 
+SET GLOBAL binlog_legacy_event_pos= 1; --error ER_ERROR_ON_WRITE INSERT INTO t1 VALUES(1); --error ER_ERROR_ON_WRITE INSERT INTO t1 VALUES(2); SET SESSION debug_dbug=@saved_dbug; +SET GLOBAL binlog_legacy_event_pos= 0; INSERT INTO t1 VALUES(3); SELECT * FROM t1; diff --git a/mysql-test/suite/binlog/r/binlog_ioerr.result b/mysql-test/suite/binlog/r/binlog_ioerr.result index e4f00a017ba..aa4042d3f6f 100644 --- a/mysql-test/suite/binlog/r/binlog_ioerr.result +++ b/mysql-test/suite/binlog/r/binlog_ioerr.result @@ -4,11 +4,13 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; INSERT INTO t1 VALUES(0); SET @saved_dbug = @@SESSION.debug_dbug; SET SESSION debug_dbug='+d,fail_binlog_write_1'; +SET GLOBAL binlog_legacy_event_pos= 1; INSERT INTO t1 VALUES(1); ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on device") INSERT INTO t1 VALUES(2); ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on device") SET SESSION debug_dbug=@saved_dbug; +SET GLOBAL binlog_legacy_event_pos= 0; INSERT INTO t1 VALUES(3); SELECT * FROM t1; a diff --git a/mysql-test/suite/binlog/t/binlog_killed.test b/mysql-test/suite/binlog/t/binlog_killed.test index 7c3a262d2c1..271da705c82 100644 --- a/mysql-test/suite/binlog/t/binlog_killed.test +++ b/mysql-test/suite/binlog/t/binlog_killed.test @@ -67,7 +67,7 @@ let $rows= `select count(*) from t2 /* must be 2 or 0 */`; let $MYSQLD_DATADIR= `select @@datadir`; --let $binlog_killed_pos=query_get_value(SHOW BINLOG EVENTS, Pos, 6) ---let $binlog_killed_end_log_pos=query_get_value(SHOW BINLOG EVENTS, End_log_pos, 6) +--let $binlog_killed_end_log_pos=query_get_value(SHOW BINLOG EVENTS, Pos, 7) --exec $MYSQL_BINLOG --force-if-open --start-position=$binlog_killed_pos --stop-position=$binlog_killed_end_log_pos $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/kill_query_calling_sp.binlog --replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR --disable_result_log diff --git 
a/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test b/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test index 4577c6c1de1..ea4397306f3 100644 --- a/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test +++ b/mysql-test/suite/binlog/t/binlog_table_map_optional_metadata.test @@ -275,7 +275,7 @@ INSERT INTO t1 VALUES(2, "b"); # The invalid metadata will case assertion failure on Write_rows_log_event # So we need to stop mysqlbinlog before reading Write_rows_log_event. ---let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 3, End_log_pos, 3) +--let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 4, Pos, 4) --source include/print_optional_metadata.inc --echo # @@ -291,7 +291,7 @@ INSERT INTO t1(c_point) VALUES(ST_PointFromText('POINT(10 10)')); # The invalid metadata will case assertion failure on Write_rows_log_event # So we need to stop mysqlbinlog before reading Write_rows_log_event. ---let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 3, End_log_pos, 3) +--let $stop_position= query_get_value(SHOW BINLOG EVENTS FROM $start_pos LIMIT 4, Pos, 4) --source include/print_optional_metadata.inc DROP TABLE t1; diff --git a/mysql-test/suite/binlog_encryption/binlog_ioerr.result b/mysql-test/suite/binlog_encryption/binlog_ioerr.result index 2823b7050c3..146bc50c964 100644 --- a/mysql-test/suite/binlog_encryption/binlog_ioerr.result +++ b/mysql-test/suite/binlog_encryption/binlog_ioerr.result @@ -4,11 +4,13 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb; INSERT INTO t1 VALUES(0); SET @saved_dbug = @@SESSION.debug_dbug; SET SESSION debug_dbug='+d,fail_binlog_write_1'; +SET GLOBAL binlog_legacy_event_pos= 1; INSERT INTO t1 VALUES(1); ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on device") INSERT INTO t1 VALUES(2); ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on device") SET SESSION 
debug_dbug=@saved_dbug; +SET GLOBAL binlog_legacy_event_pos= 0; INSERT INTO t1 VALUES(3); SELECT * FROM t1; a diff --git a/mysql-test/suite/rpl/r/rpl_checksum_cache.result b/mysql-test/suite/rpl/r/rpl_checksum_cache.result index e8f221cc181..b908c546ce3 100644 --- a/mysql-test/suite/rpl/r/rpl_checksum_cache.result +++ b/mysql-test/suite/rpl/r/rpl_checksum_cache.result @@ -121,13 +121,54 @@ connection slave; include/diff_tables.inc [master:test.t1, slave:test.t1] include/diff_tables.inc [master:test.t2, slave:test.t2] include/diff_tables.inc [master:test.t3, slave:test.t3] +*** Test switching checksum algorithm while ongoing transactions have pre-computed checksum in their binlog cache *** +connection master; +CREATE TABLE t4 (a INT, b INT, c VARCHAR(1024), PRIMARY KEY (a,b)) ENGINE=InnoDB; +BEGIN; +INSERT INTO t4 VALUES (1, 1, "small, pre-computed checksums"); +connection server_1; +BEGIN; +INSERT INTO t4 VALUES (2, 1, "big, pre-computed checksums"); +set @@global.binlog_checksum = NONE; +connection master; +INSERT INTO t4 VALUES (1, 2, "done"); +COMMIT; +connection server_1; +INSERT INTO t4 VALUES (2, 22, "done"); +COMMIT; +connection master; +BEGIN; +INSERT INTO t4 VALUES (3, 1, "small, no pre-computed checksums"); +connection server_1; +BEGIN; +INSERT INTO t4 VALUES (4, 1, "big, no pre-computed checksums"); +set @@global.binlog_checksum = CRC32; +connection master; +INSERT INTO t4 VALUES (3, 2, "done"); +COMMIT; +connection server_1; +INSERT INTO t4 VALUES (4, 22, "done"); +COMMIT; +connection slave; +*** Test the --binlog-legacy-event-pos option. 
+connection master; +FLUSH BINARY LOGS; +BEGIN; +INSERT INTO t4 VALUES (5, 1, "Zero end_log_pos"); +COMMIT; +set @@global.binlog_legacy_event_pos= 1; +BEGIN; +INSERT INTO t4 VALUES (6, 1, "Non-zero end_log_pos"); +COMMIT; +set @@global.binlog_legacy_event_pos= 0; +connection slave; connection master; begin; delete from t1; delete from t2; delete from t3; commit; -drop table t1, t2, t3; +drop table t1, t2, t3, t4; set @@global.binlog_cache_size = @save_binlog_cache_size; set @@global.binlog_checksum = @save_binlog_checksum; set @@global.master_verify_checksum = @save_master_verify_checksum; diff --git a/mysql-test/suite/rpl/t/rpl_checksum_cache.test b/mysql-test/suite/rpl/t/rpl_checksum_cache.test index e04f618b81e..173af8c1d0b 100644 --- a/mysql-test/suite/rpl/t/rpl_checksum_cache.test +++ b/mysql-test/suite/rpl/t/rpl_checksum_cache.test @@ -243,6 +243,102 @@ let $diff_tables=master:test.t3, slave:test.t3; source include/diff_tables.inc; +--echo *** Test switching checksum algorithm while ongoing transactions have pre-computed checksum in their binlog cache *** + +--connection master +CREATE TABLE t4 (a INT, b INT, c VARCHAR(1024), PRIMARY KEY (a,b)) ENGINE=InnoDB; + +# Create a couple transactions that will precompute checksums but commit +# without them. + +BEGIN; +INSERT INTO t4 VALUES (1, 1, "small, pre-computed checksums"); + +--connection server_1 +BEGIN; +INSERT INTO t4 VALUES (2, 1, "big, pre-computed checksums"); +--let $i= 20 +--disable_query_log +while ($i) { + eval INSERT INTO t4 VALUES (2, 22-$i, REPEAT("x", FLOOR(RAND()*100) + 831)); + dec $i; +} +--enable_query_log + +# Disable checksums dynamically, so MYSQL_BIN_LOG::write_cache() will have +# to drop the pre-computed checksums. +set @@global.binlog_checksum = NONE; + +--connection master +INSERT INTO t4 VALUES (1, 2, "done"); +COMMIT; +--connection server_1 +INSERT INTO t4 VALUES (2, 22, "done"); +COMMIT; + +# Create a couple transactions that will not precompute checksums but commit +# with them. 
+ +--connection master +BEGIN; +INSERT INTO t4 VALUES (3, 1, "small, no pre-computed checksums"); + +--connection server_1 +BEGIN; +INSERT INTO t4 VALUES (4, 1, "big, no pre-computed checksums"); +--let $i= 20 +--disable_query_log +while ($i) { + eval INSERT INTO t4 VALUES (4, 22-$i, REPEAT("x", FLOOR(RAND()*100) + 853)); + dec $i; +} +--enable_query_log + +# Enable checksums dynamically, so MYSQL_BIN_LOG::write_cache() will have +# to recompute the checksums. +set @@global.binlog_checksum = CRC32; + +--connection master +INSERT INTO t4 VALUES (3, 2, "done"); +COMMIT; +--connection server_1 +INSERT INTO t4 VALUES (4, 22, "done"); +COMMIT; + +sync_slave_with_master; + + +--echo *** Test the --binlog-legacy-event-pos option. +--connection master +FLUSH BINARY LOGS; +--source include/wait_for_binlog_checkpoint.inc + +--let $query_file= query_get_value(SHOW MASTER STATUS, File, 1) +--let $query_pos= query_get_value(SHOW MASTER STATUS, Position, 1) +BEGIN; +INSERT INTO t4 VALUES (5, 1, "Zero end_log_pos"); +COMMIT; +--let $end_log_pos= query_get_value(SHOW BINLOG EVENTS IN "$query_file" FROM $query_pos LIMIT 3, End_log_pos, 2) +if ($end_log_pos!=0) { + eval SHOW BINLOG EVENTS IN "$query_file"; + --die Wrong End_log_pos=$end_log_pos, expected zero. +} + +set @@global.binlog_legacy_event_pos= 1; +--let $query_pos= query_get_value(SHOW MASTER STATUS, Position, 1) +BEGIN; +INSERT INTO t4 VALUES (6, 1, "Non-zero end_log_pos"); +COMMIT; +--let $end_log_pos= query_get_value(SHOW BINLOG EVENTS IN "$query_file" FROM $query_pos LIMIT 3, End_log_pos, 2) +if ($end_log_pos==0) { + eval SHOW BINLOG EVENTS IN "$query_file"; + --die Wrong End_log_pos=$end_log_pos, expected non-zero. 
+} +set @@global.binlog_legacy_event_pos= 0; + +sync_slave_with_master; + + connection master; begin; @@ -251,7 +347,7 @@ delete from t2; delete from t3; commit; -drop table t1, t2, t3; +drop table t1, t2, t3, t4; set @@global.binlog_cache_size = @save_binlog_cache_size; set @@global.binlog_checksum = @save_binlog_checksum; set @@global.master_verify_checksum = @save_master_verify_checksum; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index d1a6d85f861..6d43640bf65 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -442,6 +442,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST MIXED,STATEMENT,ROW READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME BINLOG_LEGACY_EVENT_POS +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Fill in the end_log_pos field of _all_ events in the binlog, even when doing so costs performance. Can be used in case some old application needs it for backwards compatibility. Setting this option can hurt binlog scalability. +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME BINLOG_OPTIMIZE_THREAD_SCHEDULING VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c index 4622b68646e..5d2863bca1c 100644 --- a/mysys/mf_iocache2.c +++ b/mysys/mf_iocache2.c @@ -74,6 +74,40 @@ int my_b_copy_all_to_file(IO_CACHE *cache, FILE *file) DBUG_RETURN(my_b_copy_to_file(cache, file, SIZE_T_MAX)); } +/** + Similar to above my_b_copy_to_file(), but destination is another IO_CACHE. 
+*/ +int +my_b_copy_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache, + size_t count) +{ + size_t curr_write, bytes_in_cache; + DBUG_ENTER("my_b_copy_to_cache"); + + bytes_in_cache= my_b_bytes_in_cache(from_cache); + do + { + curr_write= MY_MIN(bytes_in_cache, count); + if (my_b_write(to_cache, from_cache->read_pos, curr_write)) + DBUG_RETURN(1); + + from_cache->read_pos += curr_write; + count -= curr_write; + } while (count && (bytes_in_cache= my_b_fill(from_cache))); + if(from_cache->error == -1) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + +int my_b_copy_all_to_cache(IO_CACHE *from_cache, IO_CACHE *to_cache) +{ + DBUG_ENTER("my_b_copy_all_to_cache"); + /* Reinit the cache to read from the beginning of the cache */ + if (reinit_io_cache(from_cache, READ_CACHE, 0L, FALSE, FALSE)) + DBUG_RETURN(1); + DBUG_RETURN(my_b_copy_to_cache(from_cache, to_cache, SIZE_T_MAX)); +} + my_off_t my_b_append_tell(IO_CACHE* info) { /* diff --git a/sql/log.cc b/sql/log.cc index 1ab90389a37..dfed8265a69 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -107,6 +107,15 @@ static const LEX_CSTRING write_error_msg= { STRING_WITH_LEN("error writing to the binary log") }; static my_bool opt_optimize_thread_scheduling= TRUE; +/* + The binlog_checksum_options value is accessed under the protection of + LOCK_log, as the checksum option used must be consistent across an entire + binlog file, and log rotation is needed whenever it is changed. + + As an exception, event checksums are precomputed using a non-locked read + of binlog_checksum_options. Thus updates to this variable must be atomic, + with relaxed semantics. 
+*/ ulong binlog_checksum_options; #ifndef DBUG_OFF ulong opt_binlog_dbug_fsync_sleep= 0; @@ -275,12 +284,22 @@ void make_default_log_name(char **out, const char* log_ext, bool once) class binlog_cache_data { public: - binlog_cache_data(): m_pending(0), status(0), - before_stmt_pos(MY_OFF_T_UNDEF), - incident(FALSE), + binlog_cache_data(bool precompute_checksums_) : m_pending(0), status(0), + before_stmt_pos(MY_OFF_T_UNDEF), incident(FALSE), + precompute_checksums(precompute_checksums_), saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0), ptr_binlog_cache_disk_use(0) - { } + { + /* + Read the current checksum setting. We will use this setting to decide + whether to pre-compute checksums in the cache. Then when writing the cache + to the actual binlog, another check will be made and checksums recomputed + in the unlikely case that the setting changed meanwhile. + */ + checksum_opt= !precompute_checksums_ ? (uchar)BINLOG_CHECKSUM_ALG_OFF : + (uchar)my_atomic_loadul_explicit(&binlog_checksum_options, + MY_MEMORY_ORDER_RELAXED); + } ~binlog_cache_data() { @@ -332,6 +351,9 @@ class binlog_cache_data bool truncate_file= (cache_log.file != -1 && my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE); truncate(0,1); // Forget what's in cache + checksum_opt= !precompute_checksums ? (uchar)BINLOG_CHECKSUM_ALG_OFF : + (uchar)my_atomic_loadul_explicit(&binlog_checksum_options, + MY_MEMORY_ORDER_RELAXED); if (!cache_was_empty) compute_statistics(); if (truncate_file) @@ -435,6 +457,17 @@ class binlog_cache_data */ bool incident; + /* Whether the caller requested precomputing checksums. */ + bool precompute_checksums; + +public: + /* + The algorithm (if any) used to pre-compute checksums in the cache. + Initialized from binlog_checksum_options when the cache is reset. + */ + uchar checksum_opt; + +private: /** This function computes binlog cache and disk usage. 
*/ @@ -508,6 +541,37 @@ void Log_event_writer::set_incident() } +/** + Select if and how to write checksum for an event written to the binlog. + + - When writing directly to the binlog, the user-configured checksum option + is used. + - When writing to a transaction or statement cache, we have + binlog_cache_data that contains the checksum option to use (pre-computed + checksums). + - Otherwise, no checksum used. +*/ +enum enum_binlog_checksum_alg +Log_event::select_checksum_alg(const binlog_cache_data *data) +{ + if (cache_type == Log_event::EVENT_NO_CACHE) + { + DBUG_ASSERT(!data); + /* + When we're selecting the checksum algorithm to write directly to the + actual binlog, we must be holding the LOCK_log, otherwise the checksum + configuration could change just after we read it. + */ + mysql_mutex_assert_owner(mysql_bin_log.get_log_lock()); + return (enum enum_binlog_checksum_alg)binlog_checksum_options; + } + else if (data) + return (enum enum_binlog_checksum_alg)data->checksum_opt; + else + return BINLOG_CHECKSUM_ALG_OFF; +} + + class binlog_cache_mngr { public: binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size, @@ -515,8 +579,10 @@ class binlog_cache_mngr { ulong *param_ptr_binlog_stmt_cache_use, ulong *param_ptr_binlog_stmt_cache_disk_use, ulong *param_ptr_binlog_cache_use, - ulong *param_ptr_binlog_cache_disk_use) - : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0) + ulong *param_ptr_binlog_cache_disk_use, + bool precompute_checksums) + : stmt_cache(precompute_checksums), trx_cache(precompute_checksums), + last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0) { stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size, param_ptr_binlog_stmt_cache_use, @@ -5558,7 +5624,8 @@ int MYSQL_BIN_LOG::new_file_impl() { DBUG_ASSERT(!is_relay_log); DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset); - binlog_checksum_options= checksum_alg_reset; + my_atomic_storeul_explicit(&binlog_checksum_options, checksum_alg_reset, + 
MY_MEMORY_ORDER_RELAXED); } /* Note that at this point, log_state != LOG_CLOSED @@ -5634,19 +5701,19 @@ int MYSQL_BIN_LOG::new_file_impl() bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *data, IO_CACHE *file) { - return write_event(ev, ev->select_checksum_alg(), data, file); + return write_event(ev, ev->select_checksum_alg(data), data, file); } bool MYSQL_BIN_LOG::write_event(Log_event *ev) { - return write_event(ev, ev->select_checksum_alg(), 0, &log_file); + return write_event(ev, ev->select_checksum_alg(NULL), 0, &log_file); } bool MYSQL_BIN_LOG::write_event(Log_event *ev, enum enum_binlog_checksum_alg checksum_alg, binlog_cache_data *cache_data, IO_CACHE *file) { - Log_event_writer writer(file, 0, checksum_alg, &crypto); + Log_event_writer writer(file, cache_data, checksum_alg, &crypto); if (crypto.scheme && file == &log_file) { writer.ctx= alloca(crypto.ctx_size); @@ -5953,13 +6020,22 @@ binlog_cache_mngr *THD::binlog_setup_trx_data() } thd_set_ha_data(this, binlog_hton, cache_mngr); + /* + Don't attempt to precompute checksums if: + - Disabled by user request, --binlog-legacy-event-pos + - Binlog is encrypted, cannot use precomputed checksums + - WSREP/Galera. 
+ */ + bool precompute_checksums= + !WSREP_NNULL(this) && !encrypt_binlog && !opt_binlog_legacy_event_pos; cache_mngr= new (cache_mngr) binlog_cache_mngr(max_binlog_stmt_cache_size, max_binlog_cache_size, &binlog_stmt_cache_use, &binlog_stmt_cache_disk_use, &binlog_cache_use, - &binlog_cache_disk_use); + &binlog_cache_disk_use, + precompute_checksums); DBUG_RETURN(cache_mngr); } @@ -6287,7 +6363,8 @@ bool THD::binlog_write_table_map(TABLE *table, bool with_annotate) binlog_cache_data *cache_data= (cache_mngr-> get_binlog_cache_data(is_transactional)); IO_CACHE *file= &cache_data->cache_log; - Log_event_writer writer(file, cache_data, the_event.select_checksum_alg(), NULL); + Log_event_writer writer(file, cache_data, + the_event.select_checksum_alg(cache_data), NULL); if (with_annotate) if (binlog_write_annotated_row(&writer)) @@ -6442,7 +6519,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, if (Rows_log_event* pending= cache_data->pending()) { Log_event_writer writer(&cache_data->cache_log, cache_data, - pending->select_checksum_alg(), NULL); + pending->select_checksum_alg(cache_data), NULL); /* Write pending event to the cache. @@ -7502,22 +7579,37 @@ uint MYSQL_BIN_LOG::next_file_id() events prior to fill in the binlog cache. */ -int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache) +int MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data) { DBUG_ENTER("MYSQL_BIN_LOG::write_cache"); - + IO_CACHE *cache= &cache_data->cache_log; mysql_mutex_assert_owner(&LOCK_log); + + /* + If possible, just copy the cache over byte-by-byte with pre-computed + checksums. + */ + if (likely(binlog_checksum_options == cache_data->checksum_opt) && + likely(!crypto.scheme) && + likely(!opt_binlog_legacy_event_pos)) + { + int res= my_b_copy_all_to_cache(cache, &log_file); + status_var_add(thd->status_var.binlog_bytes_written, my_b_tell(cache)); + DBUG_RETURN(res ? 
ER_ERROR_ON_WRITE : 0); + } + if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) DBUG_RETURN(ER_ERROR_ON_WRITE); /* Amount of remaining bytes in the IO_CACHE read buffer. */ size_t length= my_b_bytes_in_cache(cache); size_t group; - size_t end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN 2 t uchar header_buf[LOG_EVENT_HEADER_LEN]; Log_event_writer writer(&log_file, 0, (enum_binlog_checksum_alg)binlog_checksum_options, &crypto); uint checksum_len= writer.checksum_len; + bool precomputed_checksums= (cache_data->checksum_opt != BINLOG_CHECKSUM_ALG_OFF); + uint old_checksum_len= precomputed_checksums ? BINLOG_CHECKSUM_LEN : 0; int err= 0; if (crypto.scheme) @@ -7591,13 +7683,13 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache) /* Adjust the length and end_log_pos appropriately. */ uint ev_len= uint4korr(header + EVENT_LEN_OFFSET); // netto len - DBUG_ASSERT(ev_len >= LOG_EVENT_HEADER_LEN); - if (unlikely(ev_len < LOG_EVENT_HEADER_LEN)) + DBUG_ASSERT(ev_len >= LOG_EVENT_HEADER_LEN + old_checksum_len); + if (unlikely(ev_len < LOG_EVENT_HEADER_LEN + old_checksum_len)) goto error_in_read; - int4store(header + EVENT_LEN_OFFSET, ev_len + checksum_len); - end_log_pos_inc += checksum_len; - size_t val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc; - int4store(header + LOG_POS_OFFSET, val); + uint new_len= ev_len - old_checksum_len + checksum_len; + int4store(header + EVENT_LEN_OFFSET, new_len); + group+= new_len; + int4store(header + LOG_POS_OFFSET, group); /* Write the header to the binlog. */ if (writer.write_header(header, LOG_EVENT_HEADER_LEN)) @@ -7614,8 +7706,18 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache) goto error_in_read; } uint chunk= std::min(ev_len, (uint)length); - if (writer.write_data(cache->read_pos, chunk)) - goto error_in_write; + /* + Any old precomputed checksum must _not_ be written here. Instead, it + must be discarded; the new checksum, if needed, is written by + writer.write_footer(). 
+ */ + if (ev_len > old_checksum_len) + { + uint bytes_to_skip= + old_checksum_len - std::min(old_checksum_len, ev_len - chunk); + if (writer.write_data(cache->read_pos, chunk - bytes_to_skip)) + goto error_in_write; + } cache->read_pos+= chunk; length-= chunk; ev_len-= chunk; @@ -8745,7 +8847,7 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, DBUG_RETURN(ER_ERROR_ON_WRITE); if (entry->using_stmt_cache && !mngr->stmt_cache.empty() && - write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE))) + write_cache(entry->thd, mngr->get_binlog_cache_data(FALSE))) { entry->error_cache= &mngr->stmt_cache.cache_log; DBUG_RETURN(ER_ERROR_ON_WRITE); @@ -8756,7 +8858,7 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, DBUG_EXECUTE_IF("crash_before_writing_xid", { if ((write_cache(entry->thd, - mngr->get_binlog_cache_log(TRUE)))) + mngr->get_binlog_cache_data(TRUE)))) DBUG_PRINT("info", ("error writing binlog cache")); else flush_and_sync(0); @@ -8765,7 +8867,7 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, DBUG_SUICIDE(); }); - if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE))) + if (write_cache(entry->thd, mngr->get_binlog_cache_data(TRUE))) { entry->error_cache= &mngr->trx_cache.cache_log; DBUG_RETURN(ER_ERROR_ON_WRITE); @@ -11383,6 +11485,7 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, char binlog_checkpoint_name[FN_REFLEN]; bool binlog_checkpoint_found; IO_CACHE log; + IO_CACHE *cur_log; File file= -1; const char *errmsg; #ifdef HAVE_REPLICATION @@ -11429,12 +11532,16 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, */ binlog_checkpoint_found= false; + cur_log= first_log; for (round= 1;;) { - while ((ev= Log_event::read_log_event(round == 1 ? 
first_log : &log, - fdle, opt_master_verify_checksum)) + while ((ev= Log_event::read_log_event(cur_log, fdle, + opt_master_verify_checksum)) && ev->is_valid()) { +#ifdef HAVE_REPLICATION + my_off_t end_pos= my_b_tell(cur_log); +#endif enum Log_event_type typ= ev->get_type_code(); switch (typ) { @@ -11451,7 +11558,7 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, member->decided_to_commit= true; } #else - if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos)) + if (ctx.decide_or_assess(member, round, fdle, linfo, end_pos)) goto err2; #endif } @@ -11552,11 +11659,12 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, goto err2; ctx.last_gtid_valid= false; } - ctx.prev_event_pos= ev->log_pos; + ctx.prev_event_pos= end_pos; #endif delete ev; ev= NULL; } // end of while + cur_log= &log; /* If the last binlog checkpoint event points to an older log, we have to @@ -11813,7 +11921,8 @@ binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var, } else { - binlog_checksum_options= value; + my_atomic_storeul_explicit(&binlog_checksum_options, value, + MY_MEMORY_ORDER_RELAXED); } DBUG_ASSERT(binlog_checksum_options == value); mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF; diff --git a/sql/log.h b/sql/log.h index f02b20c12bf..91c406a71aa 100644 --- a/sql/log.h +++ b/sql/log.h @@ -821,7 +821,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG bool write_incident_already_locked(THD *thd); bool write_incident(THD *thd); void write_binlog_checkpoint_event_already_locked(const char *name, uint len); - int write_cache(THD *thd, IO_CACHE *cache); + int write_cache(THD *thd, binlog_cache_data *cache_data); void set_write_error(THD *thd, bool is_transactional); bool check_write_error(THD *thd); diff --git a/sql/log_event.h b/sql/log_event.h index 33f689c9330..5abc5fa0caf 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -1456,7 +1456,7 @@ class Log_event bool write_footer(Log_event_writer 
*writer) { return writer->write_footer(); } - enum enum_binlog_checksum_alg select_checksum_alg(); + enum enum_binlog_checksum_alg select_checksum_alg(const binlog_cache_data *data); virtual bool write(Log_event_writer *writer) { diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index bee594291d6..468d28c389c 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -735,20 +735,6 @@ void Log_event::init_show_field_list(THD *thd, List<Item>* field_list) mem_root); } -/** - Select if and how to write checksum for an event written to the binlog. - It returns the actively configured binlog checksum option, unless the event - is being written to a cache (in which case the checksum, if any, is added - later when the cache is copied to the real binlog). -*/ -enum enum_binlog_checksum_alg Log_event::select_checksum_alg() -{ - if (cache_type == Log_event::EVENT_NO_CACHE) - return (enum_binlog_checksum_alg)binlog_checksum_options; - else - return BINLOG_CHECKSUM_ALG_OFF; -} - int Log_event_writer::write_internal(const uchar *pos, size_t len) { DBUG_ASSERT(!ctx || encrypt_or_write == &Log_event_writer::encrypt_and_write); @@ -901,11 +887,17 @@ bool Log_event::write_header(Log_event_writer *writer, size_t event_data_length) change the position */ - if (is_artificial_event()) + if (is_artificial_event() || + cache_type == Log_event::EVENT_STMT_CACHE || + cache_type == Log_event::EVENT_TRANSACTIONAL_CACHE) { /* Artificial events are automatically generated and do not exist in master's binary log, so log_pos should be set to 0. + + Events written through transaction or statement cache have log_pos set + to 0 so that they can be copied directly to the binlog without having + to compute the real end_log_pos. 
*/ log_pos= 0; } @@ -4937,7 +4929,7 @@ void Create_file_log_event::pack_info(Protocol *protocol) /** Create_file_log_event::do_apply_event() - Constructor for Create_file_log_event to intantiate an event + Constructor for Create_file_log_event to instantiate an event from the relay log on the slave. @retval diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 99717a2c058..0ddefe81856 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -762,6 +762,7 @@ char *relay_log_info_file, *report_user, *report_password, *report_host; char *opt_relay_logname = 0, *opt_relaylog_index_name=0; char *opt_logname, *opt_slow_logname, *opt_bin_logname; char *opt_binlog_index_name=0; +my_bool opt_binlog_legacy_event_pos= FALSE; diff --git a/sql/mysqld.h b/sql/mysqld.h index e99d5cb300c..113bc9112cb 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -173,6 +173,7 @@ extern ulong delay_key_write_options; extern char *opt_logname, *opt_slow_logname, *opt_bin_logname, *opt_relay_logname; extern char *opt_binlog_index_name; +extern my_bool opt_binlog_legacy_event_pos; extern char *opt_backup_history_logname, *opt_backup_progress_logname, *opt_backup_settings_name; extern const char *log_output_str; diff --git a/sql/privilege.h b/sql/privilege.h index 8e9b9a3748e..7356181975d 100644 --- a/sql/privilege.h +++ b/sql/privilege.h @@ -362,6 +362,9 @@ constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_COMMIT_WAIT_USEC= constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_ROW_METADATA= SUPER_ACL | BINLOG_ADMIN_ACL; +constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_LEGACY_EVENT_POS= + SUPER_ACL | BINLOG_ADMIN_ACL; + constexpr privilege_t PRIV_SET_SYSTEM_GLOBAL_VAR_EXPIRE_LOGS_DAYS= SUPER_ACL | BINLOG_ADMIN_ACL; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 6142c0bf077..eef6cd34043 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -3518,6 +3518,19 @@ Sys_master_verify_checksum( GLOBAL_VAR(opt_master_verify_checksum), CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static 
Sys_var_on_access_global<Sys_var_mybool, + PRIV_SET_SYSTEM_GLOBAL_VAR_BINLOG_LEGACY_EVENT_POS> +Sys_binlog_legacy_event_pos( + "binlog_legacy_event_pos", + "Fill in the end_log_pos field of _all_ events in the binlog, even when " + "doing so costs performance. Can be used in case some old application needs " + "it for backwards compatibility. Setting this option can hurt binlog " + "scalability.", + GLOBAL_VAR(opt_binlog_legacy_event_pos), CMD_LINE(OPT_ARG), + DEFAULT(FALSE)); + + /* These names must match RPL_SKIP_XXX #defines in slave.h. */ static const char *replicate_events_marked_for_skip_names[]= { "REPLICATE", "FILTER_ON_SLAVE", "FILTER_ON_MASTER", 0 -- 2.30.2