On Mon, Mar 20, 2017 at 3:59 PM, jan <jan.lindstrom@mariadb.com> wrote:
diff --git a/mysql-test/suite/encryption/include/innodb-util.pl b/mysql-test/suite/encryption/include/innodb-util.pl
index 241545dac18..5a7295682dd 100644
--- a/mysql-test/suite/encryption/include/innodb-util.pl
+++ b/mysql-test/suite/encryption/include/innodb-util.pl
@@ -4,8 +4,11 @@
 # All the tables must be in the same database, you can call it like so:
 # ib_backup_tablespaces("test", "t1", "blah", ...).

+use strict;
+use warnings;
 use File::Copy;
 use File::Spec;
+use Fcntl qw(:DEFAULT :seek);

 sub ib_normalize_path {
     my ($path) = @_;
@@ -124,3 +127,32 @@ sub ib_restore_ibd_files {
         ib_restore_ibd_file($tmpd, $datadir, $db, $table);
     }
 }
+
+sub ib_corrupt_tablespace {
+    my ($db, $table) = @_;
+    my $datadir = $ENV{'MYSQLD_DATADIR'};
+    my $page_size = $ENV{'INNODB_PAGE_SIZE'};
+    my $ibd_file = sprintf("%s%s/%s.ibd", $datadir,$db,$table);

The sprintf() function and the separate parameters $db, $table look like obfuscation to me. Why not just write "$ENV{MYSQLD_DATADIR}/test/t1.ibd" when invoking this function in the .test file?
 
+    print "file: $ibd_file $page_size\n";

Why do you want to pollute the output with the page size? That would require rdiff files for different page sizes.
 
+#   We try to corrupt FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION field
+    my $len;
+
+    sysopen IBD_FILE, $ibd_file, O_RDWR || die "Unable to open $ibd_file";
+
+    while ($len = sysread(IBD_FILE, $_, $page_size))
+    {
+       sysseek(IBD_FILE, -$len, SEEK_CUR);
+        substr($_, 26, 8) = pack("N",0xdeadbeef);
+       print "Corrupted page with $chunk\n";
+       syswrite(IBD_FILE, $_, $len);
+    }
+    close IBD_FILE;
+}

Where is the variable $chunk defined? Why does this script issue output to stdout for every iteration of the loop? That would seem to make the test depend on the number of pages in the file, and thus cause result differences when using a different innodb_page_size.

If the sysread() function happens to be interrupted by a signal, it could return a different amount than 0 or $page_size bytes. Thus, the operation of the loop would be nondeterministic. I would rather use a simple copying loop with a regular expression replacement, instead of this in-place modification.

I would just do something like this:
--exec perl -ei 's|(.{26}).{8}(.$ENV{INNODB_PAGE_SIZE})|"$1".pack("N",0xdeadbeef),"$2"|seg' $MYSQLD_DATADIR/test/t1.ibd

Shorter and faster, and using fewer system calls.
 
diff --git a/mysql-test/suite/encryption/r/innodb-force-corrupt.result b/mysql-test/suite/encryption/r/innodb-force-corrupt.result
new file mode 100644
index 00000000000..bba2ec17bbc
--- /dev/null
+++ b/mysql-test/suite/encryption/r/innodb-force-corrupt.result
@@ -0,0 +1,38 @@
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed.*");
+call mtr.add_suppression("InnoDB: Corruption: Block in space_id .* in file .* corrupted.");
+call mtr.add_suppression("InnoDB: Based on page type .*");
+call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after .* attempts");
+CALL mtr.add_suppression("InnoDB: Error: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts");
+CALL mtr.add_suppression("InnoDB: Database page corruption on disk or a failed");
+CALL mtr.add_suppression("InnoDB: Space .* file test/t1 read of page .*");
+CALL mtr.add_suppression("InnoDB: You may have to recover from a backup.");
+CALL mtr.add_suppression("InnoDB: It is also possible that your operatingsystem has corrupted its own file cache.");
+CALL mtr.add_suppression("InnoDB: and rebooting your computer removes the error.");
+CALL mtr.add_suppression("InnoDB: If the corrupt page is an index page you can also try to");
+CALL mtr.add_suppression("InnoDB: fix the corruption by dumping, dropping, and reimporting");
+CALL mtr.add_suppression("InnoDB: the corrupt table. You can use CHECK");
+CALL mtr.add_suppression("InnoDB: TABLE to scan your table for corruption.");
+CALL mtr.add_suppression("InnoDB: See also .* about forcing recovery.");
+CALL mtr.add_suppression("InnoDB: However key management plugin or used key_version 3221605118 is not found or used encryption algorithm or method does not match.");
+CALL mtr.add_suppression("Marking tablespace as missing. You may drop this table or install correct key management plugin and key file.");
+CALL mtr.add_suppression("InnoDB: Block in space_id .* in file .* encrypted.");

Are all these suppressions really required? (The same problem exists in multiple tests.)

+call mtr.add_suppression("mysqld: File .* not found .*");

The suppression is looking for a substring by default (no implied $ at the end of the regular expression). The .* at the end of the regexp is clutter.
 
+call mtr.add_suppression(".*");

This catch-all is absolutely not acceptable.
 
+--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--shutdown_server
+--source include/wait_until_disconnected.inc

Do not use the low-level commands directly. Use restart_mysqld.inc. See the encryption test cleanup that I did in 10.2 in the merge: https://github.com/MariaDB/server/commit/2d96d13ecdcab4ce16c0d8976753d8aa50137e17
 
++--write_file $MYSQLTEST_VARDIR/keys1.txt
+1;770A8A65DA156D24EE2A093277530142
+4;770A8A65DA156D24EE2A093277530143
+EOF

This file can be created already before the shutdown (or restart).

Why not use key files in std_data? What is the benefit of creating files with identical static content in multiple --suite=encryption tests?

+--exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc

This is missing disable_reconnect, which is part of restart_mysqld.inc.
 
+--exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys2.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+--source include/restart_mysqld.inc

What is the purpose of restarting the server twice here?
 
+--error 1296
+select count(*) from t1 FORCE INDEX (test) where b like 'secret%';
+--error 1296
+select count(*) from t2 FORCE INDEX (test) where b like 'secret%';
+select count(*) from t3 FORCE INDEX (test) where b like 'secret%';

Do not use numeric error codes. Refer to include/mysqld_ername.h in the build directory for the symbolic codes. 1296 is ER_GET_ERRMSG.

+--remove_file $MYSQLTEST_VARDIR/keys1.txt
+--remove_file $MYSQLTEST_VARDIR/keys2.txt
+
+--write_file $MYSQLTEST_VARDIR/keys1.txt
+1;770A8A65DA156D24EE2A093277530142
+4;770A8A65DA156D24EE2A093277530143
+EOF

Why remove and rewrite the file with the identical content?

Similar steps are being done in multiple encryption tests. Can some of the repetition be avoided? Or could we test multiple operations with one server restart? Instead of having one test to check if CHECK TABLE crashes when the keys are incorrect or not available, another to check if ALTER TABLE crashes, and so on, just restart the server once and then execute all the commands that are to be tested. That would make --suite=encryption run faster too.
 
+--exec echo "restart:--innodb-encrypt-tables --innodb-stats-persistent --plugin-load-add=file_key_management.so --file-key-management --file-key-management-filename=$MYSQLTEST_VARDIR/keys1.txt" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+drop table t1,t2,t3;
+
+--remove_file $MYSQLTEST_VARDIR/keys1.txt

The server was left running, tied to keys1.txt. Shouldn’t we rather restart the server at the end of the test, to have the same configuration as it was at the start of the test?
 
diff --git a/mysql-test/suite/encryption/t/innodb-force-corrupt.test b/mysql-test/suite/encryption/t/innodb-force-corrupt.test
new file mode 100644
index 00000000000..3127b8f4006
--- /dev/null
+++ b/mysql-test/suite/encryption/t/innodb-force-corrupt.test
@@ -0,0 +1,122 @@
+#
+# MDEV-11759: Encryption code in MariaDB 10.1/10.2 causes compatibility problems
+#
+
+-- source include/have_innodb.inc
+-- source include/have_file_key_management_plugin.inc
+# Don't test under valgrind, memory leaks will occur
+source include/not_valgrind.inc;

Which memory leaks? Please run this with --valgrind and show.
 
+# Avoid CrashReporter popup on Mac
+source include/not_crashrep.inc;

Why? The test should not crash.
 
+# Don't test under embedded
+source include/not_embedded.inc;
+# Require InnoDB
+source include/have_innodb.inc;

I would omit this kind of redundant comment. I’d add a comment to explain that embedded server tests do not support restarting.
 
+# Test could open crash reporter on Windows
+# if compiler set up
+source include/not_windows.inc;

This is totally unacceptable. We must test under Windows on Buildbot.
 
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed.*");
+call mtr.add_suppression("InnoDB: Corruption: Block in space_id .* in file .* corrupted.");
+call mtr.add_suppression("InnoDB: Based on page type .*");
+call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after .* attempts");
+CALL mtr.add_suppression("InnoDB: Error: Unable to read tablespace .* page no .* into the buffer pool after 100 attempts");
+CALL mtr.add_suppression("InnoDB: Database page corruption on disk or a failed");
+CALL mtr.add_suppression("InnoDB: Space .* file test/t1 read of page .*");
+CALL mtr.add_suppression("InnoDB: You may have to recover from a backup.");
+CALL mtr.add_suppression("InnoDB: It is also possible that your operatingsystem has corrupted its own file cache.");
+CALL mtr.add_suppression("InnoDB: and rebooting your computer removes the error.");
+CALL mtr.add_suppression("InnoDB: If the corrupt page is an index page you can also try to");
+CALL mtr.add_suppression("InnoDB: fix the corruption by dumping, dropping, and reimporting");
+CALL mtr.add_suppression("InnoDB: the corrupt table. You can use CHECK");
+CALL mtr.add_suppression("InnoDB: TABLE to scan your table for corruption.");
+CALL mtr.add_suppression("InnoDB: See also .* about forcing recovery.");
+CALL mtr.add_suppression("InnoDB: However key management plugin or used key_version 3221605118 is not found or used encryption algorithm or method does not match.");
+CALL mtr.add_suppression("Marking tablespace as missing. You may drop this table or install correct key management plugin and key file.");
+CALL mtr.add_suppression("InnoDB: Block in space_id .* in file .* encrypted.");

Are all these suppressions really necessary? Some of the first 4 suppressions match the same strings as some of the later suppressions.
 
+let $MYSQLD_DATADIR=`select @@datadir`;
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+let MYSQLD_DATADIR=`select @@datadir`;

Why is MYSQLD_DATADIR assigned both to an mtr internal variable and an environment variable? The latter should suffice.
 
+--source include/shutdown_mysqld.inc
+--source include/wait_until_disconnected.inc

The wait_until_disconnected.inc is redundant and should be removed.
 
+--disable_result_log
+--error 1296
+SELECT * FROM t1;
+#
+# There is change that index is marked as corrupted here
+#
+--error 0,1712
+SELECT * FROM t2;

chance != change.

Why would t2 sometimes be not corrupted? In which way does it fail if the 0, is removed?

Use symbolic error codes, not numeric ones.
 
+--error 1296
+SELECT * FROM t3;
+--enable_result_log
+
+--source include/shutdown_mysqld.inc
+--source include/wait_until_disconnected.inc

Again, the wait_until_disconnected.inc should be removed.
 
+
+--echo # Restore the original t1.ibd
+--remove_file $MYSQLD_DATADIR/test/t1.ibd
+--move_file $MYSQLD_DATADIR/test/t1.ibd.backup $MYSQLD_DATADIR/test/t1.ibd
+--move_file $MYSQLD_DATADIR/test/t2.ibd.backup $MYSQLD_DATADIR/test/t2.ibd
+--move_file $MYSQLD_DATADIR/test/t3.ibd.backup $MYSQLD_DATADIR/test/t3.ibd
+
+--source include/start_mysqld.inc
+
+DROP TABLE t1,t2,t3;
 diff --git a/mysql-test/suite/encryption/t/innodb-redo-badkey.test b/mysql-test/suite/encryption/t/innodb-redo-badkey.test
new file mode 100644
index 00000000000..34ffeb1b59a
--- /dev/null
+++ b/mysql-test/suite/encryption/t/innodb-redo-badkey.test
@@ -0,0 +1,121 @@
+-- source include/have_innodb.inc
+-- source include/have_file_key_management_plugin.inc
+# embedded does not support restart
+-- source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur.
+--source include/not_valgrind.inc
+# Avoid CrashReporter popup on Mac
+--source include/not_crashrep.inc
+--source include/not_windows.inc

The not_windows.inc is not acceptable. I would say that crashes or memory leaks are not acceptable either.
 
+--source ../../suite/innodb/include/no_checkpoint_start.inc
+
+--disable_warnings
+SET GLOBAL innodb_file_format = `Barracuda`;
+SET GLOBAL innodb_file_per_table = ON;
+--enable_warnings
+
+create table t1(a int not null primary key auto_increment, c char(250), b blob) engine=innodb row_format=compressed encrypted=yes encryption_key_id=10;
+create table t2(a int not null primary key auto_increment, c char(250), b blob) engine=innodb row_format=compressed;
+create table t3(a int not null primary key auto_increment, c char(250), b blob) engine=innodb encrypted=yes encryption_key_id=10;
+create table t4(a int not null primary key auto_increment, c char(250), b blob) engine=innodb;
+CREATE INDEX test ON t1 (b(10));
+CREATE INDEX test ON t2 (b(10));
+CREATE INDEX test ON t3 (b(10));
+CREATE INDEX test ON t4 (b(10));
+
+--disable_query_log
+--let $i = 20
+begin;
+while ($i)
+{
+  insert into t1(c,b) values (repeat('secret1',20), repeat('secret2',6000));
+  dec $i;
+}
+commit;
+--enable_query_log
+
+begin;
+insert into t2 select * from t1;
+insert into t3 select * from t1;
+insert into t4 select * from t1;
+commit;
+
+--remove_file $MYSQLTEST_VARDIR/keys1.txt
+let SEARCH_FILE= $MYSQLTEST_VARDIR/log/my_restart.err;
+
+#
+# We test redo log page read at recv_read_page using
+# incorrect keys from std_data/keys.txt. If checkpoint
+# happens we will skip this test. If no checkpoint
+# happens, InnoDB refuses to start as used
+# encryption key is incorrect.
+#
+SET GLOBAL innodb_buf_flush_list_now = 1;
+begin;
+update t1 set c = repeat('secret3', 20);
+update t2 set c = repeat('secret4', 20);
+update t3 set c = repeat('secret4', 20);
+update t4 set c = repeat('secret4', 20);
+insert into t1 (c,b) values (repeat('secret5',20), repeat('secret6',6000));
+insert into t2 (c,b) values (repeat('secret7',20), repeat('secret8',6000));
+insert into t3 (c,b) values (repeat('secret9',20), repeat('secre10',6000));
+insert into t4 (c,b) values (repeat('secre11',20), repeat('secre12',6000));
+COMMIT;
+let $cleanup= drop table t1,t2,t3,t4;
+--let CLEANUP_IF_CHECKPOINT= $cleanup;
+--source ../../suite/innodb/include/no_checkpoint_end.inc

Do we really need to execute all this code while not allowing any redo log checkpoint? It would seem to me that this test would be skipped most of the time.

Where is the file my_restart.err created?

+--write_file $MYSQLTEST_VARDIR/keys1.txt
+1;770A8A65DA156D24EE2A093277530997
+10;770A8A65DA156D24EE2A093277530142
+EOF
+
+--echo # restart
+--error 1
+-- source include/start_mysqld.inc
+--source include/kill_mysqld.inc

This looks very suspicious. Why are the start and kill needed? And when would --source return an error?
Why would we want to kill the server here? A shutdown would make the test more deterministic.

I think that we would want to ensure that InnoDB refused to start up after the start_mysqld.inc.
 
diff --git a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test
new file mode 100644
index 00000000000..bfe5cd78e5b
--- /dev/null
+++ b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test
@@ -0,0 +1,98 @@
+-- source include/have_innodb.inc
+-- source include/have_file_key_management_plugin.inc
+# embedded does not support restart
+-- source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur.
+--source include/not_valgrind.inc
+# Avoid CrashReporter popup on Mac
+--source include/not_crashrep.inc
+--source include/not_windows.inc
+
+--write_file $MYSQLTEST_VARDIR/keys1.txt
+1;770A8A65DA156D24EE2A093277530997
+10;770A8A65DA156D24EE2A093277530999
+EOF

This file is very similar to the other added tests. I would prefer to have one test that tests multiple things across a server restart, instead of having a large number of tests that duplicate a lot of boilerplate code and make it hard to follow the actual purpose of the test.

Can you please merge some of the tests?

+--source ../../suite/innodb/include/no_checkpoint_start.inc
+
+create table t1(a int not null primary key auto_increment, c char(200), b blob) engine=innodb row_format=compressed encrypted=yes encryption_key_id=10;
+create table t2(a int not null primary key auto_increment, c char(200), b blob) engine=innodb row_format=compressed;
+create table t3(a int not null primary key auto_increment, c char(200), b blob) engine=innodb encrypted=yes encryption_key_id=10;
+create table t4(a int not null primary key auto_increment, c char(200), b blob) engine=innodb;
+CREATE INDEX test ON t1 (b(10));
+CREATE INDEX test ON t2 (b(10));
+CREATE INDEX test ON t3 (b(10));
+CREATE INDEX test ON t4 (b(10));
+
+--disable_query_log
+--let $i = 20
+begin;
+while ($i)
+{
+  insert into t1(c,b) values (repeat('secret1',20), repeat('secret2',6000));
+  dec $i;
+}
+commit;
+--enable_query_log
+
+begin;
+insert into t2 select * from t1;
+insert into t3 select * from t1;
+insert into t4 select * from t1;
+commit;
+
+--remove_file $MYSQLTEST_VARDIR/keys1.txt
+
+#
+# We test redo log page read at recv_read_page using
+# incorrect keys from std_data/keys.txt. If checkpoint
+# happens we will skip this test. If no checkpoint
+# happens, InnoDB refuses to start as used
+# encryption key is not found.
+#
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+begin;
+update t1 set c = repeat('secret3', 20);
+update t2 set c = repeat('secret4', 20);
+update t3 set c = repeat('secret4', 20);
+update t4 set c = repeat('secret4', 20);
+insert into t1 (c,b) values (repeat('secret5',20), repeat('secret6',6000));
+insert into t2 (c,b) values (repeat('secret7',20), repeat('secret8',6000));
+insert into t3 (c,b) values (repeat('secret9',20), repeat('secre10',6000));
+insert into t4 (c,b) values (repeat('secre11',20), repeat('secre12',6000));
+COMMIT;
+let $cleanup= drop table t1,t2,t3,t4;
+--let CLEANUP_IF_CHECKPOINT= $cleanup;
+--source ../../suite/innodb/include/no_checkpoint_end.inc

Is it really necessary to place the no_checkpoint_start.inc before the table creation statements? Why are there separate CREATE TABLE and CREATE INDEX statements for the empty tables?
 
diff --git a/mysql-test/suite/innodb/t/innodb_bug14147491.test b/mysql-test/suite/innodb/t/innodb_bug14147491.test
index 5776b2c2e37..d16a44e8a55 100644
--- a/mysql-test/suite/innodb/t/innodb_bug14147491.test
+++ b/mysql-test/suite/innodb/t/innodb_bug14147491.test
@@ -7,6 +7,7 @@
 call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed.*");
 call mtr.add_suppression("InnoDB: Corruption: Block in space_id .* in file .* corrupted.");
 call mtr.add_suppression("InnoDB: Based on page type .*");
+call mtr.add_suppression("InnoDB: Unable to read tablespace .* page no .* into the buffer pool after .* attempts");

 # Don't test under valgrind, memory leaks will occur
 source include/not_valgrind.inc;

I think that InnoDB is already noisy enough. Please fix the code instead of adding one more warning that is not really helping users.

@@ -4096,6 +4096,11 @@ btr_estimate_number_of_different_key_vals(

                page = btr_cur_get_page(&cursor);

+               if (index->table->file_unreadable || index->table->corrupted) {
+                       mtr_commit(&mtr);
+                       goto exit_loop;
+               }
+
                rec = page_rec_get_next(page_get_infimum_rec(page));

                if (!page_rec_is_supremum(rec)) {

When is this code covered? Why are we not testing this before assigning page? That variable is not being used after the goto.
In fact, the variable "page" should be declared inside the for loop body.

diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 01d2e1bb8e2..237e4e8d109 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -416,6 +416,11 @@ btr_pcur_move_to_next_page(
        cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

        page = btr_pcur_get_page(cursor);
+
+       if (!page) {
+               return;
+       }
+
        next_page_no = btr_page_get_next(page, mtr);
        space = buf_block_get_space(btr_pcur_get_block(cursor));
        zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
@@ -425,11 +430,16 @@ btr_pcur_move_to_next_page(
        next_block = btr_block_get(space, zip_size, next_page_no,
                                   cursor->latch_mode,
                                   btr_pcur_get_btr_cur(cursor)->index, mtr);
+
+       if (!next_block) {
+               return;
+       }
+
        next_page = buf_block_get_frame(next_block);
 #ifdef UNIV_BTR_DEBUG
        ut_a(page_is_comp(next_page) == page_is_comp(page));
        ut_a(btr_page_get_prev(next_page, mtr)
-            == buf_block_get_page_no(btr_pcur_get_block(cursor)));
+               == buf_block_get_page_no(btr_pcur_get_block(cursor)));
 #endif /* UNIV_BTR_DEBUG */
        next_block->check_index_page_at_flush = TRUE;

Are these changes really necessary? In which tests would the added "return" statements be executed?
These conditions look like they could hurt performance.
 
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index c7e2183fa7b..066a68da8c0 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2358,7 +2358,18 @@ buf_page_get_zip(
                /* Page not in buf_pool: needs to be read from file */

                ut_ad(!hash_lock);
-               buf_read_page(space, zip_size, offset, NULL);
+               dberr_t err = buf_read_page(space, zip_size, offset);
+
+               if (err != DB_SUCCESS) {
+                       ib_logf(IB_LOG_LEVEL_ERROR,
+                               "Reading compressed page " ULINTPF
+                               " from tablespace " ULINTPF
+                               " failed with error: %s (%d).",
+                               offset, space, ut_strerr(err), err);
+
+                       goto err_exit;
+               }
+

 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
                ut_a(++buf_dbg_counter % 5771 || buf_validate());

Return the error to the only caller of this function, and let that function report the error.
With the patch as it is, 2 messages would be reported for the single error.
Reporting the ut_strerr(err) alone is sufficient; nobody is interested in the enum dberr_t numeric values.

                        DBUG_EXECUTE_IF(
                                "innodb_page_corruption_retries",
-                               retries = BUF_PAGE_READ_MAX_RETRIES;
+                               goto force_fail;
                        );

This change is wrong. The idea of the debug instrumentation is to speed up the failure (not retry reading the corrupted page for 100 times).
The test must not crash even when this instrumentation is not present.
 
+                        /* Try to set table as corrupted instead of
+                        asserting. */
+                        if (space > TRX_SYS_SPACE &&
+                            dict_set_corrupted_by_space(space)) {
+                                return (NULL);
                        }

Why should we commit suicide when a page in the system tablespace is corrupted? I would just return NULL from here in any case.

-       if (!bpage->encrypted) {
-               ut_ad(buf_pool->n_pend_reads > 0);
-               buf_pool->n_pend_reads--;
+       /* After this point bpage can't be referenced. */
+       buf_LRU_free_one_page(bpage);

-               buf_pool_mutex_exit(buf_pool);
-       }
+       ut_ad(buf_pool->n_pend_reads > 0);
+       buf_pool->n_pend_reads--;

-       return(ret);
+       buf_pool_mutex_exit(buf_pool);
 }

Elsewhere, MariaDB 10.1 is using atomic memory access for the field buf_pool->n_pend_reads. We must be consistent.
If some reader or writer is using atomic memory access and no buf_pool->mutex at all, then every access must use atomic memory access, no matter if holding buf_pool->mutex.

@@ -4708,31 +4696,12 @@ buf_page_io_complete(
                                /* If page space id is larger than TRX_SYS_SPACE
                                (0), we will attempt to mark the corresponding
                                table as corrupted instead of crashing server */
-                               if (bpage->space > TRX_SYS_SPACE
-                                   && buf_mark_space_corrupt(bpage)) {
-                                       return(false);
+                               if (bpage->space > TRX_SYS_SPACE) {
+                                       buf_mark_space_corrupt(bpage);
+                                       return(err);
                                } else {
-                                       if (!bpage->encrypted) {
-                                               ib_logf(IB_LOG_LEVEL_ERROR,
-                                                       "Ending processing because of a corrupt database page.");
-
-                                               ut_error;
-                                       }
-
-                                       ib_push_warning(innobase_get_trx(), DB_DECRYPTION_FAILED,
-                                               "Table in tablespace %lu encrypted."
-                                               "However key management plugin or used key_id %lu is not found or"
-                                               " used encryption algorithm or method does not match."
-                                               " Can't continue opening the table.",
-                                               bpage->space, bpage->key_version);
-
-                                       if (bpage->encrypted && bpage->space > TRX_SYS_SPACE) {
-                                               buf_mark_space_corrupt(bpage);
-                                       } else {
-                                               ut_error;
-                                       }
-
-                                       return(false);
+                                       ib_logf(IB_LOG_LEVEL_FATAL,
+                                               "Ending processing because of a corrupt database page.");
                                }
                        }
                }

Again, why commit suicide if a page in the system tablespace cannot be read? I think that we should try our best to allow any data to be rescued, and not crash early.

diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 3f03ee8256f..e805585ed1e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -956,7 +956,10 @@ buf_flush_write_block_low(

                /* true means we want to evict this page from the
                LRU list as well. */
-               buf_page_io_complete(bpage, true);
+               dberr_t err = buf_page_io_complete(bpage, true);
+
+               /* This is write path, encryption should not fail here. */
+               ut_ad(err == DB_SUCCESS);
        }

        /* Increment the counter of I/O operations used

I think I already noted in an earlier review that a warning for the unused variable "err" would be issued for the non-debug build. Please build both non-debug and debug, and pay attention to the warnings!

@@ -444,23 +464,23 @@ High-level function which reads a page asynchronously from a file to the
 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
-@return TRUE if page has been read in, FALSE in case of failure */
+@param[in]     space           Tablespace id
+@param[in]     offset          Page no */
 UNIV_INTERN
-ibool
+void
 buf_read_page_async(
-/*================*/
-       ulint   space,  /*!< in: space id */
-       ulint   offset) /*!< in: page number */
+       ulint   space,
+       ulint   offset)

Add a function comment that this function is only used for restoring buffer pool dumps, and that we do not care if the page exists or if the read succeeded.

@@ -728,16 +748,30 @@ buf_read_ahead_linear(
                        count += buf_read_page_low(
                                &err, false,
                                ibuf_mode,
-                               space, zip_size, FALSE, tablespace_version, i, NULL);
-                       if (err == DB_TABLESPACE_DELETED) {
-                               ut_print_timestamp(stderr);
-                               fprintf(stderr,
-                                       "  InnoDB: Warning: in"
-                                       " linear readahead trying to access\n"
-                                       "InnoDB: tablespace %lu page %lu,\n"
-                                       "InnoDB: but the tablespace does not"
-                                       " exist or is just being dropped.\n",
-                                       (ulong) space, (ulong) i);
+                               space, zip_size, FALSE, tablespace_version, i);
+
+                       switch(err) {
+                       case DB_SUCCESS:
+                               break;
+                       case DB_TABLESPACE_DELETED:
+                               ib_logf(IB_LOG_LEVEL_WARN,
+                                       "In random"
+                                       "readahead trying to access"
+                                       "tablespace " ULINTPF " page " ULINTPF
+                                       " but the tablespace does not"
+                                       " exist or is just being dropped.",
+                                       space, i);
+                               break;
+
+                       case DB_DECRYPTION_FAILED:
+                               ib_logf(IB_LOG_LEVEL_ERROR,
+                                       "In random readahead read page " ULINTPF
+                                       " from tablespace " ULINTPF
+                                       " but page is still encrypted.",
+                                       i, space);
+                               break;
+                       default:
+                               ut_error;
                        }

You seem to have missed my earlier review comment where I noted that the message about "linear readahead" is misleadingly replaced by messages mentioning "random readahead".
 
@@ -829,6 +863,13 @@ buf_read_ibuf_merge_pages(
                                                      page_nos[i],
                                                      zip_size, FALSE);
                }
+
+               if (err == DB_DECRYPTION_FAILED) {
+                       ib_logf(IB_LOG_LEVEL_ERROR,
+                               "Read page " ULINTPF " from tablespace " ULINTPF
+                               " for insert buffer but page was encrypted.",
+                               page_nos[i], space_ids[i]);
+               }
        }

        os_aio_simulated_wake_handler_threads();

It would be clearer to say in the error message that we were unable to decrypt the page.
 
@@ -2192,7 +2192,7 @@ dict_load_table_low(

        (*table)->id = mach_read_from_8(field);

-       (*table)->ibd_file_missing = FALSE;
+       (*table)->file_unreadable = FALSE;

        return(NULL);
 }
@@ -2379,7 +2379,7 @@ dict_load_table(
                        "Table '%s' tablespace is set as discarded.",
                        table_name);

-               table->ibd_file_missing = TRUE;
+               table->file_unreadable = true;

        } else if (!fil_space_for_table_exists_in_mem(
                        table->space, name, false, true, heap,
@@ -2387,7 +2387,7 @@ dict_load_table(

                if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
                        /* Do not bother to retry opening temporary tables. */
-                       table->ibd_file_missing = TRUE;
+                       table->file_unreadable = true;

                } else {
                        if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) {
@@ -2422,7 +2422,7 @@ dict_load_table(
                                /* We failed to find a sensible
                                tablespace file */

-                               table->ibd_file_missing = TRUE;
+                               table->file_unreadable = TRUE;
                        }
                        if (filepath) {
                                mem_free(filepath);

This is not consistent. Sometimes you are using true, sometimes TRUE or FALSE for the unsigned:1 bit-field. Either way is fine, but be consistent.
 
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
index 7bef91f47ff..6a54ec6df35 100644
--- a/storage/innobase/fil/fil0crypt.cc
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -541,6 +541,11 @@ fil_parse_write_crypt_data(
                fil_space_release(space);
        }

+       /* Check is used key found from encryption plugin */
+       if (crypt_data->should_encrypt() && !crypt_data->is_key_found()) {
+               return NULL;
+       }
+
        return ptr;
 }


As far as I can understand, this is prematurely ending the redo log recovery, without returning any error!
This is not acceptable. Please add a proper test for this and ensure that an error is being returned.
 
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 627a63aaae0..079803eedf3 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -6292,6 +6292,8 @@ fil_aio_wait(
        mutex_enter(&fil_system->mutex);

        fil_node_complete_io(fil_node, fil_system, type);
+       ulint purpose = fil_node->space->purpose;
+       ulint space_id = fil_node->space->id;

        mutex_exit(&fil_system->mutex);


Why assign fil_node->space->purpose to a local variable? It is only referenced once.
 
@@ -6303,9 +6305,29 @@ fil_aio_wait(
        deadlocks in the i/o system. We keep tablespace 0 data files always
        open, and use a special i/o thread to serve insert buffer requests. */

-       if (fil_node->space->purpose == FIL_TABLESPACE) {
+       if (purpose == FIL_TABLESPACE) {
                srv_set_io_thread_op_info(segment, "complete io for buf page");
-               buf_page_io_complete(static_cast<buf_page_t*>(message));
+               buf_page_t* bpage = static_cast<buf_page_t*>(message);
+               ulint offset = bpage ? bpage->offset : ULINT_UNDEFINED;
+
+               dberr_t err = buf_page_io_complete(bpage);
+
+               if (err != DB_SUCCESS) {
+                       ib_log_level_t level = IB_LOG_LEVEL_FATAL;
+
+                       /* In crash recovery set log corruption on
+                       and produce only an error to fail InnoDB startup. */
+                       if (recv_recovery_is_on()) {
+                               recv_sys->found_corrupt_log = true;
+                               level = IB_LOG_LEVEL_ERROR;
+                       }
+
+                       ib_logf(level,
+                               "%s operation failed for tablespace "
+                               ULINTPF " offset " ULINTPF " error=%s (%d).",
+                               type == OS_FILE_WRITE ? "Write" : "Read",
+                               space_id, offset, ut_strerr(err),err);
+               }
        } else {
                srv_set_io_thread_op_info(segment, "complete io for log");
                log_io_complete(static_cast<log_group_t*>(message));

Can this really ever fail for writes? Please add a debug assertion and simplify the message.
There is no need to display the numeric value of dberr_t; the ut_strerr() suffices.

diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index 1cd13824542..87b48de6389 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -357,6 +357,11 @@ btr_pcur_move_to_next(
                        return(FALSE);
                }

+               if (cursor->btr_cur.index->table->corrupted ||
+                   cursor->btr_cur.index->table->file_unreadable) {
+                       return(FALSE);
+               }
+
                btr_pcur_move_to_next_page(cursor, mtr);

                return(TRUE);

Is this change really needed? Can we merge the "corrupted" flag into "file_unreadable" to make the check less expensive?

diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 9ca1c46d72b..1ff5cf721bf 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -274,9 +274,10 @@ recv_sys_var_init(void);
 #endif /* !UNIV_HOTBACKUP */
 /** Apply the hash table of stored log records to persistent data pages.
 @param[in]     last_batch      whether the change buffer merge will be
-                               performed as part of the operation */
+                               performed as part of the operation
+@return DB_SUCCESS or DB_DECRYPTION_FAILED */
 UNIV_INTERN
-void
+dberr_t
 recv_apply_hashed_log_recs(bool last_batch);

Please try to keep the return value as void, or alternatively add MY_ATTRIBUTE((warn_unused_result)) and fix all callers. I do not think it is easy to fix log_checkpoint().

 
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 45fd42848e8..e56d4a46650 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1298,6 +1298,10 @@ recv_parse_or_apply_log_rec_body(
                break;
        case MLOG_FILE_WRITE_CRYPT_DATA:
                ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, block));
+
+               if (!ptr) {
+                       recv_sys->found_corrupt_log = TRUE;
+               }
                break;
        default:
                ptr = NULL;

If block==NULL, ptr=NULL is returned on EOF. We do not want to have random bogus failures in redo log recovery!
Even if a redo log record happens to be split across multiple redo log segments, we must parse the whole redo log!
 
@@ -3549,7 +3553,7 @@ row_truncate_table_for_mysql(
                                        "create a new tablespace",
                                        table->name);

-                               table->ibd_file_missing = 1;
+                               table->file_unreadable = 1;
                                err = DB_ERROR;
                                goto funct_exit;
                        }

Here we are assigning yet another literal to the field. Earlier, FALSE, TRUE, and true have been observed. Be consistent. Or even better, add a method
void dict_table_t::set_unreadable(bool unreadable = true)
and use it.
 
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index dab1bc58db5..8f73ed4d49a 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -3714,11 +3714,12 @@ row_search_for_mysql(

                return(DB_TABLESPACE_DELETED);

-       } else if (prebuilt->table->ibd_file_missing) {
+       } else if (prebuilt->table->file_unreadable &&
+                  fil_space_get(prebuilt->table->space) == NULL) {

                return(DB_TABLESPACE_NOT_FOUND);

-       } else if (prebuilt->table->is_encrypted) {
+       } else if (prebuilt->table->file_unreadable) {

                return(DB_DECRYPTION_FAILED);
        } else if (!prebuilt->index_usable) {

I would add a method

bool dict_table_t::is_file_missing() const

and use it instead of duplicating the code.
 
Marko
--
DON’T MISS
M|17
April 11 - 12, 2017
The Conrad Hotel
New York City
https://m17.mariadb.com/

Marko Mäkelä, Lead Developer InnoDB
MariaDB Corporation