Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MDEV-34898 Doublewrite recovery of innodb_checksum_algorithm=full_crc32 encrypted pages does not work #3691

Open
wants to merge 1 commit into
base: 10.6
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mysql-test/suite/encryption/r/debug_key_management.result
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file .*");
create table t1(a serial) engine=innoDB;
set global innodb_encrypt_tables=ON;
show variables like 'innodb_encrypt%';
Expand Down
47 changes: 47 additions & 0 deletions mysql-test/suite/encryption/r/doublewrite_debug.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file .*");
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
commit work;
SET GLOBAL innodb_fast_shutdown = 0;
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name="test/t1";
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name="test/t2";
begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_buf_flush_list_now = 1;
# Kill the server
# restart
FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
check table t2;
Table Op Msg_type Msg_text
test.t2 check status OK
select f1, f2 from t1;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
select f1, f2 from t2;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
drop table t2, t1;
1 change: 1 addition & 0 deletions mysql-test/suite/encryption/t/debug_key_management.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
-- source include/innodb_undo_tablespaces.inc
-- source include/not_embedded.inc

call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file .*");
if (`select count(*) = 0 from information_schema.plugins
where plugin_name = 'debug_key_management' and plugin_status='active'`)
{
Expand Down
3 changes: 3 additions & 0 deletions mysql-test/suite/encryption/t/doublewrite_debug.opt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--innodb-use-atomic-writes=0
--innodb-encrypt-tables=FORCE
--innodb_sys_tablespaces
105 changes: 105 additions & 0 deletions mysql-test/suite/encryption/t/doublewrite_debug.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/not_embedded.inc
--source include/have_example_key_management_plugin.inc
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file .*");

let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
let MYSQLD_DATADIR=`select @@datadir`;
let ALGO=`select @@innodb_checksum_algorithm`;

create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;

start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
commit work;

# Slow shutdown and restart to make sure ibuf merge is finished
SET GLOBAL innodb_fast_shutdown = 0;
let $shutdown_timeout=;
let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0;
--source include/restart_mysqld.inc
--source ../../suite/innodb/include/no_checkpoint_start.inc

select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name="test/t1";
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name="test/t2";

begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));

set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;

set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;

set global innodb_buf_flush_list_now = 1;
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint;
--source ../../suite/innodb/include/no_checkpoint_end.inc

# Corrupt the page 3 in t1.ibd, t2.ibd file
perl;
use IO::Handle;
do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
my $polynomial = 0x82f63b78; # CRC-32C
my $algo = $ENV{ALGO};
die "Unsupported innodb_checksum_algorithm=$algo\n" unless $algo =~ /crc32/;

my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd";
my $page_size = $ENV{INNODB_PAGE_SIZE};
my $page;
do "$ENV{MTR_SUITE_DIR}/../innodb/include/crc32.pl";
open(FILE, "+<", $fname) or die;
sysseek(FILE, 3*$page_size, 0);
sysread(FILE, $page, $page_size)==$page_size||die "Unable to read $name\n";
sysseek(FILE, 3*$page_size, 0)||die "Unable to seek $fname\n";
my $corrupted = $page;
# Set FIL_PAGE_LSN to the maximum
substr($corrupted, 16, 8) = chr(255) x 8;
substr($corrupted, $page_size - 8, 8) = chr(255) x 8;
if ($algo =~ /full_crc32/)
{
my $ck = mycrc32(substr($corrupted, 0, $page_size - 4), 0, $polynomial);
substr($corrupted, $page_size - 4, 4) = pack("N", $ck);
}
else
{
# Replace the innodb_checksum_algorithm=crc32 checksum
my $ck= pack("N",
mycrc32(substr($corrupted, 4, 22), 0, $polynomial) ^
mycrc32(substr($corrupted_, 38, $page_size - 38 - 8), 0,
$polynomial));
substr ($corrupted, 0, 4) = $ck;
substr ($corrupted, $page_size - 8, 4) = $ck;
}
syswrite(FILE, $corrupted);
close FILE;

my $fname= "$ENV{'MYSQLD_DATADIR'}test/t2.ibd";
open(FILE, "+<", $fname) or die;
FILE->autoflush(1);
binmode FILE;
seek(FILE, 3 * $ENV{'INNODB_PAGE_SIZE'}, SEEK_SET);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;
EOF

Comment on lines +85 to +93
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked that FIL_PAGE_LSN is not encrypted on the page. Therefore, we should be able to corrupt just that (for example, write chr(255) x 8, which will be newer than end of the log) and recompute the checksum. I think that it would be more interesting than this "totally corrupted" page.

Can you please also do a test variant that corrupts this page in the doublewrite buffer, with an LSN that is before the checkpoint? That should lead to InnoDB crash recovery failing. Better make it two attempts: also with an LSN that is after the end of the log.

# Successful recover from doublewrite buffer
let $restart_parameters=;
--source include/start_mysqld.inc
let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9]*, page number=3\\];
--source include/search_pattern_in_file.inc

check table t1;
check table t2;
select f1, f2 from t1;
select f1, f2 from t2;
drop table t2, t1;
11 changes: 11 additions & 0 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3779,6 +3779,16 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node)
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
{
release_page:
if (node.space->full_crc32() && node.space->crypt_data &&
recv_recovery_is_on() &&
recv_sys.dblwr.find_encrypted_page(node, id().page_no(),
const_cast<byte*>(read_frame)))
{
/* Recover from doublewrite buffer */
err= DB_SUCCESS;
goto success_page;
}

if (recv_sys.free_corrupted_page(expected_id, node));
else if (err == DB_FAIL)
err= DB_PAGE_CORRUPTED;
Expand All @@ -3800,6 +3810,7 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node)
buf_pool.corrupted_evict(this, buf_page_t::READ_FIX);
return err;
}
success_page:

const bool recovery= recv_recovery_is_on();

Expand Down
13 changes: 12 additions & 1 deletion storage/innobase/buf/buf0dblwr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ void buf_dblwr_t::recover()
srv_page_size));
byte *const buf= read_buf + srv_page_size;

std::deque<byte*> encrypted_pages;
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
{
Expand All @@ -385,8 +386,16 @@ void buf_dblwr_t::recover()
fil_space_t *space= fil_space_t::get(space_id);

if (!space)
/* The tablespace that this page once belonged to does not exist */
{
/* These pages doesn't belong to existing tablespace.
There is a possibility that these pages could be
encrypted using full_crc32 format. If innodb encounters
any corrupted encrypted page during recovery then
InnoDB should use this page to find the valid page.
See find_encrypted_page() */
encrypted_pages.push_back(*i);
continue;
}

if (UNIV_UNLIKELY(page_no >= space->get_size()))
{
Expand Down Expand Up @@ -465,6 +474,8 @@ void buf_dblwr_t::recover()
}

recv_sys.dblwr.pages.clear();
for (auto it : encrypted_pages)
recv_sys.dblwr.pages.push_back(it);
fil_flush_file_spaces();
aligned_free(read_buf);
}
Expand Down
16 changes: 14 additions & 2 deletions storage/innobase/include/log0recv.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,17 @@ struct recv_dblwr_t
const fil_space_t *space= nullptr,
byte *tmp_buf= nullptr) const noexcept;

/** Find the doublewrite copy of an encrypted page with the
smallest FIL_PAGE_LSN that is large enough for recovery.
@param space tablespace object
@param page_no page number to find
@param buf unencrypted page
@retval true if encrypted page found in doublewrite buffer
@retval false otherwise */
bool find_encrypted_page(const fil_node_t &space,
uint32_t page_no,
byte *buf);

/** Restore the first page of the given tablespace from
doublewrite buffer.
1) Find the page which has page_no as 0
Expand Down Expand Up @@ -257,8 +268,9 @@ struct recv_sys_t
during log scan or apply */
bool found_corrupt_fs;
public:
/** whether we are applying redo log records during crash recovery */
bool recovery_on;
/** whether we are applying redo log records during crash recovery.
This is protected by recv_sys.mutex */
Atomic_relaxed<bool> recovery_on= false;
/** whether recv_recover_page(), invoked from buf_page_t::read_complete(),
should apply log records*/
bool apply_log_recs;
Expand Down
42 changes: 42 additions & 0 deletions storage/innobase/log/log0recv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3785,6 +3785,7 @@ void recv_sys_t::apply(bool last_batch)
/* We skipped this in buf_page_create(). */
mlog_init.mark_ibuf_exist();
mlog_init.clear();
dblwr.pages.clear();
}
else
{
Expand Down Expand Up @@ -4841,6 +4842,47 @@ bool recv_dblwr_t::validate_page(const page_id_t page_id, lsn_t max_lsn,
goto check_if_corrupted;
}

bool recv_dblwr_t::find_encrypted_page(const fil_node_t &node,
uint32_t page_no,
byte *buf)
{
ut_ad(node.space->crypt_data);
ut_ad(node.space->full_crc32());
mysql_mutex_lock(&recv_sys.mutex);
for (list::iterator page_it= pages.begin(); page_it != pages.end();
page_it++)
{
const byte *page= *page_it;
if (page_get_page_no(page) != page_no ||
buf_page_is_corrupted(true, page, node.space->flags))
continue;
memcpy(buf, page, node.space->physical_size());
buf_tmp_buffer_t* slot= buf_pool.io_buf_reserve(false);
ut_a(slot);
slot->allocate();
bool invalidate=
!fil_space_decrypt(node.space, slot->crypt_buf, buf) ||
(node.space->is_compressed() &&
!fil_page_decompress(slot->crypt_buf, buf, node.space->flags));
slot->release();

if (invalidate ||
mach_read_from_4(buf + FIL_PAGE_SPACE_ID) != node.space->id)
continue;

pages.erase(page_it);
sql_print_information("InnoDB: Recovered page [page id: space="
UINT32PF ", page number=" UINT32PF "] "
"to '%s' from the doublewrite buffer.",
uint32_t(node.space->id), page_no,
node.name);
mysql_mutex_unlock(&recv_sys.mutex);
return true;
}
mysql_mutex_unlock(&recv_sys.mutex);
return false;
}

const byte *recv_dblwr_t::find_page(const page_id_t page_id, lsn_t max_lsn,
const fil_space_t *space, byte *tmp_buf)
const noexcept
Expand Down
9 changes: 3 additions & 6 deletions storage/innobase/srv/srv0start.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,8 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
srv_startup_is_before_trx_rollback_phase = false;
}

/* Enable checkpoints in buf_flush_page_cleaner(). */
recv_sys.recovery_on = false;
Thirunarayanan marked this conversation as resolved.
Show resolved Hide resolved
ut_ad(!recv_sys.recovery_on);

mysql_mutex_unlock(&log_sys.mutex);

log_make_checkpoint();
Expand Down Expand Up @@ -1351,8 +1351,7 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(DB_ERROR));
}

/* Enable checkpoints in the page cleaner. */
recv_sys.recovery_on = false;
Thirunarayanan marked this conversation as resolved.
Show resolved Hide resolved
ut_ad(!recv_sys.recovery_on);

err= recv_recovery_read_max_checkpoint();

Expand Down Expand Up @@ -1507,8 +1506,6 @@ dberr_t srv_start(bool create_new_db)
: recv_recovery_from_checkpoint_start(flushed_lsn);
recv_sys.close_files();

recv_sys.dblwr.pages.clear();
Thirunarayanan marked this conversation as resolved.
Show resolved Hide resolved

if (err != DB_SUCCESS) {
return(srv_init_abort(err));
}
Expand Down