Skip to content

Commit

Permalink
MDEV-34898 Doublewrite recovery of innodb_checksum_algorithm=full_crc…
Browse files Browse the repository at this point in the history
…32 encrypted pages does not work

- InnoDB fails to recover a full_crc32 encrypted page from the
doublewrite buffer. The reason is that buf_dblwr_t::recover()
cannot identify the space id of such a page, because the page
contents are encrypted starting at the
FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION field.

Fix:
===
buf_dblwr_t::recover(): preserve any pages whose space_id
does not match a known tablespace. These could be encrypted pages
of tablespaces that had been created with
innodb_checksum_algorithm=full_crc32.

buf_page_t::read_complete(): If the page looks corrupted and the
tablespace is encrypted and in full_crc32 format, try to
restore the page from doublewrite buffer.

recv_dblwr_t::find_encrypted_page(): Find a doublewrite page that
has the same page number and try to decrypt it using
space->crypt_data. After decryption, compare the space id.
Write the recovered page back to the file.
  • Loading branch information
Thirunarayanan committed Dec 13, 2024
1 parent 155203c commit 4873034
Show file tree
Hide file tree
Showing 10 changed files with 206 additions and 10 deletions.
1 change: 1 addition & 0 deletions mysql-test/suite/encryption/r/debug_key_management.result
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file .*");
create table t1(a serial) engine=innoDB;
set global innodb_encrypt_tables=ON;
show variables like 'innodb_encrypt%';
Expand Down
46 changes: 46 additions & 0 deletions mysql-test/suite/encryption/r/doublewrite_debug.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
commit work;
SET GLOBAL innodb_fast_shutdown = 0;
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name="test/t1";
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name="test/t2";
begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_buf_flush_list_now = 1;
# Kill the server
# restart
FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
check table t2;
Table Op Msg_type Msg_text
test.t2 check status OK
select f1, f2 from t1;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
select f1, f2 from t2;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
drop table t2, t1;
1 change: 1 addition & 0 deletions mysql-test/suite/encryption/t/debug_key_management.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
-- source include/innodb_undo_tablespaces.inc
-- source include/not_embedded.inc

call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file .*");
if (`select count(*) = 0 from information_schema.plugins
where plugin_name = 'debug_key_management' and plugin_status='active'`)
{
Expand Down
3 changes: 3 additions & 0 deletions mysql-test/suite/encryption/t/doublewrite_debug.opt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--innodb-use-atomic-writes=0
--innodb-encrypt-tables=FORCE
--innodb_sys_tablespaces
75 changes: 75 additions & 0 deletions mysql-test/suite/encryption/t/doublewrite_debug.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/not_embedded.inc
--source include/have_example_key_management_plugin.inc

let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
let MYSQLD_DATADIR=`select @@datadir`;

create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;

start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
commit work;

# Slow shutdown and restart to make sure ibuf merge is finished
SET GLOBAL innodb_fast_shutdown = 0;
let $shutdown_timeout=;
let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0;
--source include/restart_mysqld.inc
--source ../../suite/innodb/include/no_checkpoint_start.inc

select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name="test/t1";
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name="test/t2";

begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));

set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;

set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;

set global innodb_buf_flush_list_now = 1;
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint;
--source ../../suite/innodb/include/no_checkpoint_end.inc

# Corrupt page 3 in both t1.ibd and t2.ibd by overwriting it with zero bytes.
# The server is expected to be down while this Perl block runs; the data
# files are modified directly on disk.
perl;
use IO::Handle;
# First data file: test/t1.ibd under the server's data directory.
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd";
# Open for read/write without truncating the existing file.
open(FILE, "+<", $fname) or die;
FILE->autoflush(1);
binmode FILE;
# Position at the start of page 3 (byte offset 3 * page size) ...
seek(FILE, 3 * $ENV{'INNODB_PAGE_SIZE'}, SEEK_SET);
# ... and replace exactly one page with NUL bytes.
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;

# Second data file: test/t2.ibd; same procedure as above.
# NOTE(review): $fname is declared with "my" a second time in the same scope.
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t2.ibd";
open(FILE, "+<", $fname) or die;
FILE->autoflush(1);
binmode FILE;
seek(FILE, 3 * $ENV{'INNODB_PAGE_SIZE'}, SEEK_SET);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;
EOF

let $restart_parameters=;
--source include/start_mysqld.inc
let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9]*, page number=3\\];
--source include/search_pattern_in_file.inc

check table t1;
check table t2;
select f1, f2 from t1;
select f1, f2 from t2;
drop table t2, t1;
11 changes: 11 additions & 0 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3777,6 +3777,16 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node)
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
{
release_page:
if (node.space->full_crc32() && node.space->crypt_data &&
recv_recovery_is_on() &&
recv_sys.dblwr.find_encrypted_page(node, id().page_no(),
const_cast<byte*>(read_frame)))
{
/* Recover from doublewrite buffer */
err= DB_SUCCESS;
goto success_page;
}

if (recv_sys.free_corrupted_page(expected_id, node));
else if (err == DB_FAIL)
err= DB_PAGE_CORRUPTED;
Expand All @@ -3798,6 +3808,7 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node)
buf_pool.corrupted_evict(this, buf_page_t::READ_FIX);
return err;
}
success_page:

const bool recovery= recv_recovery_is_on();

Expand Down
10 changes: 9 additions & 1 deletion storage/innobase/buf/buf0dblwr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ void buf_dblwr_t::recover()
srv_page_size));
byte *const buf= read_buf + srv_page_size;

std::deque<byte*> encrypted_pages;
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
{
Expand All @@ -385,8 +386,13 @@ void buf_dblwr_t::recover()
fil_space_t *space= fil_space_t::get(space_id);

if (!space)
/* The tablespace that this page once belonged to does not exist */
{
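/* The tablespace that this page once belonged to is not known.
The space id read from the page may be meaningless if this is an
encrypted page of a full_crc32 tablespace, so preserve the page;
it will be validated later in find_encrypted_page(). */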
encrypted_pages.push_back(*i);
continue;
}

if (UNIV_UNLIKELY(page_no >= space->get_size()))
{
Expand Down Expand Up @@ -465,6 +471,8 @@ void buf_dblwr_t::recover()
}

recv_sys.dblwr.pages.clear();
for (auto it : encrypted_pages)
recv_sys.dblwr.pages.push_back(it);
fil_flush_file_spaces();
aligned_free(read_buf);
}
Expand Down
16 changes: 14 additions & 2 deletions storage/innobase/include/log0recv.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,17 @@ struct recv_dblwr_t
const fil_space_t *space= nullptr,
byte *tmp_buf= nullptr) const noexcept;

/** Find a doublewrite buffer copy of an encrypted page of the given
data file and decrypt it into a caller-supplied buffer.
A copy is accepted if it has the requested page number, does not look
corrupted, can be decrypted (and decompressed, if the tablespace is
compressed), and carries the tablespace id of the file after decryption.
The first copy that passes these checks is used, and it is removed from
the list of doublewrite pages.
@param space   data file node; its tablespace is expected to be
               encrypted and in full_crc32 format
@param page_no page number to find
@param buf     output buffer of at least the physical page size;
               receives the decrypted page on success, and may have
               been overwritten even if no usable copy is found
@retval true if a usable encrypted page was found in the doublewrite buffer
@retval false otherwise */
bool find_encrypted_page(const fil_node_t &space,
uint32_t page_no,
byte *buf);

/** Restore the first page of the given tablespace from
doublewrite buffer.
1) Find the page which has page_no as 0
Expand Down Expand Up @@ -257,8 +268,9 @@ struct recv_sys_t
during log scan or apply */
bool found_corrupt_fs;
public:
/** whether we are applying redo log records during crash recovery */
bool recovery_on;
/** whether we are applying redo log records during crash recovery.
This is protected by recv_sys.mutex */
Atomic_relaxed<bool> recovery_on= false;
/** whether recv_recover_page(), invoked from buf_page_t::read_complete(),
should apply log records*/
bool apply_log_recs;
Expand Down
46 changes: 46 additions & 0 deletions storage/innobase/log/log0recv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3811,6 +3811,8 @@ void recv_sys_t::apply(bool last_batch)

mysql_mutex_lock(&mutex);

if (last_batch)
dblwr.pages.clear();
ut_d(after_apply= true);
clear();
mysql_mutex_unlock(&mutex);
Expand Down Expand Up @@ -4841,6 +4843,50 @@ bool recv_dblwr_t::validate_page(const page_id_t page_id, lsn_t max_lsn,
goto check_if_corrupted;
}

bool recv_dblwr_t::find_encrypted_page(const fil_node_t &node,
uint32_t page_no,
byte *buf)
{
ut_ad(node.space->crypt_data);
ut_ad(node.space->full_crc32());
mysql_mutex_lock(&recv_sys.mutex);
const lsn_t max_lsn{log_sys.get_lsn()};
for (list::iterator page_it= pages.begin(); page_it != pages.end();
page_it++)
{
const byte *page= *page_it;
if (page_get_page_no(page) != page_no)
continue;
const lsn_t lsn= mach_read_from_8(page + FIL_PAGE_LSN);
if (buf_page_is_corrupted(lsn <= max_lsn, page, node.space->flags))
continue;
memcpy(buf, page, node.space->physical_size());
buf_tmp_buffer_t* slot= buf_pool.io_buf_reserve(false);
ut_a(slot);
slot->allocate();
bool invalidate=
!fil_space_decrypt(node.space, slot->crypt_buf, buf) ||
(node.space->is_compressed() &&
!fil_page_decompress(slot->crypt_buf, buf, node.space->flags));
slot->release();

if (invalidate ||
mach_read_from_4(buf + FIL_PAGE_SPACE_ID) != node.space->id)
continue;

pages.erase(page_it);
sql_print_information("InnoDB: Recovered page [page id: space="
UINT32PF ", page number=" UINT32PF "] "
"to '%s' from the doublewrite buffer.",
uint32_t(node.space->id), page_no,
node.name);
mysql_mutex_unlock(&recv_sys.mutex);
return true;
}
mysql_mutex_unlock(&recv_sys.mutex);
return false;
}

const byte *recv_dblwr_t::find_page(const page_id_t page_id, lsn_t max_lsn,
const fil_space_t *space, byte *tmp_buf)
const noexcept
Expand Down
7 changes: 0 additions & 7 deletions storage/innobase/srv/srv0start.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,6 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
srv_startup_is_before_trx_rollback_phase = false;
}

/* Enable checkpoints in buf_flush_page_cleaner(). */
recv_sys.recovery_on = false;
mysql_mutex_unlock(&log_sys.mutex);

log_make_checkpoint();
Expand Down Expand Up @@ -1351,9 +1349,6 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(DB_ERROR));
}

/* Enable checkpoints in the page cleaner. */
recv_sys.recovery_on = false;

err= recv_recovery_read_max_checkpoint();

if (err != DB_SUCCESS) {
Expand Down Expand Up @@ -1507,8 +1502,6 @@ dberr_t srv_start(bool create_new_db)
: recv_recovery_from_checkpoint_start(flushed_lsn);
recv_sys.close_files();

recv_sys.dblwr.pages.clear();

if (err != DB_SUCCESS) {
return(srv_init_abort(err));
}
Expand Down

0 comments on commit 4873034

Please sign in to comment.