Skip to content

Commit

Permalink
fix: parse timestamp from the name of data dir for gc instead of the …
Browse files Browse the repository at this point in the history
…last update time (#1667)

#1673

It was found that the data directories of replicas are sometimes removed
immediately after they are renamed with the postfixes `.err`/`.gar`, even
though both `gc_disk_error_replica_interval_seconds` and
`gc_disk_garbage_replica_interval_seconds` have been configured
to at least one day.

The reason is that the base time for expiration time is **the last write
time**, that is, `st_mtime` within `struct stat` returned by `stat()`.
Once a long time has passed since the last write time, the data directory
will be removed immediately after it is renamed with postfixes `.err/.gar`.

To fix this problem, just use the timestamp within the directory name as
the base time that is generated when the data directory is renamed with
postfixes `.err/.gar`. The last update time would be used iff the timestamp
is NOT found within the directory name.
  • Loading branch information
empiredan authored Nov 7, 2023
1 parent 724ec0b commit ee369d3
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 75 deletions.
200 changes: 167 additions & 33 deletions src/replica/disk_cleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

#include "disk_cleaner.h"

#include <boost/algorithm/string/predicate.hpp>
#include <fmt/core.h>
#include <stdint.h>
#include <sys/types.h>
#include <algorithm>
#include <atomic>
#include <cctype>

#include "common/fs_manager.h"
#include "metadata_types.h"
Expand All @@ -32,6 +34,9 @@
#include "utils/filesystem.h"
#include "utils/flags.h"
#include "utils/fmt_logging.h"
#include "utils/macros.h"
#include "utils/string_conv.h"
#include "utils/string_view.h"

namespace dsn {
namespace replication {
Expand Down Expand Up @@ -70,6 +75,108 @@ const std::string kFolderSuffixBak = ".bak";
const std::string kFolderSuffixOri = ".ori";
const std::string kFolderSuffixTmp = ".tmp";

namespace {

// TODO(wangdan): we could study later whether ctime (i.e. `st_ctime` within `struct stat`,
// the time of last status change) could be used instead of mtime (i.e. `st_mtime` within
// `struct stat`, the last write time), since ctime of the new directory would be updated
// to the current time once rename() is called, while mtime would not be updated.
// Compute the expiration time of `path` from its last write time.
//
// On success, `expiration_timestamp_s` is set to the last write time (in seconds) plus
// `delay_seconds` and true is returned. If the last write time cannot be fetched, a warning
// is logged, `expiration_timestamp_s` is left untouched and false is returned.
bool get_expiration_timestamp_by_last_write_time(const std::string &path,
                                                 uint64_t delay_seconds,
                                                 uint64_t &expiration_timestamp_s)
{
    time_t mtime_s;
    if (dsn::utils::filesystem::last_write_time(path, mtime_s)) {
        const auto base_s = static_cast<uint64_t>(mtime_s);
        expiration_timestamp_s = base_s + delay_seconds;
        return true;
    }

    LOG_WARNING("gc_disk: failed to get last write time of {}", path);
    return false;
}

// Unix timestamp in microseconds for 2010-01-01 00:00:00 GMT+0000.
// This timestamp could be used as the minimum, since it's far earlier than the time when
// Pegasus was born.
#define MIN_TIMESTAMP_US 1262304000000000
// Number of characters in the decimal form of MIN_TIMESTAMP_US, i.e. the size of its
// stringified literal minus the terminating NUL. Presumably STRINGIFY expands its argument
// before stringifying it -- confirm against utils/macros.h. parse_timestamp_us() rejects
// any candidate shorter than this.
#define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1)

// Parse the timestamp embedded in a directory name.
//
// There are only 2 kinds of directory names that could include a timestamp: one is the
// faulty replicas whose name has suffix ".err"; another is the dropped replicas whose name
// has suffix ".gar". Examples for both kinds of directory names:
//   1.1.pegasus.1698843209235962.err
//   1.2.pegasus.1698843214240709.gar
//
// `suffix_size` is the size of the suffix, including its leading dot. For both kinds of
// names (.err and .gar), `suffix_size` is 4. `name` must be at least `suffix_size` long.
//
// The timestamp is the number just before the suffix, between the 2 dots. For example, in
// 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp in microseconds,
// generated by dsn_now_us().
//
// Returns true iff the field before the suffix has at least MIN_TIMESTAMP_US_LENGTH
// characters, consists of decimal digits only, can be converted to uint64_t and is greater
// than MIN_TIMESTAMP_US. `timestamp_us` is assigned the parsed result only while returning
// true; on every failure path it is left untouched.
bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &timestamp_us)
{
    CHECK_GE(name.size(), suffix_size);

    // Nothing precedes the suffix, thus there is no room for a timestamp.
    if (suffix_size == name.size()) {
        return false;
    }

    // `end_idx` is the index of the first character of the suffix; search backwards from
    // the character just before it for the dot that starts the timestamp field.
    const size_t end_idx = name.size() - suffix_size;
    auto begin_idx = name.find_last_of('.', end_idx - 1);
    if (begin_idx == std::string::npos || ++begin_idx >= end_idx) {
        // Either there is no dot before the suffix, or the field between the dots is empty.
        return false;
    }

    const auto length = end_idx - begin_idx;
    if (length < MIN_TIMESTAMP_US_LENGTH) {
        return false;
    }

    // std::isdigit() is not an addressable standard library function, thus it can't be used
    // directly as an algorithm predicate.
    //
    // See following docs for details.
    // https://stackoverflow.com/questions/75868796/differences-between-isdigit-and-stdisdigit
    // https://en.cppreference.com/w/cpp/string/byte/isdigit
    const auto begin_itr = name.cbegin() + begin_idx;
    if (!std::all_of(begin_itr, begin_itr + length, [](unsigned char c) {
            return std::isdigit(c) != 0;
        })) {
        return false;
    }

    // Parse into a local variable so that the output parameter is never modified unless the
    // whole validation succeeds, as promised by the contract above.
    uint64_t parsed_us = 0;
    if (!dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, length), parsed_us)) {
        return false;
    }

    if (parsed_us <= MIN_TIMESTAMP_US) {
        return false;
    }

    timestamp_us = parsed_us;
    return true;
}

bool get_expiration_timestamp(const std::string &name,
const std::string &path,
size_t suffix_size,
uint64_t delay_seconds,
uint64_t &expiration_timestamp_s)
{
uint64_t timestamp_us = 0;
if (!parse_timestamp_us(name, suffix_size, timestamp_us)) {
// Once the timestamp could not be extracted from the directory name, the last write time
// would be used as the base time to compute the expiration time.
LOG_WARNING("gc_disk: failed to parse timestamp from {}, turn to "
"the last write time for {}",
name,
path);
return get_expiration_timestamp_by_last_write_time(
path, delay_seconds, expiration_timestamp_s);
}

expiration_timestamp_s = timestamp_us / 1000000 + delay_seconds;
return true;
}

} // anonymous namespace

error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &dir_nodes,
/*output*/ disk_cleaning_report &report)
{
Expand All @@ -87,59 +194,85 @@ error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &d
}
sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end());
}
for (auto &fpath : sub_list) {
auto name = dsn::utils::filesystem::get_file_name(fpath);
if (!is_data_dir_removable(name)) {
continue;
}
std::string folder_suffix = name.substr(name.length() - 4);

time_t mt;
if (!dsn::utils::filesystem::last_write_time(fpath, mt)) {
LOG_WARNING("gc_disk: failed to get last write time of {}", fpath);
continue;
}

auto last_write_time = (uint64_t)mt;
uint64_t current_time_ms = dsn_now_ms();
uint64_t remove_interval_seconds = current_time_ms / 1000;
for (const auto &path : sub_list) {
uint64_t expiration_timestamp_s = 0;

// don't delete ".bak" directory because it is backed by administrator.
if (folder_suffix == kFolderSuffixErr) {
// Note: don't delete ".bak" directory since it could be did by administrator.
const auto name = dsn::utils::filesystem::get_file_name(path);
if (boost::algorithm::ends_with(name, kFolderSuffixErr)) {
report.error_replica_count++;
remove_interval_seconds = FLAGS_gc_disk_error_replica_interval_seconds;
} else if (folder_suffix == kFolderSuffixGar) {
if (!get_expiration_timestamp(name,
path,
kFolderSuffixErr.size(),
FLAGS_gc_disk_error_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) {
report.garbage_replica_count++;
remove_interval_seconds = FLAGS_gc_disk_garbage_replica_interval_seconds;
} else if (folder_suffix == kFolderSuffixTmp) {
if (!get_expiration_timestamp(name,
path,
kFolderSuffixGar.size(),
FLAGS_gc_disk_garbage_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) {
report.disk_migrate_tmp_count++;
remove_interval_seconds = FLAGS_gc_disk_migration_tmp_replica_interval_seconds;
} else if (folder_suffix == kFolderSuffixOri) {
if (!get_expiration_timestamp_by_last_write_time(
path,
FLAGS_gc_disk_migration_tmp_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) {
report.disk_migrate_origin_count++;
remove_interval_seconds = FLAGS_gc_disk_migration_origin_replica_interval_seconds;
if (!get_expiration_timestamp_by_last_write_time(
path,
FLAGS_gc_disk_migration_origin_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else {
continue;
}

if (last_write_time + remove_interval_seconds <= current_time_ms / 1000) {
if (!dsn::utils::filesystem::remove_path(fpath)) {
LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}",
fpath,
dsn_now_ms() - current_time_ms);
} else {
const auto current_time_ms = dsn_now_ms();
if (expiration_timestamp_s <= current_time_ms / 1000) {
if (dsn::utils::filesystem::remove_path(path)) {
LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'"
", time_used_ms = {}",
fpath,
path,
dsn_now_ms() - current_time_ms);
report.remove_dir_count++;
} else {
LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}",
path,
dsn_now_ms() - current_time_ms);
}
} else {
LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}",
fpath,
last_write_time + remove_interval_seconds - current_time_ms / 1000);
path,
expiration_timestamp_s - current_time_ms / 1000);
}
}
return error_s::ok();
}

// Return true iff `dir` ends with one of the suffixes ".err", ".gar", ".tmp" or ".ori"
// (kFolderSuffixErr/Gar/Tmp/Ori).
bool is_data_dir_removable(const std::string &dir)
{
    if (boost::algorithm::ends_with(dir, kFolderSuffixErr)) {
        return true;
    }
    if (boost::algorithm::ends_with(dir, kFolderSuffixGar)) {
        return true;
    }
    if (boost::algorithm::ends_with(dir, kFolderSuffixTmp)) {
        return true;
    }
    return boost::algorithm::ends_with(dir, kFolderSuffixOri);
}

// Return true iff `dir` ends with the ".bak" suffix (kFolderSuffixBak) or is removable
// according to is_data_dir_removable().
bool is_data_dir_invalid(const std::string &dir)
{
    if (boost::algorithm::ends_with(dir, kFolderSuffixBak)) {
        return true;
    }
    return is_data_dir_removable(dir);
}

void move_to_err_path(const std::string &path, const std::string &log_prefix)
{
const std::string new_path = fmt::format("{}.{}{}", path, dsn_now_us(), kFolderSuffixErr);
Expand All @@ -150,5 +283,6 @@ void move_to_err_path(const std::string &path, const std::string &log_prefix)
new_path);
LOG_WARNING("{}: succeed to move directory from '{}' to '{}'", log_prefix, path, new_path);
}

} // namespace replication
} // namespace dsn
23 changes: 5 additions & 18 deletions src/replica/disk_cleaner.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,13 @@ struct disk_cleaning_report
extern error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &dir_nodes,
/*output*/ disk_cleaning_report &report);

inline bool is_data_dir_removable(const std::string &dir)
{
if (dir.length() < 4) {
return false;
}
const std::string folder_suffix = dir.substr(dir.length() - 4);
return (folder_suffix == kFolderSuffixErr || folder_suffix == kFolderSuffixGar ||
folder_suffix == kFolderSuffixTmp || folder_suffix == kFolderSuffixOri);
}
bool is_data_dir_removable(const std::string &dir);

// Note: ".bak" is invalid but not allow delete, because it can be backed by administrator.
inline bool is_data_dir_invalid(const std::string &dir)
{
if (dir.length() < 4) {
return false;
}
const std::string folder_suffix = dir.substr(dir.length() - 4);
return is_data_dir_removable(dir) || folder_suffix == kFolderSuffixBak;
}
// Note: ".bak" is invalid but not allowed to be deleted, because it could have been backed up
// by an administrator on purpose.
bool is_data_dir_invalid(const std::string &dir);

void move_to_err_path(const std::string &path, const std::string &log_prefix);

} // namespace replication
} // namespace dsn
2 changes: 1 addition & 1 deletion src/replica/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1754,7 +1754,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id)
CHECK(
dsn::utils::filesystem::directory_exists(replica_path), "dir({}) not exist", replica_path);
LOG_INFO("start to move replica({}) as garbage, path: {}", id, replica_path);
const auto rename_path = fmt::format("{}.{}.gar", replica_path, dsn_now_us());
const auto rename_path = fmt::format("{}.{}{}", replica_path, dsn_now_us(), kFolderSuffixGar);
if (!dsn::utils::filesystem::rename_path(replica_path, rename_path)) {
LOG_WARNING("gc_replica: failed to move directory '{}' to '{}'", replica_path, rename_path);

Expand Down
Loading

0 comments on commit ee369d3

Please sign in to comment.