Skip to content
This repository has been archived by the owner on Jan 3, 2024. It is now read-only.

Commit

Permalink
rgw/sfs: honor retry_raced_bucket_write mechanism
Browse files Browse the repository at this point in the history
Updating bucket's metadata concurrently by two or more threads is allowed in radosgw.
There is a retry mechanism: retry_raced_bucket_write(), that expects the bucket references to fetch the latest data from the persistent store.
rgw/sfs driver didn't implement try_refresh_info() in its bucket class definition; this could cause two references to the same bucket to potentially lead to partial metadata updates.

Fixes: https://github.com/aquarist-labs/s3gw/issues/637
Signed-off-by: Giuseppe Baccini <[email protected]>
  • Loading branch information
Giuseppe Baccini committed Oct 27, 2023
1 parent 4976c75 commit ce2bf84
Show file tree
Hide file tree
Showing 2 changed files with 262 additions and 2 deletions.
32 changes: 30 additions & 2 deletions src/rgw/driver/sfs/bucket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,11 +534,39 @@ int SFSBucket::abort_multiparts(
return sfs::SFSMultipartUploadV2::abort_multiparts(dpp, store, this);
}

/**
* @brief Refresh this bucket's rep with the state obtained from the store.
* Indeed it can happen that the rep of this bucket is obsolete due to
* concurrent threads updating metadata using their own SFSBucket instance.
*/
int SFSBucket::try_refresh_info(
const DoutPrefixProvider* dpp, ceph::real_time* /*pmtime*/
) {
ldpp_dout(dpp, 10) << __func__ << ": TODO" << dendl;
return -ENOTSUP;
auto bref = store->get_bucket_ref(get_name());
// maybe excessively safe approach: instead of directly assign the returned
// value to this.bucket let's check first if bref is valid.
if (!bref) {
lsfs_dout(dpp, 0) << fmt::format("no such bucket! {}", get_name()) << dendl;
return -ERR_NO_SUCH_BUCKET;
}
bucket = bref;

// update views like we do in the constructor

// info view
get_info() = bucket->get_info();

// attrs view
set_attrs(bucket->get_attrs());

// acls view
auto it = attrs.find(RGW_ATTR_ACL);
if (it != attrs.end()) {
auto lval = it->second.cbegin();
acls.decode(lval);
}

return 0;
}

int SFSBucket::read_usage(
Expand Down
232 changes: 232 additions & 0 deletions src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,49 @@
#include "rgw/driver/sfs/sqlite/sqlite_users.h"
#include "rgw/rgw_sal_sfs.h"

/*
These structs are in-memory mockable versions of actual structs/classes
that have a private rep.
Real types normally populate their rep via encode/decode methods.
For the sake of convenience, we define binary equivalent types with
public editable members.
*/
namespace mockable {
struct DefaultRetention {
std::string mode;
int days;
int years;

bool operator==(const DefaultRetention& other) const {
return this->mode == other.mode && this->days == other.days &&
this->years == other.years;
}
};

struct ObjectLockRule {
mockable::DefaultRetention defaultRetention;

bool operator==(const ObjectLockRule& other) const {
return this->defaultRetention == other.defaultRetention;
}
};

struct RGWObjectLock {
bool enabled;
bool rule_exist;
mockable::ObjectLockRule rule;

bool operator==(const RGWObjectLock& other) const {
return this->enabled == other.enabled &&
this->rule_exist == other.rule_exist && this->rule == other.rule;
}
};

mockable::RGWObjectLock& actual2mock(::RGWObjectLock& actual) {
return (mockable::RGWObjectLock&)actual;
}
} // namespace mockable

/*
HINT
s3gw.db will create here: /tmp/rgw_sfs_tests
Expand Down Expand Up @@ -1438,6 +1481,7 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) {
.path_uuid = uuid,
.meta_str = "metastr",
.mtime = now};

int id = multipart.insert(mpop);
ASSERT_GE(id, 0);

Expand All @@ -1453,3 +1497,191 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) {
EXPECT_EQ(results.objs[0].key.name, std::to_string(id));
EXPECT_EQ(results.objs[0].meta.mtime, now);
}

TEST_F(TestSFSBucket, RacedBucketMetadataWriteOperations) {
auto ceph_context = std::make_shared<CephContext>(CEPH_ENTITY_TYPE_CLIENT);
ceph_context->_conf.set_val("rgw_sfs_data_path", getTestDir());
ceph_context->_log->start();
auto store = new rgw::sal::SFStore(ceph_context.get(), getTestDir());

NoDoutPrefix ndp(ceph_context.get(), 1);
RGWEnv env;
env.init(ceph_context.get());
createUser("usr_id", store->db_conn);

rgw_user arg_user("", "usr_id", "");
auto user = store->get_user(arg_user);

rgw_bucket arg_bucket("t_id", "b_name", "");
rgw_placement_rule arg_pl_rule("default", "STANDARD");
std::string arg_swift_ver_location;
RGWQuotaInfo arg_quota_info;
RGWAccessControlPolicy arg_aclp_d = get_aclp_default();
rgw::sal::Attrs arg_attrs;

RGWBucketInfo arg_info = get_binfo();
obj_version arg_objv;
bool existed = false;
req_info arg_req_info(ceph_context.get(), &env);

std::unique_ptr<rgw::sal::Bucket> bucket_from_create;

EXPECT_EQ(
user->create_bucket(
&ndp, //dpp
arg_bucket, //b
"zg1", //zonegroup_id
arg_pl_rule, //placement_rule
arg_swift_ver_location, //swift_ver_location
&arg_quota_info, //pquota_info
arg_aclp_d, //policy
arg_attrs, //attrs
arg_info, //info
arg_objv, //ep_objv
false, //exclusive
false, //obj_lock_enabled
&existed, //existed
arg_req_info, //req_info
&bucket_from_create, //bucket
null_yield //optional_yield
),
0
);

std::unique_ptr<rgw::sal::Bucket> bucket_from_store_1;

EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_1, null_yield
),
0
);

std::unique_ptr<rgw::sal::Bucket> bucket_from_store_2;

EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_2, null_yield
),
0
);

EXPECT_EQ(*bucket_from_store_1, *bucket_from_store_2);

// merge_and_store_attrs

rgw::sal::Attrs new_attrs;
RGWAccessControlPolicy arg_aclp = get_aclp_1();
{
bufferlist acl_bl;
arg_aclp.encode(acl_bl);
new_attrs[RGW_ATTR_ACL] = acl_bl;
}

EXPECT_EQ(
bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0
);

// assert b1 contains the RGW_ATTR_ACL attribute
auto acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1);

// assert b2 does not contain the RGW_ATTR_ACL attribute
auto acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL);
EXPECT_EQ(bucket_from_store_2->get_attrs().end(), acl_bl_2);

// put_info

RGWObjectLock obj_lock;
mockable::RGWObjectLock& ol = mockable::actual2mock(obj_lock);
ol.enabled = true;
ol.rule.defaultRetention.years = 12;
ol.rule.defaultRetention.days = 31;
ol.rule.defaultRetention.mode = "GOVERNANCE";
ol.rule_exist = true;

bucket_from_store_2->get_info().obj_lock = obj_lock;
EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0);

auto& ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock);
auto& ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock);

// obj lock structure in memory cannot be equal at this point in memory for the
// two b1 and b2 objects; this simulates two threads updating metadata over the
// same bucket using their own bucket reference (as it happens actually when 2
// concurrent calls are issued from one or more S3 clients).
EXPECT_NE(ol1, ol2);

// Getting now a third object from the backing store should fetch an image equal to
// b2 since that object is the latest one that did a put_info().
// merge_and_store_attrs() done with b1 should now be lost due to b2.put_info().
std::unique_ptr<rgw::sal::Bucket> bucket_from_store_3;
EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield
),
0
);

// ol2 and ol3 should be the same.
auto& ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock);
EXPECT_EQ(ol2, ol3);

// We expect to have lost RGW_ATTR_ACL attribute in the backing store.
auto acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL);
EXPECT_EQ(bucket_from_store_3->get_attrs().end(), acl_bl_3);

// Now we repeat the updates interposing the try_refresh_info() on b2.
// try_refresh_info() refreshes b2's rep with the state obtained
// from the store.
EXPECT_EQ(
bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0
);
EXPECT_EQ(bucket_from_store_2->try_refresh_info(&ndp, nullptr), 0);
EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0);

// let's refetch b3 from store.
EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield
),
0
);

// Now all the views over b1, b2 and b3 should be the same, given that the
// underlying reps are (hopefully) the same.

// get_info() view
ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock);
ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock);
ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock);
EXPECT_EQ(ol1, ol2);
EXPECT_EQ(ol2, ol3);

// get_attrs() view and acls views
acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1);
acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_2->get_attrs().end(), acl_bl_2);
acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_3->get_attrs().end(), acl_bl_3);

{
RGWAccessControlPolicy aclp;
auto ci_lval = acl_bl_1->second.cbegin();
aclp.decode(ci_lval);
EXPECT_EQ(aclp, arg_aclp);
}
{
RGWAccessControlPolicy aclp;
auto ci_lval = acl_bl_2->second.cbegin();
aclp.decode(ci_lval);
EXPECT_EQ(aclp, arg_aclp);
}
{
RGWAccessControlPolicy aclp;
auto ci_lval = acl_bl_3->second.cbegin();
aclp.decode(ci_lval);
EXPECT_EQ(aclp, arg_aclp);
}
}

0 comments on commit ce2bf84

Please sign in to comment.