Fix/remove allow all bucket statement #1106

Merged · 7 commits · Mar 25, 2024
14 changes: 1 addition & 13 deletions backend/dataall/modules/dataset_sharing/aws/s3_client.py
@@ -95,22 +95,10 @@ def generate_access_point_policy_template(
         return policy
 
     @staticmethod
-    def generate_default_bucket_policy(
-        s3_bucket_name: str,
-        owner_roleId: list,
-        allow_owner_sid: str,
-    ):
+    def generate_default_bucket_policy(s3_bucket_name: str):
         policy = {
             'Version': '2012-10-17',
             'Statement': [
-                {
-                    'Sid': allow_owner_sid,
-                    'Effect': 'Allow',
-                    'Principal': '*',
-                    'Action': 's3:*',
-                    'Resource': [f'arn:aws:s3:::{s3_bucket_name}', f'arn:aws:s3:::{s3_bucket_name}/*'],
-                    'Condition': {'StringLike': {'aws:userId': owner_roleId}},
-                },
                 {
                     'Effect': 'Deny',
                     'Principal': {'AWS': '*'},
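With this change, `generate_default_bucket_policy` needs only the bucket name: the broad owner "allow all" statement is gone and only the Deny statement remains. That statement is truncated by the diff view; a minimal sketch of the simplified generator, assuming the surviving statement is the usual enforce-HTTPS deny (the `Action`, `Resource`, and `Condition` below are assumptions, not confirmed by this diff):

```python
# Illustrative sketch only: the Deny body is cut off in the diff above,
# so the enforce-TLS condition here is an assumption.
import json


def generate_default_bucket_policy(s3_bucket_name: str) -> dict:
    return {
        'Version': '2012-10-17',
        'Statement': [
            {
                'Effect': 'Deny',
                'Principal': {'AWS': '*'},
                'Action': 's3:*',  # assumed
                'Resource': [
                    f'arn:aws:s3:::{s3_bucket_name}',
                    f'arn:aws:s3:::{s3_bucket_name}/*',
                ],
                'Condition': {'Bool': {'aws:SecureTransport': 'false'}},  # assumed
            }
        ],
    }


print(json.dumps(generate_default_bucket_policy('my-example-bucket'), indent=2))
```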
@@ -259,24 +259,6 @@ def revoke_share(cls, engine: Engine, share_uri: str):
                 env_group,
             )
             log.info(f'revoking folders succeeded = {revoked_folders_succeed}')
-            existing_shared_folders = ShareObjectRepository.check_existing_shared_items_of_type(
-                session, share_uri, ShareableType.StorageLocation.value
-            )
-
-            log.info(f'Still remaining S3 folder resources shared = {existing_shared_folders}')
-            if not existing_shared_folders and revoked_folders:
-                log.info('Clean up S3 access points...')
-                clean_up_folders = ProcessS3AccessPointShare.clean_up_share(
-                    session,
-                    dataset=dataset,
-                    share=share,
-                    folder=revoked_folders[0],
-                    source_environment=source_environment,
-                    target_environment=target_environment,
-                    source_env_group=source_env_group,
-                    env_group=env_group,
-                )
-                log.info(f'Clean up S3 successful = {clean_up_folders}')
 
             log.info('Revoking permissions to S3 buckets')
 
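The block deleted here was the caller of `ProcessS3AccessPointShare.clean_up_share`: the high-level revoke flow checked whether any shared folders remained and, only if none did, tore down the access point. That gating now lives with the share manager itself (see the new `revoke_access_in_access_point_policy` and `delete_access_point` methods later in this diff). For reference, the decision rule the deleted block implemented, as a standalone sketch with illustrative names (not data.all APIs):

```python
# Hypothetical distillation of the deleted gating logic.
def should_clean_up_access_point(remaining_shared_folders: list, revoked_folders: list) -> bool:
    """True only when this revocation removed folders and none remain shared."""
    return bool(revoked_folders) and not remaining_shared_folders


assert should_clean_up_access_point([], ['folderA']) is True
assert should_clean_up_access_point(['folderB'], ['folderA']) is False
assert should_clean_up_access_point([], []) is False
```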
@@ -89,7 +89,7 @@ def add_missing_resources_to_policy_statement(
     def remove_resource_from_statement(self, target_resources: list, statement_sid: str, policy_document: dict):
         policy_name = self.generate_policy_name()
         index = self._get_statement_by_sid(policy_document, statement_sid)
-        log.info(f'Removing {target_resources} from Statement[{index}] in Managed policy {policy_name} ' f'skipping...')
+        log.info(f'Removing {target_resources} from Statement[{index}] in Managed policy {policy_name} ...')
         if index is None:
             log.info(f'{statement_sid} does NOT exists for Managed policy {policy_name} ' f'skipping...')
         else:
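The fix above corrects a copy-pasted log message ("skipping..." belongs to the not-found branch only). For reference, the overall pattern `remove_resource_from_statement` follows, as a self-contained sketch with a hypothetical inline lookup in place of `_get_statement_by_sid`:

```python
# Sketch of the Sid-lookup-then-prune pattern; names other than the
# policy-document shape are illustrative, not data.all's API.
import logging

log = logging.getLogger(__name__)


def remove_resources_by_sid(policy_document: dict, statement_sid: str, target_resources: list) -> None:
    statements = policy_document.get('Statement', [])
    # Locate the statement by Sid, mirroring what _get_statement_by_sid does.
    index = next((i for i, s in enumerate(statements) if s.get('Sid') == statement_sid), None)
    if index is None:
        log.info(f'{statement_sid} does NOT exist in policy, skipping...')
        return
    log.info(f'Removing {target_resources} from Statement[{index}] ...')
    statements[index]['Resource'] = [
        r for r in statements[index]['Resource'] if r not in target_resources
    ]


doc = {'Statement': [{'Sid': 'S3Access', 'Resource': ['arn:aws:s3:::bucket-a', 'arn:aws:s3:::bucket-b']}]}
remove_resources_by_sid(doc, 'S3Access', ['arn:aws:s3:::bucket-a'])
assert doc['Statement'][0]['Resource'] == ['arn:aws:s3:::bucket-b']
```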
@@ -82,10 +82,6 @@ def process_revoked_shares(self, *kwargs) -> bool:
     def verify_shares(self, *kwargs) -> bool:
         raise NotImplementedError
 
-    @abc.abstractmethod
-    def clean_up_share(self, *kwargs):
-        raise NotImplementedError
-
     @staticmethod
     def build_access_point_name(share):
         S3AccessPointName = utils.slugify(
@@ -135,16 +131,6 @@ def manage_bucket_policy(self):
         counter = count()
         statements = {item.get('Sid', next(counter)): item for item in bucket_policy.get('Statement', {})}
 
-        if DATAALL_ALLOW_OWNER_SID not in statements.keys():
-            statements[DATAALL_ALLOW_OWNER_SID] = {
-                'Sid': DATAALL_ALLOW_OWNER_SID,
-                'Effect': 'Allow',
-                'Principal': '*',
-                'Action': 's3:*',
-                'Resource': [f'arn:aws:s3:::{self.bucket_name}', f'arn:aws:s3:::{self.bucket_name}/*'],
-                'Condition': {'StringLike': {'aws:userId': self.get_bucket_owner_roleid()}},
-            }
-
         if DATAALL_DELEGATE_TO_ACCESS_POINT not in statements.keys():
             statements[DATAALL_DELEGATE_TO_ACCESS_POINT] = {
                 'Sid': DATAALL_DELEGATE_TO_ACCESS_POINT,
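A note on the context line `statements = {item.get('Sid', next(counter)): item ...}`: the `count()` fallback matters because statements without a `'Sid'` would otherwise all collapse onto a single dict key and be silently dropped. A standalone sketch of that behavior:

```python
# Each item evaluates next(counter) once (dict.get's default is eager),
# so every Sid-less statement gets its own integer key and is preserved.
from itertools import count

bucket_policy = {
    'Statement': [
        {'Effect': 'Deny', 'Principal': {'AWS': '*'}},          # no Sid
        {'Sid': 'DataAll-Bucket-ReadOnly', 'Effect': 'Allow'},  # managed Sid
        {'Effect': 'Allow', 'Principal': {'AWS': 'arn:aws:iam::111111111111:root'}},  # no Sid
    ]
}

counter = count()
statements = {item.get('Sid', next(counter)): item for item in bucket_policy.get('Statement', [])}

# The counter advanced on every item, so the Sid-less statements land on
# keys 0 and 2, while the managed one stays addressable by name.
assert set(statements) == {0, 'DataAll-Bucket-ReadOnly', 2}
```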
@@ -171,27 +157,10 @@ def get_bucket_policy_or_default(self):
             )
             bucket_policy = json.loads(bucket_policy)
         else:
-            logger.info(f'Bucket policy for {self.bucket_name} does not exist, generating default policy...')
-            exceptions_roleId = self.get_bucket_owner_roleid()
-            bucket_policy = S3ControlClient.generate_default_bucket_policy(
-                self.bucket_name, exceptions_roleId, DATAALL_ALLOW_OWNER_SID
-            )
+            logger.info('Bucket policy for {self.bucket_name} does not exist, generating default policy...')
+            bucket_policy = S3ControlClient.generate_default_bucket_policy(self.bucket_name)
         return bucket_policy
 
-    def get_bucket_owner_roleid(self):
-        exceptions_roleId = [
-            f'{item}:*'
-            for item in SessionHelper.get_role_ids(
-                self.source_account_id,
-                [
-                    self.dataset_admin,
-                    self.source_env_admin,
-                    SessionHelper.get_delegation_role_arn(self.source_account_id),
-                ],
-            )
-        ]
-        return exceptions_roleId
-
     def check_target_role_access_policy(self) -> None:
         """
         Checks if requester IAM role policy includes requested S3 bucket and access point
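For reference, the get-or-default pattern used by `get_bucket_policy_or_default` looks roughly like this in plain boto3 (data.all wraps the S3 calls in its own client classes, so this is an illustration, not the project's code):

```python
# Fetch the bucket policy; fall back to a generated default when none exists.
import json

import boto3
from botocore.exceptions import ClientError


def get_bucket_policy_or_default(s3, bucket_name: str, default_policy: dict) -> dict:
    try:
        response = s3.get_bucket_policy(Bucket=bucket_name)
        return json.loads(response['Policy'])
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucketPolicy':
            # No policy attached yet: start from the generated default.
            return default_policy
        raise


# Usage (requires AWS credentials):
# s3 = boto3.client('s3')
# policy = get_bucket_policy_or_default(s3, 'my-bucket', {'Version': '2012-10-17', 'Statement': []})
```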
@@ -494,26 +463,6 @@ def manage_access_point_and_policy(self):
                 access_point_arn,
                 self.s3_prefix,
             )
-            exceptions_roleId = [
-                f'{item}:*'
-                for item in SessionHelper.get_role_ids(
-                    self.source_account_id,
-                    [
-                        self.dataset_admin,
-                        self.source_env_admin,
-                        SessionHelper.get_delegation_role_arn(self.source_account_id),
-                    ],
-                )
-            ]
-            admin_statement = {
-                'Sid': DATAALL_ALLOW_OWNER_SID,
-                'Effect': 'Allow',
-                'Principal': '*',
-                'Action': 's3:*',
-                'Resource': f'{access_point_arn}',
-                'Condition': {'StringLike': {'aws:userId': exceptions_roleId}},
-            }
-            access_point_policy['Statement'].append(admin_statement)
             s3_client.attach_access_point_policy(
                 access_point_name=self.access_point_name, policy=json.dumps(access_point_policy)
             )
@@ -612,58 +561,57 @@ def update_dataset_bucket_key_policy(self):
             }
             kms_client.put_key_policy(kms_key_id, json.dumps(existing_policy))
 
-    def delete_access_point_policy(self):
-        logger.info(f'Deleting access point policy for access point {self.access_point_name}...')
+    def revoke_access_in_access_point_policy(self):
+        logger.info(f'Generating new access point policy for access point {self.access_point_name}...')
         s3_client = S3ControlClient(self.source_account_id, self.source_environment.region)
         access_point_policy = json.loads(s3_client.get_access_point_policy(self.access_point_name))
         access_point_arn = s3_client.get_bucket_access_point_arn(self.access_point_name)
         target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName)
         statements = {item['Sid']: item for item in access_point_policy['Statement']}
         if f'{target_requester_id}0' in statements.keys():
             prefix_list = statements[f'{target_requester_id}0']['Condition']['StringLike']['s3:prefix']
-            if isinstance(prefix_list, list) and f'{self.s3_prefix}/*' in prefix_list:
-                prefix_list.remove(f'{self.s3_prefix}/*')
-                statements[f'{target_requester_id}1']['Resource'].remove(
-                    f'{access_point_arn}/object/{self.s3_prefix}/*'
-                )
-                access_point_policy['Statement'] = list(statements.values())
+            if f'{self.s3_prefix}/*' in prefix_list:
+                logger.info(f'Removing folder {self.s3_prefix} from access point policy...')
+                if isinstance(prefix_list, list):
+                    prefix_list.remove(f'{self.s3_prefix}/*')
+                    statements[f'{target_requester_id}1']['Resource'].remove(
+                        f'{access_point_arn}/object/{self.s3_prefix}/*'
+                    )
+                    access_point_policy['Statement'] = list(statements.values())
+                elif isinstance(prefix_list, str):
+                    prefix_list = []
+            else:
+                logger.info(f'Folder {self.s3_prefix} already removed from access point policy, skipping...')
+
+            if len(prefix_list) == 0:
+                logger.info('Removing empty statements from access point policy...')
+                access_point_policy['Statement'].remove(statements[f'{target_requester_id}0'])
+                access_point_policy['Statement'].remove(statements[f'{target_requester_id}1'])
+                # We need to handle DATAALL_ALLOW_OWNER_SID for backwards compatibility
+                if statements.get(DATAALL_ALLOW_OWNER_SID, None) is not None:
+                    access_point_policy['Statement'].remove(statements[DATAALL_ALLOW_OWNER_SID])
+        return access_point_policy
 
+    def attach_new_access_point_policy(self, access_point_policy):
+        logger.info(f'Attaching access point policy {access_point_policy} for access point {self.access_point_name}...')
+        s3_client = S3ControlClient(self.source_account_id, self.source_environment.region)
         s3_client.attach_access_point_policy(
             access_point_name=self.access_point_name, policy=json.dumps(access_point_policy)
         )
 
-    @staticmethod
-    def delete_access_point(
-        share: ShareObject,
-        dataset: Dataset,
-    ):
-        access_point_name = S3AccessPointShareManager.build_access_point_name(share)
-        logger.info(f'Deleting access point {access_point_name}...')
-
-        s3_client = S3ControlClient(dataset.AwsAccountId, dataset.region)
-        access_point_policy = json.loads(s3_client.get_access_point_policy(access_point_name))
-        if len(access_point_policy['Statement']) <= 1:
-            # At least we have the 'AllowAllToAdmin' statement
-            s3_client.delete_bucket_access_point(access_point_name)
-            return True
-        else:
-            return False
+    def delete_access_point(self):
+        logger.info(f'Deleting access point {self.access_point_name}...')
+        s3_client = S3ControlClient(self.source_account_id, self.source_environment.region)
+        s3_client.delete_bucket_access_point(self.access_point_name)
 
-    @staticmethod
-    def delete_target_role_access_policy(
-        share: ShareObject,
-        dataset: Dataset,
-        target_environment: Environment,
-    ):
+    def revoke_target_role_access_policy(self):
         logger.info('Deleting target role IAM statements...')
 
         share_policy_service = SharePolicyService(
-            role_name=share.principalIAMRoleName,
-            account=target_environment.AwsAccountId,
-            environmentUri=target_environment.environmentUri,
-            resource_prefix=target_environment.resourcePrefix,
+            environmentUri=self.target_environment.environmentUri,
+            account=self.target_environment.AwsAccountId,
+            role_name=self.target_requester_IAMRoleName,
+            resource_prefix=self.target_environment.resourcePrefix,
        )
 
         # Backwards compatibility
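The old `delete_access_point_policy` rewrote and re-attached the policy in one step, and the old static `delete_access_point` only deleted the access point when nothing but the 'AllowAllToAdmin' statement was left. The replacement splits this into three instance methods that the calling processor presumably wires together roughly like this (hypothetical orchestration, not shown in this diff):

```python
# Hypothetical wiring of the three new methods during folder revocation.
def revoke_folder_share(manager):
    new_policy = manager.revoke_access_in_access_point_policy()
    if len(new_policy['Statement']) > 0:
        # Other folders are still shared through this access point.
        manager.attach_new_access_point_policy(new_policy)
    else:
        # Nothing left in the policy: the access point itself can go.
        manager.delete_access_point()
```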
@@ -675,21 +623,20 @@ def delete_target_role_access_policy(
         # End of backwards compatibility
 
         share_resource_policy_name = share_policy_service.generate_policy_name()
+
         version_id, policy_document = IAM.get_managed_policy_default_version(
-            target_environment.AwsAccountId, share_resource_policy_name
+            self.target_account_id, share_resource_policy_name
         )
 
-        access_point_name = S3AccessPointShareManager.build_access_point_name(share)
-
-        key_alias = f'alias/{dataset.KmsAlias}'
-        kms_client = KmsClient(dataset.AwsAccountId, dataset.region)
+        key_alias = f'alias/{self.dataset.KmsAlias}'
+        kms_client = KmsClient(self.dataset_account_id, self.source_environment.region)
         kms_key_id = kms_client.get_key_id(key_alias)
 
         s3_target_resources = [
-            f'arn:aws:s3:::{dataset.S3BucketName}',
-            f'arn:aws:s3:::{dataset.S3BucketName}/*',
-            f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}',
-            f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*',
+            f'arn:aws:s3:::{self.bucket_name}',
+            f'arn:aws:s3:::{self.bucket_name}/*',
+            f'arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}',
+            f'arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*',
         ]
 
         share_policy_service.remove_resource_from_statement(
@@ -698,14 +645,14 @@
             policy_document=policy_document,
         )
         if kms_key_id:
-            kms_target_resources = [f'arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}']
+            kms_target_resources = [f'arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}']
             share_policy_service.remove_resource_from_statement(
                 target_resources=kms_target_resources,
                 statement_sid=f'{IAM_S3_ACCESS_POINTS_STATEMENT_SID}KMS',
                 policy_document=policy_document,
             )
         IAM.update_managed_policy_default_version(
-            target_environment.AwsAccountId, share_resource_policy_name, version_id, json.dumps(policy_document)
+            self.target_account_id, share_resource_policy_name, version_id, json.dumps(policy_document)
         )
 
     def delete_dataset_bucket_key_policy(
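`IAM.get_managed_policy_default_version` and `IAM.update_managed_policy_default_version` are data.all helpers. Since customer managed policies are versioned and each version is immutable, updating one means creating a new default version; a hedged boto3 equivalent of that pattern (an illustration, not the project's code):

```python
# Read the default policy version, mutate the document, publish it as a
# new default version, and delete the old one to respect IAM's
# five-versions-per-policy limit.
import json

import boto3


def update_managed_policy_document(account_id: str, policy_name: str, mutate) -> None:
    iam = boto3.client('iam')
    policy_arn = f'arn:aws:iam::{account_id}:policy/{policy_name}'
    version_id = iam.get_policy(PolicyArn=policy_arn)['Policy']['DefaultVersionId']
    document = iam.get_policy_version(PolicyArn=policy_arn, VersionId=version_id)['PolicyVersion']['Document']
    mutate(document)  # e.g. remove revoked S3/KMS resources from statements
    iam.create_policy_version(PolicyArn=policy_arn, PolicyDocument=json.dumps(document), SetAsDefault=True)
    iam.delete_policy_version(PolicyArn=policy_arn, VersionId=version_id)
```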
@@ -24,7 +24,6 @@
 logger = logging.getLogger(__name__)
 
 DATAALL_READ_ONLY_SID = 'DataAll-Bucket-ReadOnly'
-DATAALL_ALLOW_OWNER_SID = 'AllowAllToAdmin'
 DATAALL_BUCKET_KMS_DECRYPT_SID = 'DataAll-Bucket-KMS-Decrypt'
 DATAALL_KMS_PIVOT_ROLE_PERMISSIONS_SID = 'KMSPivotRolePermissions'
@@ -262,26 +261,9 @@ def get_bucket_policy_or_default(self):
             bucket_policy = json.loads(bucket_policy)
         else:
             logger.info(f'Bucket policy for {self.bucket_name} does not exist, generating default policy...')
-            exceptions_roleId = self.get_bucket_owner_roleid()
-            bucket_policy = S3ControlClient.generate_default_bucket_policy(
-                self.bucket_name, exceptions_roleId, DATAALL_ALLOW_OWNER_SID
-            )
+            bucket_policy = S3ControlClient.generate_default_bucket_policy(self.bucket_name)
         return bucket_policy
 
-    def get_bucket_owner_roleid(self):
-        exceptions_roleId = [
-            f'{item}:*'
-            for item in SessionHelper.get_role_ids(
-                self.source_account_id,
-                [
-                    self.dataset_admin,
-                    self.source_env_admin,
-                    SessionHelper.get_delegation_role_arn(self.source_account_id),
-                ],
-            )
-        ]
-        return exceptions_roleId
-
     def check_role_bucket_policy(self) -> None:
         """
         This function checks if the bucket policy grants read only access to accepted share roles
@@ -335,11 +317,6 @@ def grant_role_bucket_policy(self):
                 self.bucket_name, target_requester_arn
             )
 
-        if DATAALL_ALLOW_OWNER_SID not in statements.keys():
-            statements[DATAALL_ALLOW_OWNER_SID] = self.generate_owner_access_statement(
-                self.bucket_name, self.get_bucket_owner_roleid()
-            )
-
         bucket_policy['Statement'] = list(statements.values())
         s3_client = S3Client(self.source_account_id, self.source_environment.region)
         s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy))
@@ -354,18 +331,6 @@ def add_target_arn_to_statement_principal(self, statement, target_requester_arn)
         statement['Principal']['AWS'] = principal_list
         return statement
 
-    @staticmethod
-    def generate_owner_access_statement(s3_bucket_name, owner_roleId):
-        owner_policy_statement = {
-            'Sid': DATAALL_ALLOW_OWNER_SID,
-            'Effect': 'Allow',
-            'Principal': '*',
-            'Action': 's3:*',
-            'Resource': [f'arn:aws:s3:::{s3_bucket_name}', f'arn:aws:s3:::{s3_bucket_name}/*'],
-            'Condition': {'StringLike': {'aws:userId': owner_roleId}},
-        }
-        return owner_policy_statement
-
     @staticmethod
     def get_principal_list(statement):
         principal_list = statement['Principal']['AWS']
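The removed `generate_owner_access_statement` was the last producer of the 'AllowAllToAdmin' statement in this file. The surviving `add_target_arn_to_statement_principal`, whose tail is shown above, merges a requester ARN into a statement's principal list; a standalone sketch consistent with that tail (the real method delegates the normalization to `get_principal_list`):

```python
# 'Principal': {'AWS': ...} may hold one ARN string or a list of ARNs,
# so normalize to a list before appending the new requester.
def add_target_arn_to_statement_principal(statement: dict, target_requester_arn: str) -> dict:
    principal = statement['Principal']['AWS']
    principal_list = [principal] if isinstance(principal, str) else principal
    if target_requester_arn not in principal_list:
        principal_list.append(target_requester_arn)
    statement['Principal']['AWS'] = principal_list
    return statement


stmt = {'Sid': 'DataAll-Bucket-ReadOnly', 'Effect': 'Allow',
        'Principal': {'AWS': 'arn:aws:iam::111111111111:role/teamA'}}
add_target_arn_to_statement_principal(stmt, 'arn:aws:iam::222222222222:role/teamB')
assert stmt['Principal']['AWS'] == [
    'arn:aws:iam::111111111111:role/teamA',
    'arn:aws:iam::222222222222:role/teamB',
]
```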