Fix/remove allow all bucket statement (#1106)
### Feature or Bugfix
- Bugfix

### Detail
- Remove the unnecessary `AllowAllToAdmin` statement from the bucket policy and from access point policies (the removed statement is shown in the sketch after this list)
- For the deletion of access points in existing shares I kept a reference to `AllowAllToAdmin`. We could change this; I don't have a strong opinion on it.
- For existing bucket policies and access point policies, `AllowAllToAdmin` will be deleted once all folders are revoked. Keeping it is unnecessary, but it does no harm to leave it as-is for existing shares.
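
For reference, this is the statement being removed, reassembled as a minimal sketch from the old `generate_default_bucket_policy`; the bucket name and role-id values below are placeholders for illustration only:

```python
# Placeholders, not real values.
s3_bucket_name = 'example-dataset-bucket'
owner_roleId = ['AROAEXAMPLEROLEID:*']  # '<roleId>:*' patterns for the dataset
                                        # admin, environment admin and pivot role

# The `AllowAllToAdmin` statement this PR removes: it granted s3:* on the
# bucket to the admin roles, matched through the aws:userId condition.
allow_all_to_admin = {
    'Sid': 'AllowAllToAdmin',
    'Effect': 'Allow',
    'Principal': '*',
    'Action': 's3:*',
    'Resource': [f'arn:aws:s3:::{s3_bucket_name}', f'arn:aws:s3:::{s3_bucket_name}/*'],
    'Condition': {'StringLike': {'aws:userId': owner_roleId}},
}
```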

[UPDATE]
Since testing uncovered issues caused by removing `AllowAllToAdmin` with the current clean-up actions, I took issue #1101 and included it in this PR. On top of the aforementioned changes, this PR:
- Removes the unnecessary `clean_up` functions from revoke_folders and moves the clean-up logic into the `process_revoked_folders` function (in `data_sharing_service.py`)
- Breaks `delete_access_point_policy` into smaller functions with fewer responsibilities: `revoke_access_in_access_point_policy`, which generates the new access point policy; `attach_new_access_point_policy`, which attaches a given policy to an access point; and `delete_access_point`, which deletes an access point
- Moves the logic that decides what should be deleted to
`backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py`. The access point is deleted, the IAM permissions revoked from the requester IAM role, and the KMS key policy revoked only if the resulting policy contains no statements (see the sketch after this list)
- Bonus: fixes a small bug in the clean-up (it assumed `prefix_list` is a list, but when a single folder is shared it can be a string)
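
A minimal sketch of how the revoked-folder flow fits together after this change. The orchestrating function and its wiring are illustrative (the real processor also updates share item states and handles errors); only the manager method names are taken from this PR:

```python
# Hypothetical orchestration, roughly what s3_access_point_process_share.py
# now does per revoked folder. `manager` is an S3AccessPointShareManager.
def process_revoked_folder(manager) -> None:
    # Build the access point policy with this folder's prefix removed.
    new_policy = manager.revoke_access_in_access_point_policy()

    if new_policy['Statement']:
        # Other folders are still shared through this access point:
        # just attach the trimmed policy.
        manager.attach_new_access_point_policy(new_policy)
    else:
        # The resulting policy is empty: remove the access point itself,
        # then revoke the requester's IAM permissions.
        manager.delete_access_point()
        manager.revoke_target_role_access_policy()
        # The KMS key policy is also revoked here, via the manager's
        # delete_dataset_bucket_key_policy (arguments omitted in this sketch).
```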


### Relates
- #997 
- #1101 

### Security
Please answer the questions below briefly where applicable, or write `N/A`.
Based on [OWASP Top 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)?
  - Is the input sanitized?
  - What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
dlpzx authored Mar 25, 2024
1 parent edb2253 commit 2ca856f
Showing 8 changed files with 101 additions and 303 deletions.
14 changes: 1 addition & 13 deletions backend/dataall/modules/dataset_sharing/aws/s3_client.py
@@ -95,22 +95,10 @@ def generate_access_point_policy_template(
return policy

@staticmethod
def generate_default_bucket_policy(
s3_bucket_name: str,
owner_roleId: list,
allow_owner_sid: str,
):
def generate_default_bucket_policy(s3_bucket_name: str):
policy = {
'Version': '2012-10-17',
'Statement': [
{
'Sid': allow_owner_sid,
'Effect': 'Allow',
'Principal': '*',
'Action': 's3:*',
'Resource': [f'arn:aws:s3:::{s3_bucket_name}', f'arn:aws:s3:::{s3_bucket_name}/*'],
'Condition': {'StringLike': {'aws:userId': owner_roleId}},
},
{
'Effect': 'Deny',
'Principal': {'AWS': '*'},
@@ -259,24 +259,6 @@ def revoke_share(cls, engine: Engine, share_uri: str):
env_group,
)
log.info(f'revoking folders succeeded = {revoked_folders_succeed}')
existing_shared_folders = ShareObjectRepository.check_existing_shared_items_of_type(
session, share_uri, ShareableType.StorageLocation.value
)

log.info(f'Still remaining S3 folder resources shared = {existing_shared_folders}')
if not existing_shared_folders and revoked_folders:
log.info('Clean up S3 access points...')
clean_up_folders = ProcessS3AccessPointShare.clean_up_share(
session,
dataset=dataset,
share=share,
folder=revoked_folders[0],
source_environment=source_environment,
target_environment=target_environment,
source_env_group=source_env_group,
env_group=env_group,
)
log.info(f'Clean up S3 successful = {clean_up_folders}')

log.info('Revoking permissions to S3 buckets')

@@ -89,7 +89,7 @@ def add_missing_resources_to_policy_statement(
def remove_resource_from_statement(self, target_resources: list, statement_sid: str, policy_document: dict):
policy_name = self.generate_policy_name()
index = self._get_statement_by_sid(policy_document, statement_sid)
log.info(f'Removing {target_resources} from Statement[{index}] in Managed policy {policy_name} ' f'skipping...')
log.info(f'Removing {target_resources} from Statement[{index}] in Managed policy {policy_name} ...')
if index is None:
log.info(f'{statement_sid} does NOT exist for Managed policy {policy_name}, skipping...')
else:
@@ -82,10 +82,6 @@ def process_revoked_shares(self, *kwargs) -> bool:
def verify_shares(self, *kwargs) -> bool:
raise NotImplementedError

@abc.abstractmethod
def clean_up_share(self, *kwargs):
raise NotImplementedError

@staticmethod
def build_access_point_name(share):
S3AccessPointName = utils.slugify(
@@ -135,16 +131,6 @@ def manage_bucket_policy(self):
counter = count()
statements = {item.get('Sid', next(counter)): item for item in bucket_policy.get('Statement', {})}

if DATAALL_ALLOW_OWNER_SID not in statements.keys():
statements[DATAALL_ALLOW_OWNER_SID] = {
'Sid': DATAALL_ALLOW_OWNER_SID,
'Effect': 'Allow',
'Principal': '*',
'Action': 's3:*',
'Resource': [f'arn:aws:s3:::{self.bucket_name}', f'arn:aws:s3:::{self.bucket_name}/*'],
'Condition': {'StringLike': {'aws:userId': self.get_bucket_owner_roleid()}},
}

if DATAALL_DELEGATE_TO_ACCESS_POINT not in statements.keys():
statements[DATAALL_DELEGATE_TO_ACCESS_POINT] = {
'Sid': DATAALL_DELEGATE_TO_ACCESS_POINT,
@@ -171,27 +157,10 @@ def get_bucket_policy_or_default(self):
)
bucket_policy = json.loads(bucket_policy)
else:
logger.info(f'Bucket policy for {self.bucket_name} does not exist, generating default policy...')
exceptions_roleId = self.get_bucket_owner_roleid()
bucket_policy = S3ControlClient.generate_default_bucket_policy(
self.bucket_name, exceptions_roleId, DATAALL_ALLOW_OWNER_SID
)
logger.info(f'Bucket policy for {self.bucket_name} does not exist, generating default policy...')
bucket_policy = S3ControlClient.generate_default_bucket_policy(self.bucket_name)
return bucket_policy

def get_bucket_owner_roleid(self):
exceptions_roleId = [
f'{item}:*'
for item in SessionHelper.get_role_ids(
self.source_account_id,
[
self.dataset_admin,
self.source_env_admin,
SessionHelper.get_delegation_role_arn(self.source_account_id),
],
)
]
return exceptions_roleId

def check_target_role_access_policy(self) -> None:
"""
Checks if requester IAM role policy includes requested S3 bucket and access point
@@ -494,26 +463,6 @@ def manage_access_point_and_policy(self):
access_point_arn,
self.s3_prefix,
)
exceptions_roleId = [
f'{item}:*'
for item in SessionHelper.get_role_ids(
self.source_account_id,
[
self.dataset_admin,
self.source_env_admin,
SessionHelper.get_delegation_role_arn(self.source_account_id),
],
)
]
admin_statement = {
'Sid': DATAALL_ALLOW_OWNER_SID,
'Effect': 'Allow',
'Principal': '*',
'Action': 's3:*',
'Resource': f'{access_point_arn}',
'Condition': {'StringLike': {'aws:userId': exceptions_roleId}},
}
access_point_policy['Statement'].append(admin_statement)
s3_client.attach_access_point_policy(
access_point_name=self.access_point_name, policy=json.dumps(access_point_policy)
)
@@ -612,58 +561,57 @@ def update_dataset_bucket_key_policy(self):
}
kms_client.put_key_policy(kms_key_id, json.dumps(existing_policy))

def delete_access_point_policy(self):
logger.info(f'Deleting access point policy for access point {self.access_point_name}...')
def revoke_access_in_access_point_policy(self):
logger.info(f'Generating new access point policy for access point {self.access_point_name}...')
s3_client = S3ControlClient(self.source_account_id, self.source_environment.region)
access_point_policy = json.loads(s3_client.get_access_point_policy(self.access_point_name))
access_point_arn = s3_client.get_bucket_access_point_arn(self.access_point_name)
target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName)
statements = {item['Sid']: item for item in access_point_policy['Statement']}
if f'{target_requester_id}0' in statements.keys():
prefix_list = statements[f'{target_requester_id}0']['Condition']['StringLike']['s3:prefix']
if isinstance(prefix_list, list) and f'{self.s3_prefix}/*' in prefix_list:
prefix_list.remove(f'{self.s3_prefix}/*')
statements[f'{target_requester_id}1']['Resource'].remove(
f'{access_point_arn}/object/{self.s3_prefix}/*'
)
access_point_policy['Statement'] = list(statements.values())
if f'{self.s3_prefix}/*' in prefix_list:
logger.info(f'Removing folder {self.s3_prefix} from access point policy...')
if isinstance(prefix_list, list):
prefix_list.remove(f'{self.s3_prefix}/*')
statements[f'{target_requester_id}1']['Resource'].remove(
f'{access_point_arn}/object/{self.s3_prefix}/*'
)
access_point_policy['Statement'] = list(statements.values())
elif isinstance(prefix_list, str):
prefix_list = []
else:
logger.info(f'Folder {self.s3_prefix} already removed from access point policy, skipping...')

if len(prefix_list) == 0:
logger.info('Removing empty statements from access point policy...')
access_point_policy['Statement'].remove(statements[f'{target_requester_id}0'])
access_point_policy['Statement'].remove(statements[f'{target_requester_id}1'])
# We need to handle DATAALL_ALLOW_OWNER_SID for backwards compatibility
if statements.get(DATAALL_ALLOW_OWNER_SID, None) is not None:
access_point_policy['Statement'].remove(statements[DATAALL_ALLOW_OWNER_SID])
return access_point_policy

def attach_new_access_point_policy(self, access_point_policy):
logger.info(f'Attaching access point policy {access_point_policy} for access point {self.access_point_name}...')
s3_client = S3ControlClient(self.source_account_id, self.source_environment.region)
s3_client.attach_access_point_policy(
access_point_name=self.access_point_name, policy=json.dumps(access_point_policy)
)

@staticmethod
def delete_access_point(
share: ShareObject,
dataset: Dataset,
):
access_point_name = S3AccessPointShareManager.build_access_point_name(share)
logger.info(f'Deleting access point {access_point_name}...')

s3_client = S3ControlClient(dataset.AwsAccountId, dataset.region)
access_point_policy = json.loads(s3_client.get_access_point_policy(access_point_name))
if len(access_point_policy['Statement']) <= 1:
# At least we have the 'AllowAllToAdmin' statement
s3_client.delete_bucket_access_point(access_point_name)
return True
else:
return False
def delete_access_point(self):
logger.info(f'Deleting access point {self.access_point_name}...')
s3_client = S3ControlClient(self.source_account_id, self.source_environment.region)
s3_client.delete_bucket_access_point(self.access_point_name)

@staticmethod
def delete_target_role_access_policy(
share: ShareObject,
dataset: Dataset,
target_environment: Environment,
):
def revoke_target_role_access_policy(self):
logger.info('Deleting target role IAM statements...')

share_policy_service = SharePolicyService(
role_name=share.principalIAMRoleName,
account=target_environment.AwsAccountId,
environmentUri=target_environment.environmentUri,
resource_prefix=target_environment.resourcePrefix,
environmentUri=self.target_environment.environmentUri,
account=self.target_environment.AwsAccountId,
role_name=self.target_requester_IAMRoleName,
resource_prefix=self.target_environment.resourcePrefix,
)

# Backwards compatibility
@@ -675,21 +623,20 @@ def delete_target_role_access_policy(
# End of backwards compatibility

share_resource_policy_name = share_policy_service.generate_policy_name()

version_id, policy_document = IAM.get_managed_policy_default_version(
target_environment.AwsAccountId, share_resource_policy_name
self.target_account_id, share_resource_policy_name
)

access_point_name = S3AccessPointShareManager.build_access_point_name(share)

key_alias = f'alias/{dataset.KmsAlias}'
kms_client = KmsClient(dataset.AwsAccountId, dataset.region)
key_alias = f'alias/{self.dataset.KmsAlias}'
kms_client = KmsClient(self.dataset_account_id, self.source_environment.region)
kms_key_id = kms_client.get_key_id(key_alias)

s3_target_resources = [
f'arn:aws:s3:::{dataset.S3BucketName}',
f'arn:aws:s3:::{dataset.S3BucketName}/*',
f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}',
f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*',
f'arn:aws:s3:::{self.bucket_name}',
f'arn:aws:s3:::{self.bucket_name}/*',
f'arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}',
f'arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*',
]

share_policy_service.remove_resource_from_statement(
Expand All @@ -698,14 +645,14 @@ def delete_target_role_access_policy(
policy_document=policy_document,
)
if kms_key_id:
kms_target_resources = [f'arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}']
kms_target_resources = [f'arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}']
share_policy_service.remove_resource_from_statement(
target_resources=kms_target_resources,
statement_sid=f'{IAM_S3_ACCESS_POINTS_STATEMENT_SID}KMS',
policy_document=policy_document,
)
IAM.update_managed_policy_default_version(
target_environment.AwsAccountId, share_resource_policy_name, version_id, json.dumps(policy_document)
self.target_account_id, share_resource_policy_name, version_id, json.dumps(policy_document)
)

def delete_dataset_bucket_key_policy(
@@ -24,7 +24,6 @@
logger = logging.getLogger(__name__)

DATAALL_READ_ONLY_SID = 'DataAll-Bucket-ReadOnly'
DATAALL_ALLOW_OWNER_SID = 'AllowAllToAdmin'
DATAALL_BUCKET_KMS_DECRYPT_SID = 'DataAll-Bucket-KMS-Decrypt'
DATAALL_KMS_PIVOT_ROLE_PERMISSIONS_SID = 'KMSPivotRolePermissions'

@@ -262,26 +261,9 @@ def get_bucket_policy_or_default(self):
bucket_policy = json.loads(bucket_policy)
else:
logger.info(f'Bucket policy for {self.bucket_name} does not exist, generating default policy...')
exceptions_roleId = self.get_bucket_owner_roleid()
bucket_policy = S3ControlClient.generate_default_bucket_policy(
self.bucket_name, exceptions_roleId, DATAALL_ALLOW_OWNER_SID
)
bucket_policy = S3ControlClient.generate_default_bucket_policy(self.bucket_name)
return bucket_policy

def get_bucket_owner_roleid(self):
exceptions_roleId = [
f'{item}:*'
for item in SessionHelper.get_role_ids(
self.source_account_id,
[
self.dataset_admin,
self.source_env_admin,
SessionHelper.get_delegation_role_arn(self.source_account_id),
],
)
]
return exceptions_roleId

def check_role_bucket_policy(self) -> None:
"""
This function checks if the bucket policy grants read only access to accepted share roles
@@ -335,11 +317,6 @@ def grant_role_bucket_policy(self):
self.bucket_name, target_requester_arn
)

if DATAALL_ALLOW_OWNER_SID not in statements.keys():
statements[DATAALL_ALLOW_OWNER_SID] = self.generate_owner_access_statement(
self.bucket_name, self.get_bucket_owner_roleid()
)

bucket_policy['Statement'] = list(statements.values())
s3_client = S3Client(self.source_account_id, self.source_environment.region)
s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy))
@@ -354,18 +331,6 @@ def add_target_arn_to_statement_principal(self, statement, target_requester_arn)
statement['Principal']['AWS'] = principal_list
return statement

@staticmethod
def generate_owner_access_statement(s3_bucket_name, owner_roleId):
owner_policy_statement = {
'Sid': DATAALL_ALLOW_OWNER_SID,
'Effect': 'Allow',
'Principal': '*',
'Action': 's3:*',
'Resource': [f'arn:aws:s3:::{s3_bucket_name}', f'arn:aws:s3:::{s3_bucket_name}/*'],
'Condition': {'StringLike': {'aws:userId': owner_roleId}},
}
return owner_policy_statement

@staticmethod
def get_principal_list(statement):
principal_list = statement['Principal']['AWS']