From a9bc1398f902830b92778ca2c07b39ab96d761bd Mon Sep 17 00:00:00 2001
From: Anushka Singh
Date: Mon, 20 Nov 2023 02:27:38 -0500
Subject: [PATCH] Enabling S3 bucket share (#848)

### Feature or Bugfix
- Feature

### Detail
- We want to enable bucket sharing alongside the access point sharing that already exists in data.all today.
- A user will be able to request shares at the bucket level, and at the folder level with access points.
- Please NOTE: there is some common code between the access point share managers and processors and the S3 bucket share managers and processors. We will send out a separate PR for that refactoring work at a later time.

### Relates
- https://github.com/awslabs/aws-dataall/issues/284
- https://github.com/awslabs/aws-dataall/pull/823
- https://github.com/awslabs/aws-dataall/pull/846/files#diff-c1f522a1f50d8bcf7b6e5b2e586e40a8de784caa80345f4e05a6329ae2a372d0

### Contributors
- Contents of this PR have been contributed by @anushka-singh, @blitzmohit, @rbernotas, @TejasRGitHub

### Security
Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)?
  - Is the input sanitized?
  - What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
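For reviewers, here is a minimal sketch of the core grant a bucket-level share performs. On approval, the share manager appends a read-only statement for the requester's IAM role to the dataset bucket's policy (alongside matching KMS key and requester IAM role policy updates); see `S3BucketShareManager.generate_default_bucket_read_policy_statement` in the diff below. The bucket name and role ARN in the sketch are hypothetical placeholders:

```python
# Minimal sketch (not code from this PR): the read-only statement that a
# bucket-level share appends to the dataset bucket policy. The bucket name
# and requester role ARN are hypothetical placeholders.
import json

DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly"


def read_only_statement(bucket_name: str, requester_role_arn: str) -> dict:
    # Mirrors generate_default_bucket_read_policy_statement in the diff:
    # list/get access on the bucket and its objects for the requester role.
    return {
        "Sid": DATAALL_READ_ONLY_SID,
        "Effect": "Allow",
        "Principal": {"AWS": [requester_role_arn]},
        "Action": ["s3:List*", "s3:GetObject"],
        "Resource": [
            f"arn:aws:s3:::{bucket_name}",
            f"arn:aws:s3:::{bucket_name}/*",
        ],
    }


print(json.dumps(
    read_only_statement(
        "example-dataset-bucket",                    # hypothetical bucket
        "arn:aws:iam::111122223333:role/requester",  # hypothetical role ARN
    ),
    indent=2,
))
```

Revocation is symmetric: the requester role ARN is removed from the statement's principal list, and the whole statement is dropped once that list is empty (see `delete_target_role_bucket_policy` in the diff).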
---------

Signed-off-by: dependabot[bot]
Co-authored-by: Noah Paige <69586985+noah-paige@users.noreply.github.com>
Co-authored-by: dlpzx <71252798+dlpzx@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: jaidisido
Co-authored-by: dlpzx
Co-authored-by: mourya-33 <134511711+mourya-33@users.noreply.github.com>
Co-authored-by: nikpodsh <124577300+nikpodsh@users.noreply.github.com>
Co-authored-by: MK
Co-authored-by: Manjula
Co-authored-by: Zilvinas Saltys
Co-authored-by: Zilvinas Saltys
Co-authored-by: Daniel Lorch <98748454+lorchda@users.noreply.github.com>
Co-authored-by: Anushka Singh
Co-authored-by: Tejas Rajopadhye <71188245+TejasRGitHub@users.noreply.github.com>
Co-authored-by: trajopadhye
---
 .../modules/dataset_sharing/api/enums.py      |   1 +
 .../modules/dataset_sharing/api/resolvers.py  |   1 +
 .../modules/dataset_sharing/api/types.py      |   1 +
 .../modules/dataset_sharing/aws/s3_client.py  |  45 +
 .../modules/dataset_sharing/db/enums.py       |   1 +
 .../db/share_object_repositories.py           |  37 +-
 .../services/data_sharing_service.py          |  87 +-
 .../services/dataset_alarm_service.py         |  45 +-
 .../services/share_managers/__init__.py       |   3 +-
 ...er.py => s3_access_point_share_manager.py} | 153 +-
 .../share_managers/s3_bucket_share_manager.py | 458 +++++
 .../share_managers/share_manager_utils.py     |  64 +
 ...re.py => s3_access_point_process_share.py} |  46 +-
 .../s3_bucket_process_share.py                | 171 ++
 .../db/dataset_bucket_repositories.py         |  41 +
 .../datasets/services/dataset_service.py      |   4 +
 .../datasets_base/db/dataset_models.py        |  20 +
 .../8c79fb896983_add_table_for_buckets.py     | 189 ++
 .../Shares/components/AddShareItemModal.js    |   3 +-
 .../components/RevokeShareItemsModal.js       |   3 +-
 .../modules/Shares/services/getShareObject.js |   1 +
 .../src/modules/Shares/views/ShareView.js     |  37 +-
 frontend/src/utils/index.js                   |   1 +
 frontend/src/utils/share.js                   |  10 +
 tests/modules/datasets/conftest.py            |   5 +-
 tests/modules/datasets/tasks/conftest.py      |  58 +-
 ...
=> test_s3_access_point_share_manager.py} | 128 +- .../tasks/test_s3_bucket_share_manager.py | 1606 +++++++++++++++++ tests/modules/datasets/test_share.py | 4 + 29 files changed, 3104 insertions(+), 119 deletions(-) rename backend/dataall/modules/dataset_sharing/services/share_managers/{s3_share_manager.py => s3_access_point_share_manager.py} (76%) create mode 100644 backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py create mode 100644 backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py rename backend/dataall/modules/dataset_sharing/services/share_processors/{s3_process_share.py => s3_access_point_process_share.py} (84%) create mode 100644 backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py create mode 100644 backend/dataall/modules/datasets/db/dataset_bucket_repositories.py create mode 100644 backend/migrations/versions/8c79fb896983_add_table_for_buckets.py create mode 100644 frontend/src/utils/share.js rename tests/modules/datasets/tasks/{test_s3_share_manager.py => test_s3_access_point_share_manager.py} (91%) create mode 100644 tests/modules/datasets/tasks/test_s3_bucket_share_manager.py diff --git a/backend/dataall/modules/dataset_sharing/api/enums.py b/backend/dataall/modules/dataset_sharing/api/enums.py index 37aa5022a..9fb593f18 100644 --- a/backend/dataall/modules/dataset_sharing/api/enums.py +++ b/backend/dataall/modules/dataset_sharing/api/enums.py @@ -5,6 +5,7 @@ class ShareableType(GraphQLEnumMapper): Table = 'DatasetTable' StorageLocation = 'DatasetStorageLocation' View = 'View' + S3Bucket = 'S3Bucket' class ShareObjectPermission(GraphQLEnumMapper): diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index ecb567ed9..d0ae3a568 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -191,6 +191,7 @@ def resolve_consumption_data(context: Context, source: ShareObject, **kwargs): return { 's3AccessPointName': S3AccessPointName, 'sharedGlueDatabase': (ds.GlueDatabaseName + '_shared_' + source.shareUri)[:254] if ds else 'Not created', + 's3bucketName': ds.S3BucketName, } diff --git a/backend/dataall/modules/dataset_sharing/api/types.py b/backend/dataall/modules/dataset_sharing/api/types.py index 6e41512be..b7e3b06bf 100644 --- a/backend/dataall/modules/dataset_sharing/api/types.py +++ b/backend/dataall/modules/dataset_sharing/api/types.py @@ -107,6 +107,7 @@ fields=[ gql.Field(name='s3AccessPointName', type=gql.String), gql.Field(name='sharedGlueDatabase', type=gql.String), + gql.Field(name='s3bucketName', type=gql.String), ], ) diff --git a/backend/dataall/modules/dataset_sharing/aws/s3_client.py b/backend/dataall/modules/dataset_sharing/aws/s3_client.py index 78b0296ce..bb1f23fc2 100755 --- a/backend/dataall/modules/dataset_sharing/aws/s3_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/s3_client.py @@ -121,6 +121,51 @@ def generate_access_point_policy_template( } return policy + @staticmethod + def generate_default_bucket_policy( + s3_bucket_name: str, + owner_roleId: list, + allow_owner_sid: str, + ): + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": allow_owner_sid, + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": 
owner_roleId + } + } + }, + { + "Effect": "Deny", + "Principal": { + "AWS": "*" + }, + "Sid": "RequiredSecureTransport", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "Bool": { + "aws:SecureTransport": "false" + } + } + } + ] + } + return policy + class S3Client: def __init__(self, account_id, region): diff --git a/backend/dataall/modules/dataset_sharing/db/enums.py b/backend/dataall/modules/dataset_sharing/db/enums.py index 233991fad..7db0be34a 100644 --- a/backend/dataall/modules/dataset_sharing/db/enums.py +++ b/backend/dataall/modules/dataset_sharing/db/enums.py @@ -57,6 +57,7 @@ class ShareableType(Enum): Table = 'DatasetTable' StorageLocation = 'DatasetStorageLocation' View = 'View' + S3Bucket = 'S3Bucket' class PrincipalType(Enum): diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index 7a6d1a70b..469eb548c 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -12,7 +12,7 @@ ShareItemStatus, ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset +from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket logger = logging.getLogger(__name__) @@ -356,6 +356,8 @@ def get_share_item(session, item_type, item_uri): return session.query(DatasetTable).get(item_uri) if item_type == ShareableType.StorageLocation.value: return session.query(DatasetStorageLocation).get(item_uri) + if item_type == ShareableType.S3Bucket.value: + return session.query(DatasetBucket).get(item_uri) @staticmethod def get_share_by_uri(session, uri): @@ -525,7 +527,33 @@ def list_shareable_items(session, share, states, data): if states: locations = locations.filter(ShareObjectItem.status.in_(states)) - shareable_objects = tables.union(locations).subquery('shareable_objects') + s3_buckets = ( + session.query( + DatasetBucket.bucketUri.label('itemUri'), + func.coalesce('S3Bucket').label('itemType'), + DatasetBucket.S3BucketName.label('itemName'), + DatasetBucket.description.label('description'), + ShareObjectItem.shareItemUri.label('shareItemUri'), + ShareObjectItem.status.label('status'), + case( + [(ShareObjectItem.shareItemUri.isnot(None), True)], + else_=False, + ).label('isShared'), + ) + .outerjoin( + ShareObjectItem, + and_( + ShareObjectItem.shareUri == share.shareUri, + DatasetBucket.bucketUri + == ShareObjectItem.itemUri, + ), + ) + .filter(DatasetBucket.datasetUri == share.datasetUri) + ) + if states: + s3_buckets = s3_buckets.filter(ShareObjectItem.status.in_(states)) + + shareable_objects = tables.union(locations, s3_buckets).subquery('shareable_objects') query = session.query(shareable_objects) if data: @@ -732,9 +760,14 @@ def get_share_data_items(session, share_uri, status): session, share, status, DatasetStorageLocation, DatasetStorageLocation.locationUri ) + s3_buckets = ShareObjectRepository._find_all_share_item( + session, share, status, DatasetBucket, DatasetBucket.bucketUri + ) + return ( tables, folders, + s3_buckets, ) @staticmethod diff --git 
a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py index 3e93d894a..7134eebb4 100644 --- a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py @@ -1,12 +1,18 @@ import logging -from dataall.modules.dataset_sharing.services.share_processors.lf_process_cross_account_share import ProcessLFCrossAccountShare -from dataall.modules.dataset_sharing.services.share_processors.lf_process_same_account_share import ProcessLFSameAccountShare -from dataall.modules.dataset_sharing.services.share_processors.s3_process_share import ProcessS3Share +from dataall.modules.dataset_sharing.services.share_processors.lf_process_cross_account_share import \ + ProcessLFCrossAccountShare +from dataall.modules.dataset_sharing.services.share_processors.lf_process_same_account_share import \ + ProcessLFSameAccountShare +from dataall.modules.dataset_sharing.services.share_processors.s3_access_point_process_share import \ + ProcessS3AccessPointShare +from dataall.modules.dataset_sharing.services.share_processors.s3_bucket_process_share import ProcessS3BucketShare from dataall.base.db import Engine -from dataall.modules.dataset_sharing.db.enums import ShareObjectActions, ShareItemStatus, ShareableType -from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectSM, ShareObjectRepository, ShareItemSM +from dataall.modules.dataset_sharing.db.enums import (ShareObjectActions, ShareItemStatus, ShareableType, + ShareItemActions) +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectSM, ShareObjectRepository, \ + ShareItemSM log = logging.getLogger(__name__) @@ -21,8 +27,9 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: 1) Updates share object State Machine with the Action: Start 2) Retrieves share data and items in Share_Approved state 3) Calls sharing folders processor to grant share - 4) Calls sharing tables processor for same or cross account sharing to grant share - 5) Updates share object State Machine with the Action: Finish + 4) Calls sharing buckets processor to grant share + 5) Calls sharing tables processor for same or cross account sharing to grant share + 6) Updates share object State Machine with the Action: Finish Parameters ---------- @@ -50,12 +57,13 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: ( shared_tables, - shared_folders + shared_folders, + shared_buckets ) = ShareObjectRepository.get_share_data_items(session, share_uri, ShareItemStatus.Share_Approved.value) log.info(f'Granting permissions to folders: {shared_folders}') - approved_folders_succeed = ProcessS3Share.process_approved_shares( + approved_folders_succeed = ProcessS3AccessPointShare.process_approved_shares( session, dataset, share, @@ -67,6 +75,20 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: ) log.info(f'sharing folders succeeded = {approved_folders_succeed}') + log.info('Granting permissions to S3 buckets') + + approved_s3_buckets_succeed = ProcessS3BucketShare.process_approved_shares( + session, + dataset, + share, + shared_buckets, + source_environment, + target_environment, + source_env_group, + env_group + ) + log.info(f'sharing s3 buckets succeeded = {approved_s3_buckets_succeed}') + if source_environment.AwsAccountId != target_environment.AwsAccountId: processor = ProcessLFCrossAccountShare( session, @@ -97,7 +119,7 @@ def 
approve_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share, new_share_state) - return approved_tables_succeed if approved_folders_succeed else False + return approved_folders_succeed and approved_s3_buckets_succeed and approved_tables_succeed @classmethod def revoke_share(cls, engine: Engine, share_uri: str): @@ -108,7 +130,8 @@ def revoke_share(cls, engine: Engine, share_uri: str): 4) Checks if remaining folders are shared and effectuates clean up with folders processor 5) Calls sharing tables processor for same or cross account sharing to revoke share 6) Checks if remaining tables are shared and effectuates clean up with tables processor - 7) Updates share object State Machine with the Action: Finish + 7) Calls sharing buckets processor to revoke share + 8) Updates share object State Machine with the Action: Finish Parameters ---------- @@ -139,7 +162,8 @@ def revoke_share(cls, engine: Engine, share_uri: str): ( revoked_tables, - revoked_folders + revoked_folders, + revoked_buckets ) = ShareObjectRepository.get_share_data_items(session, share_uri, ShareItemStatus.Revoke_Approved.value) new_state = revoked_item_sm.run_transition(ShareObjectActions.Start.value) @@ -147,7 +171,7 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Revoking permissions to folders: {revoked_folders}') - revoked_folders_succeed = ProcessS3Share.process_revoked_shares( + revoked_folders_succeed = ProcessS3AccessPointShare.process_revoked_shares( session, dataset, share, @@ -158,21 +182,48 @@ def revoke_share(cls, engine: Engine, share_uri: str): env_group, ) log.info(f'revoking folders succeeded = {revoked_folders_succeed}') - existing_shared_items = ShareObjectRepository.check_existing_shared_items_of_type( + existing_shared_folders = ShareObjectRepository.check_existing_shared_items_of_type( session, share_uri, ShareableType.StorageLocation.value ) + existing_shared_buckets = ShareObjectRepository.check_existing_shared_items_of_type( + session, + share_uri, + ShareableType.S3Bucket.value + ) + existing_shared_items = existing_shared_folders or existing_shared_buckets log.info(f'Still remaining S3 resources shared = {existing_shared_items}') - if not existing_shared_items and revoked_folders: + if not existing_shared_folders and revoked_folders: log.info("Clean up S3 access points...") - clean_up_folders = ProcessS3Share.clean_up_share( + clean_up_folders = ProcessS3AccessPointShare.clean_up_share( + session, dataset=dataset, share=share, - target_environment=target_environment + folder=revoked_folders[0], + source_environment=source_environment, + target_environment=target_environment, + source_env_group=source_env_group, + env_group=env_group, + existing_shared_buckets=existing_shared_buckets ) log.info(f"Clean up S3 successful = {clean_up_folders}") + log.info('Revoking permissions to S3 buckets') + + revoked_s3_buckets_succeed = ProcessS3BucketShare.process_revoked_shares( + session, + dataset, + share, + revoked_buckets, + source_environment, + target_environment, + source_env_group, + env_group, + existing_shared_folders + ) + log.info(f'revoking s3 buckets succeeded = {revoked_s3_buckets_succeed}') + if source_environment.AwsAccountId != target_environment.AwsAccountId: processor = ProcessLFCrossAccountShare( session, @@ -217,4 +268,4 @@ def revoke_share(cls, engine: Engine, share_uri: str): new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) 
share_sm.update_state(session, share, new_share_state) - return revoked_tables_succeed and revoked_folders_succeed + return revoked_folders_succeed and revoked_s3_buckets_succeed and revoked_tables_succeed diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py index ae225f99f..d568dd4d8 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py @@ -3,7 +3,7 @@ from dataall.core.environment.db.environment_models import Environment from dataall.modules.dataset_sharing.db.share_object_models import ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation +from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation, DatasetBucket from dataall.base.utils.alarm_service import AlarmService log = logging.getLogger(__name__) @@ -147,5 +147,48 @@ def trigger_revoke_folder_sharing_failure_alarm( Share Target - AWS Account: {target_environment.AwsAccountId} - Region: {target_environment.region} +""" + return self.publish_message_to_alarms_topic(subject, message) + + def trigger_s3_bucket_sharing_failure_alarm( + self, + bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + ): + alarm_type = "Share" + return self.handle_bucket_sharing_failure(bucket, share, target_environment, alarm_type) + + def trigger_revoke_s3_bucket_sharing_failure_alarm( + self, + bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + ): + alarm_type = "Sharing Revoke" + return self.handle_bucket_sharing_failure(bucket, share, target_environment, alarm_type) + + def handle_bucket_sharing_failure(self, bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + alarm_type: str): + log.info(f'Triggering {alarm_type} failure alarm...') + subject = ( + f'ALARM: DATAALL S3 Bucket {bucket.S3BucketName} {alarm_type} Failure Notification' + ) + message = f""" +You are receiving this email because your DATAALL {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to {alarm_type} the S3 Bucket {bucket.S3BucketName}. 
+Alarm Details: + - State Change: OK -> ALARM + - Reason for State Change: S3 Bucket {alarm_type} failure + - Timestamp: {datetime.now()} + Share Source + - Dataset URI: {share.datasetUri} + - AWS Account: {bucket.AwsAccountId} + - Region: {bucket.region} + - S3 Bucket: {bucket.S3BucketName} + Share Target + - AWS Account: {target_environment.AwsAccountId} + - Region: {target_environment.region} """ return self.publish_message_to_alarms_topic(subject, message) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py b/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py index f8c7a4347..df0af76bf 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py @@ -1,2 +1,3 @@ -from .s3_share_manager import S3ShareManager +from .s3_access_point_share_manager import S3AccessPointShareManager from .lf_share_manager import LFShareManager +from .s3_bucket_share_manager import S3BucketShareManager diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py similarity index 76% rename from backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py rename to backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py index 644dbe360..014d3fd1d 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py @@ -12,15 +12,18 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository +from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareManagerUtils from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset logger = logging.getLogger(__name__) ACCESS_POINT_CREATION_TIME = 30 ACCESS_POINT_CREATION_RETRIES = 5 +IAM_ACCESS_POINT_ROLE_POLICY = "targetDatasetAccessControlPolicy" +DATAALL_ALLOW_OWNER_SID = "AllowAllToAdmin" -class S3ShareManager: +class S3AccessPointShareManager: def __init__( self, session, @@ -91,14 +94,14 @@ def manage_bucket_policy(self): s3_client = S3Client(self.source_account_id, self.source_environment.region) bucket_policy = json.loads(s3_client.get_bucket_policy(self.bucket_name)) for statement in bucket_policy["Statement"]: - if statement.get("Sid") in ["AllowAllToAdmin", "DelegateAccessToAccessPoint"]: + if statement.get("Sid") in ["DelegateAccessToAccessPoint"]: return exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( self.source_account_id, [self.dataset_admin, self.source_env_admin, SessionHelper.get_delegation_role_arn(self.source_account_id)] )] allow_owner_access = { - "Sid": "AllowAllToAdmin", + "Sid": DATAALL_ALLOW_OWNER_SID, "Effect": "Allow", "Principal": "*", "Action": "s3:*", @@ -140,34 +143,62 @@ def grant_target_role_access_policy(self): logger.info( f'Grant target role {self.target_requester_IAMRoleName} access policy' ) + key_alias = f"alias/{self.dataset.KmsAlias}" + kms_client = KmsClient(self.dataset_account_id, self.source_environment.region) + kms_key_id = 
kms_client.get_key_id(key_alias) + existing_policy = IAM.get_role_policy( self.target_account_id, self.target_requester_IAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, ) if existing_policy: # type dict - if self.bucket_name not in ",".join(existing_policy["Statement"][0]["Resource"]): - logger.info( - f'targetDatasetAccessControlPolicy exists for IAM role {self.target_requester_IAMRoleName}, ' - f'but S3 Access point {self.access_point_name} is not included, updating...' - ) - target_resources = [ - f"arn:aws:s3:::{self.bucket_name}", - f"arn:aws:s3:::{self.bucket_name}/*", - f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}", - f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*" + s3_target_resources = [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*", + f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}", + f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*" + ] + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.add_missing_resources_to_policy_statement( + self.bucket_name, + s3_target_resources, + existing_policy["Statement"][0], + IAM_ACCESS_POINT_ROLE_POLICY + ) + if kms_key_id: + kms_target_resources = [ + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}" ] - existing_policy["Statement"][0]["Resource"].extend(target_resources) - policy = existing_policy - else: - logger.info( - f'targetDatasetAccessControlPolicy exists for IAM role {self.target_requester_IAMRoleName} ' - f'and S3 Access point {self.access_point_name} is included, skipping...' - ) - return + if len(existing_policy["Statement"]) > 1: + share_manager.add_missing_resources_to_policy_statement( + kms_key_id, + kms_target_resources, + existing_policy["Statement"][1], + IAM_ACCESS_POINT_ROLE_POLICY + ) + else: + additional_policy = { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": kms_target_resources + } + existing_policy["Statement"].append(additional_policy) + + policy = existing_policy else: logger.info( - f'targetDatasetAccessControlPolicy does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' + f'{IAM_ACCESS_POINT_ROLE_POLICY} does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' 
) policy = { "Version": "2012-10-17", @@ -186,10 +217,22 @@ def grant_target_role_access_policy(self): } ] } + if kms_key_id: + additional_policy = { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}" + ] + } + policy["Statement"].append(additional_policy) + IAM.update_role_policy( self.target_account_id, self.target_requester_IAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, json.dumps(policy), ) @@ -260,7 +303,7 @@ def manage_access_point_and_policy(self): [self.dataset_admin, self.source_env_admin, SessionHelper.get_delegation_role_arn(self.source_account_id)] )] admin_statement = { - "Sid": "AllowAllToAdmin", + "Sid": DATAALL_ALLOW_OWNER_SID, "Effect": "Allow", "Principal": "*", "Action": "s3:*", @@ -333,7 +376,7 @@ def delete_access_point( share: ShareObject, dataset: Dataset, ): - access_point_name = S3ShareManager.build_access_point_name(share) + access_point_name = S3AccessPointShareManager.build_access_point_name(share) logger.info( f'Deleting access point {access_point_name}...' ) @@ -356,32 +399,54 @@ def delete_target_role_access_policy( logger.info( 'Deleting target role IAM policy...' ) - access_point_name = S3ShareManager.build_access_point_name(share) + access_point_name = S3AccessPointShareManager.build_access_point_name(share) existing_policy = IAM.get_role_policy( target_environment.AwsAccountId, share.principalIAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, ) + key_alias = f"alias/{dataset.KmsAlias}" + kms_client = KmsClient(dataset.AwsAccountId, dataset.region) + kms_key_id = kms_client.get_key_id(key_alias) if existing_policy: - if dataset.S3BucketName in ",".join(existing_policy["Statement"][0]["Resource"]): - target_resources = [ - f"arn:aws:s3:::{dataset.S3BucketName}", - f"arn:aws:s3:::{dataset.S3BucketName}/*", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + s3_target_resources = [ + f"arn:aws:s3:::{dataset.S3BucketName}", + f"arn:aws:s3:::{dataset.S3BucketName}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + ShareManagerUtils.remove_resource_from_statement( + existing_policy["Statement"][0], + s3_target_resources + ) + if kms_key_id: + kms_target_resources = [ + f"arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}" ] - for item in target_resources: - existing_policy["Statement"][0]["Resource"].remove(item) - if not existing_policy["Statement"][0]["Resource"]: - IAM.delete_role_policy(target_environment.AwsAccountId, share.principalIAMRoleName, "targetDatasetAccessControlPolicy") - else: - IAM.update_role_policy( - target_environment.AwsAccountId, - share.principalIAMRoleName, - "targetDatasetAccessControlPolicy", - json.dumps(existing_policy), + if len(existing_policy["Statement"]) > 1: + ShareManagerUtils.remove_resource_from_statement( + existing_policy["Statement"][1], + kms_target_resources ) + policy_statements = [] + for statement in existing_policy["Statement"]: + if len(statement["Resource"]) != 0: + policy_statements.append(statement) + + existing_policy["Statement"] = policy_statements + if len(existing_policy["Statement"]) == 0: + IAM.delete_role_policy(target_environment.AwsAccountId, + 
share.principalIAMRoleName, + IAM_ACCESS_POINT_ROLE_POLICY) + else: + IAM.update_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_ACCESS_POINT_ROLE_POLICY, + json.dumps(existing_policy), + ) + @staticmethod def delete_dataset_bucket_key_policy( share: ShareObject, diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py new file mode 100644 index 000000000..180b09d4a --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py @@ -0,0 +1,458 @@ +import abc +import json +import logging +from itertools import count + +from dataall.base.aws.iam import IAM +from dataall.base.aws.sts import SessionHelper +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.aws.kms_client import KmsClient +from dataall.modules.dataset_sharing.aws.s3_client import S3ControlClient, S3Client +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareManagerUtils +from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository + +logger = logging.getLogger(__name__) + +DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly" +DATAALL_ALLOW_OWNER_SID = "AllowAllToAdmin" +IAM_S3BUCKET_ROLE_POLICY = "dataall-targetDatasetS3Bucket-AccessControlPolicy" + + +class S3BucketShareManager: + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + target_bucket: DatasetBucket, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + self.session = session + self.source_env_group = source_env_group + self.env_group = env_group + self.dataset = dataset + self.share = share + self.target_bucket = target_bucket + self.source_environment = source_environment + self.target_environment = target_environment + self.share_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + target_bucket.bucketUri, + ) + self.source_account_id = target_bucket.AwsAccountId + self.target_account_id = target_environment.AwsAccountId + self.source_env_admin = source_env_group.environmentIAMRoleArn + self.target_requester_IAMRoleName = share.principalIAMRoleName + self.bucket_name = target_bucket.S3BucketName + self.dataset_admin = dataset.IAMDatasetAdminRoleArn + self.bucket_region = target_bucket.region + + @abc.abstractmethod + def process_approved_shares(self, *kwargs) -> bool: + raise NotImplementedError + + @abc.abstractmethod + def process_revoked_shares(self, *kwargs) -> bool: + raise NotImplementedError + + def grant_s3_iam_access(self): + """ + Updates requester IAM role policy to include requested S3 bucket and kms key + :return: + """ + logger.info( + f'Grant target role {self.target_requester_IAMRoleName} access policy' + ) + existing_policy = IAM.get_role_policy( + self.target_account_id, + self.target_requester_IAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + ) + key_alias = f"alias/{self.target_bucket.KmsAlias}" + kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_key_id = 
kms_client.get_key_id(key_alias) + + if existing_policy: # type dict + s3_target_resources = [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*" + ] + + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.add_missing_resources_to_policy_statement( + resource_type=self.bucket_name, + target_resources=s3_target_resources, + existing_policy_statement=existing_policy["Statement"][0], + iam_role_policy_name=IAM_S3BUCKET_ROLE_POLICY + ) + + if kms_key_id: + kms_target_resources = [ + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}" + ] + if len(existing_policy["Statement"]) > 1: + share_manager.add_missing_resources_to_policy_statement( + resource_type=kms_key_id, + target_resources=kms_target_resources, + existing_policy_statement=existing_policy["Statement"][1], + iam_role_policy_name=IAM_S3BUCKET_ROLE_POLICY + ) + else: + additional_policy = { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": kms_target_resources + } + existing_policy["Statement"].append(additional_policy) + + policy = existing_policy + else: + logger.info( + f'{IAM_S3BUCKET_ROLE_POLICY} does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' + ) + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*" + ] + } + ] + } + if kms_key_id: + additional_policy = { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}" + ] + } + policy["Statement"].append(additional_policy) + + IAM.update_role_policy( + self.target_account_id, + self.target_requester_IAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + json.dumps(policy), + ) + + def get_bucket_policy_or_default(self): + """ + Fetches the existing bucket policy for the S3 bucket if one exists otherwise returns the default bucket policy + :return: + """ + s3_client = S3Client(self.source_account_id, self.source_environment.region) + bucket_policy = s3_client.get_bucket_policy(self.bucket_name) + if bucket_policy: + logger.info( + f'There is already an existing policy for bucket {self.bucket_name}, will be updating policy...' + ) + bucket_policy = json.loads(bucket_policy) + else: + logger.info( + f'Bucket policy for {self.bucket_name} does not exist, generating default policy...' + ) + exceptions_roleId = self.get_bucket_owner_roleid() + bucket_policy = S3ControlClient.generate_default_bucket_policy( + self.bucket_name, + exceptions_roleId, + DATAALL_ALLOW_OWNER_SID + ) + return bucket_policy + + def get_bucket_owner_roleid(self): + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + self.source_account_id, + [self.dataset_admin, self.source_env_admin, SessionHelper.get_delegation_role_arn(self.source_account_id)] + )] + return exceptions_roleId + + def grant_role_bucket_policy(self): + """ + This function will update bucket policy by granting admin access to dataset admin, pivot role + and environment admin along with read only access to accepted share roles. All the policies will only be added + once. 
+ :return: + """ + logger.info( + f'Granting access via Bucket policy for {self.bucket_name}' + ) + try: + target_requester_arn = self.get_role_arn(self.target_account_id, self.target_requester_IAMRoleName) + bucket_policy = self.get_bucket_policy_or_default() + counter = count() + statements = {item.get("Sid", next(counter)): item for item in bucket_policy.get("Statement", {})} + if DATAALL_READ_ONLY_SID in statements.keys(): + logger.info(f'Bucket policy contains share statement {DATAALL_READ_ONLY_SID}, updating the current one') + statements[DATAALL_READ_ONLY_SID] = self.add_target_arn_to_statement_principal(statements[DATAALL_READ_ONLY_SID], target_requester_arn) + else: + logger.info(f'Bucket policy does not contain share statement {DATAALL_READ_ONLY_SID}, generating a new one') + statements[DATAALL_READ_ONLY_SID] = self.generate_default_bucket_read_policy_statement(self.bucket_name, target_requester_arn) + + if DATAALL_ALLOW_OWNER_SID not in statements.keys(): + statements[DATAALL_ALLOW_OWNER_SID] = self.generate_owner_access_statement(self.bucket_name, self.get_bucket_owner_roleid()) + + bucket_policy["Statement"] = list(statements.values()) + s3_client = S3Client(self.source_account_id, self.source_environment.region) + s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy)) + except Exception as e: + logger.exception( + f'Failed during bucket policy management {e}' + ) + raise e + + def add_target_arn_to_statement_principal(self, statement, target_requester_arn): + principal_list = self.get_principal_list(statement) + if f"{target_requester_arn}" not in principal_list: + principal_list.append(f"{target_requester_arn}") + statement["Principal"]["AWS"] = principal_list + return statement + + @staticmethod + def generate_owner_access_statement(s3_bucket_name, owner_roleId): + owner_policy_statement = { + "Sid": DATAALL_ALLOW_OWNER_SID, + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + } + return owner_policy_statement + + @staticmethod + def get_principal_list(statement): + principal_list = statement["Principal"]["AWS"] + if isinstance(principal_list, str): + principal_list = [principal_list] + return principal_list + + def grant_dataset_bucket_key_policy(self): + if (self.target_bucket.imported and self.target_bucket.importedKmsKey) or not self.target_bucket.imported: + logger.info( + 'Updating dataset Bucket KMS key policy...' + ) + key_alias = f"alias/{self.target_bucket.KmsAlias}" + kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = kms_client.get_key_policy(kms_key_id) + target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName) + if existing_policy and f'{target_requester_id}:*' not in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"].append( + { + "Sid": f"{target_requester_id}", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": { + "StringLike": { + "aws:userId": f"{target_requester_id}:*" + } + } + } + ) + kms_client.put_key_policy( + kms_key_id, + json.dumps(policy) + ) + + def delete_target_role_bucket_policy(self): + logger.info( + f'Deleting target role from bucket policy for bucket {self.bucket_name}...' 
+ ) + try: + s3_client = S3Client(self.source_account_id, self.source_environment.region) + bucket_policy = json.loads(s3_client.get_bucket_policy(self.bucket_name)) + target_requester_arn = self.get_role_arn(self.target_account_id, self.target_requester_IAMRoleName) + counter = count() + statements = {item.get("Sid", next(counter)): item for item in bucket_policy.get("Statement", {})} + if DATAALL_READ_ONLY_SID in statements.keys(): + principal_list = self.get_principal_list(statements[DATAALL_READ_ONLY_SID]) + if f"{target_requester_arn}" in principal_list: + principal_list.remove(f"{target_requester_arn}") + if len(principal_list) == 0: + statements.pop(DATAALL_READ_ONLY_SID) + else: + statements[DATAALL_READ_ONLY_SID]["Principal"]["AWS"] = principal_list + bucket_policy["Statement"] = list(statements.values()) + s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy)) + except Exception as e: + logger.exception( + f'Failed during bucket policy management {e}' + ) + raise e + + def delete_target_role_access_policy( + self, + share: ShareObject, + target_bucket: DatasetBucket, + target_environment: Environment, + ): + logger.info( + 'Deleting target role IAM policy...' + ) + existing_policy = IAM.get_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + ) + key_alias = f"alias/{target_bucket.KmsAlias}" + kms_client = KmsClient(target_bucket.AwsAccountId, target_bucket.region) + kms_key_id = kms_client.get_key_id(key_alias) + if existing_policy: + s3_target_resources = [ + f"arn:aws:s3:::{target_bucket.S3BucketName}", + f"arn:aws:s3:::{target_bucket.S3BucketName}/*" + ] + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.remove_resource_from_statement(existing_policy["Statement"][0], s3_target_resources) + if kms_key_id: + kms_target_resources = [ + f"arn:aws:kms:{target_bucket.region}:{target_bucket.AwsAccountId}:key/{kms_key_id}" + ] + if len(existing_policy["Statement"]) > 1: + share_manager.remove_resource_from_statement(existing_policy["Statement"][1], kms_target_resources) + + policy_statements = [] + for statement in existing_policy["Statement"]: + if len(statement["Resource"]) != 0: + policy_statements.append(statement) + + existing_policy["Statement"] = policy_statements + if len(existing_policy["Statement"]) == 0: + IAM.delete_role_policy(target_environment.AwsAccountId, share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY) + else: + IAM.update_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + json.dumps(existing_policy), + ) + + @staticmethod + def delete_target_role_bucket_key_policy( + share: ShareObject, + target_bucket: DatasetBucket, + target_environment: Environment, + ): + if (target_bucket.imported and target_bucket.importedKmsKey) or not target_bucket.imported: + logger.info( + 'Deleting target role from dataset bucket KMS key policy...' 
+ ) + key_alias = f"alias/{target_bucket.KmsAlias}" + kms_client = KmsClient(target_bucket.AwsAccountId, target_bucket.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = kms_client.get_key_policy(kms_key_id) + target_requester_id = SessionHelper.get_role_id(target_environment.AwsAccountId, share.principalIAMRoleName) + if existing_policy and f'{target_requester_id}:*' in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"] = [item for item in policy["Statement"] if item.get("Sid", None) != f"{target_requester_id}"] + kms_client.put_key_policy( + kms_key_id, + json.dumps(policy) + ) + + def handle_share_failure(self, error: Exception) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Returns + ------- + True if alarm published successfully + """ + logger.error( + f'Failed to share bucket {self.target_bucket.S3BucketName} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + DatasetAlarmService().trigger_s3_bucket_sharing_failure_alarm( + self.target_bucket, self.share, self.target_environment + ) + return True + + def handle_revoke_failure(self, error: Exception) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Returns + ------- + True if alarm published successfully + """ + logger.error( + f'Failed to revoke S3 permissions to bucket {self.bucket_name} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + DatasetAlarmService().trigger_revoke_folder_sharing_failure_alarm( + self.target_bucket, self.share, self.target_environment + ) + return True + + @staticmethod + def get_role_arn(target_account_id, target_requester_IAMRoleName): + return f"arn:aws:iam::{target_account_id}:role/{target_requester_IAMRoleName}" + + @staticmethod + def generate_default_bucket_read_policy_statement(s3_bucket_name, target_requester_arn): + return { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"{target_requester_arn}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ] + } diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py b/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py new file mode 100644 index 000000000..305d8c5e7 --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py @@ -0,0 +1,64 @@ +import abc +import logging + +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.datasets_base.db.dataset_models import Dataset + + +logger = logging.getLogger(__name__) + + +class ShareManagerUtils: + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + self.target_requester_IAMRoleName = share.principalIAMRoleName + self.session = session + self.dataset = dataset + self.share = share + 
self.source_environment = source_environment + self.target_environment = target_environment + self.source_env_group = source_env_group + self.env_group = env_group + + def add_missing_resources_to_policy_statement( + self, + resource_type, + target_resources, + existing_policy_statement, + iam_role_policy_name + ): + """ + Checks if the resources are in the existing policy. Otherwise, it will add it. + :param resource_type: str + :param target_resources: list + :param existing_policy_statement: dict + :param iam_role_policy_name: str + :return + """ + for target_resource in target_resources: + if target_resource not in existing_policy_statement["Resource"]: + logger.info( + f'{iam_role_policy_name} exists for IAM role {self.target_requester_IAMRoleName}, ' + f'but {resource_type} is not included, updating...' + ) + existing_policy_statement["Resource"].extend([target_resource]) + else: + logger.info( + f'{iam_role_policy_name} exists for IAM role {self.target_requester_IAMRoleName} ' + f'and {resource_type} is included, skipping...' + ) + + @staticmethod + def remove_resource_from_statement(policy_statement, target_resources): + for target_resource in target_resources: + if target_resource in policy_statement["Resource"]: + policy_statement["Resource"].remove(target_resource) diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py similarity index 84% rename from backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py rename to backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py index 8e2f6cf38..a1c934ef9 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py @@ -1,7 +1,7 @@ import logging from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup -from ..share_managers import S3ShareManager +from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset from dataall.modules.dataset_sharing.db.enums import ShareItemStatus, ShareObjectActions, ShareItemActions from dataall.modules.dataset_sharing.db.share_object_models import ShareObject @@ -10,7 +10,7 @@ log = logging.getLogger(__name__) -class ProcessS3Share(S3ShareManager): +class ProcessS3AccessPointShare(S3AccessPointShareManager): def __init__( self, session, @@ -21,6 +21,8 @@ def __init__( target_environment: Environment, source_env_group: EnvironmentGroup, env_group: EnvironmentGroup, + existing_shared_buckets: bool = False + ): super().__init__( @@ -111,7 +113,7 @@ def process_revoked_shares( source_environment: Environment, target_environment: Environment, source_env_group: EnvironmentGroup, - env_group: EnvironmentGroup + env_group: EnvironmentGroup, ) -> bool: """ 1) update_share_item_status with Start action @@ -164,11 +166,18 @@ def process_revoked_shares( return success - @staticmethod + @classmethod def clean_up_share( + cls, + session, dataset: Dataset, share: ShareObject, - target_environment: Environment + folder: DatasetStorageLocation, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + existing_shared_buckets: bool = 
False ): """ 1) deletes S3 access point for this share in this Dataset S3 Bucket @@ -179,21 +188,32 @@ def clean_up_share( ------- True if share is cleaned-up successfully """ - - clean_up = S3ShareManager.delete_access_point( + clean_up_folder = cls( + session, + dataset, + share, + folder, + source_environment, + target_environment, + source_env_group, + env_group, + ) + clean_up = clean_up_folder.delete_access_point( share=share, dataset=dataset ) + if clean_up: - S3ShareManager.delete_target_role_access_policy( - share=share, - dataset=dataset, - target_environment=target_environment - ) - S3ShareManager.delete_dataset_bucket_key_policy( + clean_up_folder.delete_target_role_access_policy( share=share, dataset=dataset, target_environment=target_environment ) + if not existing_shared_buckets: + clean_up_folder.delete_dataset_bucket_key_policy( + share=share, + dataset=dataset, + target_environment=target_environment + ) return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py new file mode 100644 index 000000000..57e8e069f --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py @@ -0,0 +1,171 @@ +import logging + +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from dataall.modules.dataset_sharing.db.enums import ShareItemStatus, ShareObjectActions, ShareItemActions +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository, ShareItemSM + + +log = logging.getLogger(__name__) + + +class ProcessS3BucketShare(S3BucketShareManager): + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + s3_bucket: DatasetBucket, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + + super().__init__( + session, + dataset, + share, + s3_bucket, + source_environment, + target_environment, + source_env_group, + env_group, + ) + + @classmethod + def process_approved_shares( + cls, + session, + dataset: Dataset, + share: ShareObject, + shared_buckets: [DatasetBucket], + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup + ) -> bool: + """ + 1) update_share_item_status with Start action + 2) manage_bucket_policy - grants permission in the bucket policy + 3) grant_target_role_access_policy == done + 4) update_dataset_bucket_key_policy == done + 5) update_share_item_status with Finish action == done + + Returns + ------- + True if share is granted successfully + """ + log.info( + '##### Starting S3 bucket share #######' + ) + success = True + for shared_bucket in shared_buckets: + sharing_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + shared_bucket.bucketUri, + ) + shared_item_SM = ShareItemSM(ShareItemStatus.Share_Approved.value) + new_state = shared_item_SM.run_transition(ShareObjectActions.Start.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + + sharing_bucket = cls( + session, + dataset, + share, + 
shared_bucket, + source_environment, + target_environment, + source_env_group, + env_group + ) + try: + sharing_bucket.grant_role_bucket_policy() + sharing_bucket.grant_s3_iam_access() + sharing_bucket.grant_dataset_bucket_key_policy() + new_state = shared_item_SM.run_transition(ShareItemActions.Success.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + + except Exception as e: + sharing_bucket.handle_share_failure(e) + new_state = shared_item_SM.run_transition(ShareItemActions.Failure.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + success = False + return success + + @classmethod + def process_revoked_shares( + cls, + session, + dataset: Dataset, + share: ShareObject, + revoked_buckets: [DatasetBucket], + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + existing_shared_folders: bool = False + ) -> bool: + """ + 1) update_share_item_status with Start action + 2) remove access from bucket policy + 3) remove access from key policy + 4) remove access from IAM role policy + 5) update_share_item_status with Finish action + + Returns + ------- + True if share is revoked successfully + False if revoke fails + """ + + log.info( + '##### Starting Revoking S3 bucket share #######' + ) + success = True + for revoked_bucket in revoked_buckets: + removing_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + revoked_bucket.bucketUri, + ) + + revoked_item_SM = ShareItemSM(ShareItemStatus.Revoke_Approved.value) + new_state = revoked_item_SM.run_transition(ShareObjectActions.Start.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + removing_bucket = cls( + session, + dataset, + share, + revoked_bucket, + source_environment, + target_environment, + source_env_group, + env_group + ) + try: + removing_bucket.delete_target_role_bucket_policy() + removing_bucket.delete_target_role_access_policy( + share=share, + target_bucket=revoked_bucket, + target_environment=target_environment + ) + if not existing_shared_folders: + removing_bucket.delete_target_role_bucket_key_policy( + share=share, + target_bucket=revoked_bucket, + target_environment=target_environment + ) + new_state = revoked_item_SM.run_transition(ShareItemActions.Success.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + + except Exception as e: + removing_bucket.handle_revoke_failure(e) + new_state = revoked_item_SM.run_transition(ShareItemActions.Failure.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + success = False + + return success diff --git a/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py new file mode 100644 index 000000000..31cfa2cdd --- /dev/null +++ b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py @@ -0,0 +1,41 @@ +import logging + +from dataall.modules.datasets_base.db.dataset_models import DatasetBucket, Dataset + +logger = logging.getLogger(__name__) + + +class DatasetBucketRepository: + + @staticmethod + def create_dataset_bucket( + session, + dataset: Dataset, + data: dict = None + ) -> DatasetBucket: + bucket = DatasetBucket( + datasetUri=dataset.datasetUri, + label=data.get('label'), + description=data.get('description', 'No description provided'), + tags=data.get('tags', []), + S3BucketName=dataset.S3BucketName, + 
diff --git a/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py
new file mode 100644
index 000000000..31cfa2cdd
--- /dev/null
+++ b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py
@@ -0,0 +1,41 @@
+import logging
+
+from dataall.modules.datasets_base.db.dataset_models import DatasetBucket, Dataset
+
+logger = logging.getLogger(__name__)
+
+
+class DatasetBucketRepository:
+
+    @staticmethod
+    def create_dataset_bucket(
+        session,
+        dataset: Dataset,
+        data: dict = None
+    ) -> DatasetBucket:
+        bucket = DatasetBucket(
+            datasetUri=dataset.datasetUri,
+            label=data.get('label'),
+            description=data.get('description', 'No description provided'),
+            tags=data.get('tags', []),
+            S3BucketName=dataset.S3BucketName,
+            AwsAccountId=dataset.AwsAccountId,
+            owner=dataset.owner,
+            region=dataset.region,
+            KmsAlias=dataset.KmsAlias,
+            imported=dataset.imported,
+            importedKmsKey=dataset.importedKmsKey,
+        )
+        session.add(bucket)
+        session.commit()
+        return bucket
+
+    @staticmethod
+    def delete_dataset_buckets(session, dataset_uri) -> bool:
+        buckets = (
+            session.query(DatasetBucket)
+            .filter(DatasetBucket.datasetUri == dataset_uri)
+            .all()
+        )
+        for bucket in buckets:
+            session.delete(bucket)
+        return True
diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py
index 707e9fb91..5a02d2e9b 100644
--- a/backend/dataall/modules/datasets/services/dataset_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_service.py
@@ -16,6 +16,7 @@ from dataall.core.stacks.db.stack_repositories import Stack
 from dataall.core.tasks.db.task_models import Task
 from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository
+from dataall.modules.datasets.db.dataset_bucket_repositories import DatasetBucketRepository
 from dataall.modules.vote.db.vote_repositories import VoteRepository
 from dataall.base.db.exceptions import AWSResourceNotFound, UnauthorizedOperation
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
@@ -92,6 +93,8 @@ def create_dataset(uri, admin_group, data: dict):
                 data=data,
             )

+            DatasetBucketRepository.create_dataset_bucket(session, dataset, data)
+
             ResourcePolicy.attach_resource_policy(
                 session=session,
                 group=data['SamlAdminGroupName'],
@@ -380,6 +383,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False):
             DatasetService.delete_dataset_term_links(session, uri)
             DatasetTableRepository.delete_dataset_tables(session, dataset.datasetUri)
             DatasetLocationRepository.delete_dataset_locations(session, dataset.datasetUri)
+            DatasetBucketRepository.delete_dataset_buckets(session, dataset.datasetUri)
             KeyValueTag.delete_key_value_tags(session, dataset.datasetUri, 'dataset')
             VoteRepository.delete_votes(session, dataset.datasetUri, 'dataset')
diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets_base/db/dataset_models.py
index a5fcf1260..dd12746ad 100644
--- a/backend/dataall/modules/datasets_base/db/dataset_models.py
+++ b/backend/dataall/modules/datasets_base/db/dataset_models.py
@@ -141,3 +141,23 @@ class Dataset(Resource, Base):
     @classmethod
     def uri(cls):
         return cls.datasetUri
+
+
+class DatasetBucket(Resource, Base):
+    __tablename__ = 'dataset_bucket'
+    datasetUri = Column(String, nullable=False)
+    bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket'))
+    AwsAccountId = Column(String, nullable=False)
+    S3BucketName = Column(String, nullable=False)
+    region = Column(String, default='eu-west-1')
+    partition = Column(String, default='aws')
+    KmsAlias = Column(String, nullable=False)
+    imported = Column(Boolean, default=False)
+    importedKmsKey = Column(Boolean, default=False)
+    userRoleForStorageBucket = query_expression()
+    projectPermission = query_expression()
+    environmentEndPoint = query_expression()
+
+    @classmethod
+    def uri(cls):
+        return cls.bucketUri
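A hedged usage sketch of the wiring above: `create_dataset_bucket` is called from `create_dataset` right after the dataset row exists, and `delete_dataset_buckets` from `delete_dataset`. The `db` engine handle and `make_dataset` factory below are hypothetical stand-ins, not part of this patch:

```python
# Illustrative only: exercising DatasetBucketRepository inside one scoped
# session. `db` and `make_dataset` are hypothetical stand-ins.
with db.scoped_session() as session:
    dataset = make_dataset(session, label='sales-data')

    bucket = DatasetBucketRepository.create_dataset_bucket(
        session,
        dataset=dataset,
        data={'label': dataset.label, 'tags': dataset.tags},
    )
    assert bucket.S3BucketName == dataset.S3BucketName
    assert bucket.datasetUri == dataset.datasetUri

    # on dataset deletion the bucket rows are removed again
    DatasetBucketRepository.delete_dataset_buckets(session, dataset.datasetUri)
```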
diff --git a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py
new file mode 100644
index 000000000..46eed84c2
--- /dev/null
+++ b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py
@@ -0,0 +1,189 @@
+"""add table for buckets
+
+Revision ID: 8c79fb896983
+Revises: 4f3c1d84a628
+Create Date: 2023-09-06 12:01:53.841149
+
+"""
+import os
+from sqlalchemy import orm, Column, String, Boolean, ForeignKey, DateTime, and_, inspect
+from sqlalchemy.orm import query_expression
+from sqlalchemy.ext.declarative import declarative_base
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+from dataall.base.db import get_engine, has_table
+from dataall.base.db import utils, Resource
+from dataall.modules.dataset_sharing.db.enums import ShareObjectStatus, ShareableType
+from datetime import datetime
+
+# revision identifiers, used by Alembic.
+revision = '8c79fb896983'
+down_revision = '4f3c1d84a628'
+branch_labels = None
+depends_on = None
+
+Base = declarative_base()
+
+
+class Dataset(Resource, Base):
+    __tablename__ = 'dataset'
+    environmentUri = Column(String, ForeignKey("environment.environmentUri"), nullable=False)
+    organizationUri = Column(String, nullable=False)
+    datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset'))
+    region = Column(String, default='eu-west-1')
+    AwsAccountId = Column(String, nullable=False)
+    S3BucketName = Column(String, nullable=False)
+    GlueDatabaseName = Column(String, nullable=False)
+    GlueCrawlerName = Column(String)
+    GlueCrawlerSchedule = Column(String)
+    GlueProfilingJobName = Column(String)
+    GlueProfilingTriggerSchedule = Column(String)
+    GlueProfilingTriggerName = Column(String)
+    GlueDataQualityJobName = Column(String)
+    GlueDataQualitySchedule = Column(String)
+    GlueDataQualityTriggerName = Column(String)
+    IAMDatasetAdminRoleArn = Column(String, nullable=False)
+    IAMDatasetAdminUserArn = Column(String, nullable=False)
+    KmsAlias = Column(String, nullable=False)
+    userRoleForDataset = query_expression()
+    userRoleInEnvironment = query_expression()
+    isPublishedInEnvironment = query_expression()
+    projectPermission = query_expression()
+    language = Column(String, nullable=False, default='English')
+    topics = Column(postgresql.ARRAY(String), nullable=True)
+    confidentiality = Column(String, nullable=False, default='Unclassified')
+    tags = Column(postgresql.ARRAY(String))
+    inProject = query_expression()
+
+    bucketCreated = Column(Boolean, default=False)
+    glueDatabaseCreated = Column(Boolean, default=False)
+    iamAdminRoleCreated = Column(Boolean, default=False)
+    iamAdminUserCreated = Column(Boolean, default=False)
+    kmsAliasCreated = Column(Boolean, default=False)
+    lakeformationLocationCreated = Column(Boolean, default=False)
+    bucketPolicyCreated = Column(Boolean, default=False)
+
+    # bookmarked = Column(Integer, default=0)
+    # upvotes=Column(Integer, default=0)
+
+    businessOwnerEmail = Column(String, nullable=True)
+    businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True)
+    stewards = Column(String, nullable=True)
+
+    SamlAdminGroupName = Column(String, nullable=True)
+
+    importedS3Bucket = Column(Boolean, default=False)
+    importedGlueDatabase = Column(Boolean, default=False)
+    importedKmsKey = Column(Boolean, default=False)
+    importedAdminRole = Column(Boolean, default=False)
+    imported = Column(Boolean, default=False)
+
+
+class DatasetBucket(Resource, Base):
+    __tablename__ = 'dataset_bucket'
+    datasetUri = Column(String, nullable=False)
+    bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket'))
+    AwsAccountId = Column(String, nullable=False)
+    S3BucketName = Column(String, nullable=False)
+    region = Column(String, default='eu-west-1')
+    partition = Column(String, default='aws')
+    KmsAlias = Column(String,
nullable=False) + imported = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + userRoleForStorageBucket = query_expression() + projectPermission = query_expression() + environmentEndPoint = query_expression() + + @classmethod + def uri(cls): + return cls.bucketUri + + +class ShareObjectItem(Base): + __tablename__ = 'share_object_item' + shareUri = Column(String, nullable=False) + shareItemUri = Column( + String, default=utils.uuid('shareitem'), nullable=False, primary_key=True + ) + itemType = Column(String, nullable=False) + itemUri = Column(String, nullable=False) + itemName = Column(String, nullable=False) + permission = Column(String, nullable=True) + created = Column(DateTime, nullable=False, default=datetime.now) + updated = Column(DateTime, nullable=True, onupdate=datetime.now) + deleted = Column(DateTime, nullable=True) + owner = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=True) + GlueTableName = Column(String, nullable=True) + S3AccessPointName = Column(String, nullable=True) + status = Column(String, nullable=False, default=ShareObjectStatus.Draft.value) + action = Column(String, nullable=True) + + +def upgrade(): + try: + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + engine = get_engine(envname=envname).engine + bind = op.get_bind() + session = orm.Session(bind=bind) + datasets: [Dataset] = session.query(Dataset).all() + if not has_table('dataset_bucket', engine): + op.create_table( + 'dataset_bucket', + sa.Column('bucketUri', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('KmsAlias', sa.String(), nullable=False), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.Column('importedKmsKey', sa.Boolean(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('partition', sa.String(), nullable=False, default='aws'), + sa.ForeignKeyConstraint(columns=['datasetUri'], refcolumns=['dataset.datasetUri']), + sa.PrimaryKeyConstraint('bucketUri'), + ) + print('Creating a new dataset_bucket row for each existing dataset...') + for dataset in datasets: + dataset_bucket = DatasetBucket( + name=dataset.S3BucketName, + datasetUri=dataset.datasetUri, + AwsAccountId=dataset.AwsAccountId, + S3BucketName=dataset.S3BucketName, + region=dataset.region, + label=dataset.label, + description=dataset.label, + tags=dataset.tags, + owner=dataset.owner, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(dataset_bucket) + session.flush() # flush to get the bucketUri + session.commit() + + except Exception as exception: + print('Failed to upgrade due to:', exception) + raise exception + + +def column_exists(table_name, column_name): + bind = op.get_context().bind + insp = inspect(bind) + columns = insp.get_columns(table_name) + return any(c["name"] == column_name for c in columns) + + +def downgrade(): + 
op.drop_table('dataset_bucket') diff --git a/frontend/src/modules/Shares/components/AddShareItemModal.js b/frontend/src/modules/Shares/components/AddShareItemModal.js index a21b55d13..1270209e7 100644 --- a/frontend/src/modules/Shares/components/AddShareItemModal.js +++ b/frontend/src/modules/Shares/components/AddShareItemModal.js @@ -20,6 +20,7 @@ import { Defaults, Pager, Scrollbar } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient } from 'services'; import { addSharedItem, getShareObject } from '../services'; +import { generateShareItemLabel } from 'utils'; export const AddShareItemModal = (props) => { const client = useClient(); @@ -144,7 +145,7 @@ export const AddShareItemModal = (props) => { sharedItems.nodes.map((item) => ( - {item.itemType === 'Table' ? 'Table' : 'Folder'} + {generateShareItemLabel(item.itemType)} {item.itemName} diff --git a/frontend/src/modules/Shares/components/RevokeShareItemsModal.js b/frontend/src/modules/Shares/components/RevokeShareItemsModal.js index 2aa066df8..895824dba 100644 --- a/frontend/src/modules/Shares/components/RevokeShareItemsModal.js +++ b/frontend/src/modules/Shares/components/RevokeShareItemsModal.js @@ -10,6 +10,7 @@ import { Defaults } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient } from 'services'; import { getShareObject, revokeItemsShareObject } from '../services'; +import { generateShareItemLabel } from 'utils'; export const RevokeShareItemsModal = (props) => { const client = useClient(); @@ -40,7 +41,7 @@ export const RevokeShareItemsModal = (props) => { response.data.getShareObject.items.nodes.map((item) => ({ id: item.shareItemUri, name: item.itemName, - type: item.itemType === 'StorageLocation' ? 'Folder' : 'Table', + type: generateShareItemLabel(item.itemType), status: item.status })) ); diff --git a/frontend/src/modules/Shares/services/getShareObject.js b/frontend/src/modules/Shares/services/getShareObject.js index eebf5cdfd..3075c3e85 100644 --- a/frontend/src/modules/Shares/services/getShareObject.js +++ b/frontend/src/modules/Shares/services/getShareObject.js @@ -18,6 +18,7 @@ export const getShareObject = ({ shareUri, filter }) => ({ consumptionData { s3AccessPointName sharedGlueDatabase + s3bucketName } principal { principalId diff --git a/frontend/src/modules/Shares/views/ShareView.js b/frontend/src/modules/Shares/views/ShareView.js index 0ac51a9ec..1b06cd83a 100644 --- a/frontend/src/modules/Shares/views/ShareView.js +++ b/frontend/src/modules/Shares/views/ShareView.js @@ -65,6 +65,7 @@ import { UpdateRejectReason, UpdateRequestReason } from '../components'; +import { generateShareItemLabel } from 'utils'; function ShareViewHeader(props) { const { @@ -360,7 +361,7 @@ function SharedItem(props) { return ( - {item.itemType === 'Table' ? 
'Table' : 'Folder'}
+          {generateShareItemLabel(item.itemType)}
           {item.itemName}
@@ -796,6 +797,40 @@ const ShareView = () => {
+                  S3 Bucket name (Bucket sharing):
+                  {` ${share.consumptionData.s3bucketName || '-'}`}
+                  copyNotification()}
+                    text={`aws s3 ls s3://${share.consumptionData.s3bucketName}`}
+                  >
+                  {`aws s3 ls s3://${share.consumptionData.s3bucketName}`}
diff --git a/frontend/src/utils/share.js b/frontend/src/utils/share.js
new file mode 100644
@@ -0,0 +1,10 @@
+export const generateShareItemLabel = (itemType) => {
+  switch (itemType) {
+    case 'Table':
+      return 'Table';
+    case 'S3Bucket':
+      return 'S3Bucket';
+    case 'StorageLocation':
+      return 'Folder';
+  }
+};
diff --git a/tests/modules/datasets/conftest.py b/tests/modules/datasets/conftest.py
index 6e911bafa..733b2aa0c 100644
--- a/tests/modules/datasets/conftest.py
+++ b/tests/modules/datasets/conftest.py
@@ -6,13 +6,12 @@
 from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup
 from dataall.core.organizations.db.organization_models import Organization
 from dataall.core.permissions.db.resource_policy_repositories import ResourcePolicy
-from dataall.core.stacks.db.stack_models import Stack
 from dataall.modules.dataset_sharing.db.enums import ShareableType, PrincipalType
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem
 from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_REQUESTER, SHARE_OBJECT_APPROVER
 from dataall.modules.datasets.api.dataset.enums import ConfidentialityClassification
 from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ
-from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation
+from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation, DatasetBucket


 @pytest.fixture(scope='module', autouse=True)
@@ -268,7 +267,7 @@ def dataset_model(db):
     def factory(
         organization: Organization,
         environment: Environment,
-        label: str,
+        label: str
     ) -> Dataset:
         with db.scoped_session() as session:
             dataset = Dataset(
diff --git a/tests/modules/datasets/tasks/conftest.py b/tests/modules/datasets/tasks/conftest.py
index 43f888fe6..7503660fc 100644
--- a/tests/modules/datasets/tasks/conftest.py
+++ b/tests/modules/datasets/tasks/conftest.py
@@ -1,11 +1,10 @@
 import pytest

-from dataall.core.cognito_groups.db.cognito_group_models import Group
 from dataall.core.organizations.db.organization_models import Organization
 from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup
 from dataall.modules.dataset_sharing.db.enums import ShareableType, ShareItemStatus, ShareObjectStatus, PrincipalType
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
+from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket


 @pytest.fixture(scope="module")
@@ -14,6 +13,7 @@ def factory(
         organization: Organization,
         environment: Environment,
         label: str,
+        imported: bool = False
     ) -> Dataset:
         with db.scoped_session() as session:
             dataset = Dataset(
@@ -31,6 +31,8 @@
                 region=environment.region,
                 IAMDatasetAdminUserArn=f"arn:aws:iam::{environment.AwsAccountId}:user/dataset",
                 IAMDatasetAdminRoleArn=f"arn:aws:iam::{environment.AwsAccountId}:role/dataset",
+                imported=imported,
+                importedKmsKey=imported
             )
             session.add(dataset)
             session.commit()
@@ -83,6 +85,35 @@ def factory(dataset: Dataset, label: str) -> DatasetTable:
     yield factory
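The `bucket` fixture that follows uses a factory-with-cache pattern, so module-scoped tests reuse one row per (dataset, name) pair instead of inserting duplicates. A minimal generic sketch of that pattern (hypothetical names, independent of the data.all models):

```python
# Generic sketch of the cached-factory fixture pattern used below:
# one object per key for the lifetime of the test module.
import pytest


@pytest.fixture(scope='module')
def record_factory():
    cache = {}

    def factory(uri: str, name: str) -> dict:
        key = f'{uri}-{name}'
        if key not in cache:
            # build and persist the real object here
            cache[key] = {'uri': uri, 'name': name}
        return cache[key]

    yield factory
```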
+@pytest.fixture(scope='module', autouse=True)
+def bucket(db):
+    cache = {}
+
+    def factory(dataset: Dataset, name) -> DatasetBucket:
+        key = f'{dataset.datasetUri}-{name}'
+        if cache.get(key):
+            return cache.get(key)
+        with db.scoped_session() as session:
+            bucket = DatasetBucket(
+                name=name,
+                label=name,
+                owner=dataset.owner,
+                datasetUri=dataset.datasetUri,
+                region=dataset.region,
+                AwsAccountId=dataset.AwsAccountId,
+                S3BucketName=dataset.S3BucketName,
+                KmsAlias=dataset.KmsAlias,
+                imported=dataset.imported,
+                importedKmsKey=dataset.importedKmsKey,
+            )
+            session.add(bucket)
+            session.commit()
+        cache[key] = bucket
+        return bucket
+
+    yield factory
+
+
 @pytest.fixture(scope="module")
 def share(db):
     def factory(
@@ -99,6 +130,7 @@ def factory(
                 principalType=PrincipalType.Group.value,
                 principalIAMRoleName=env_group.environmentIAMRoleName,
                 status=ShareObjectStatus.Approved.value,
+                groupUri=env_group.groupUri,
             )
             session.add(share)
             session.commit()
@@ -150,3 +182,25 @@ def factory(
             return share_item

     yield factory
+
+
+@pytest.fixture(scope="module")
+def share_item_bucket(db):
+    def factory(
+        share: ShareObject,
+        bucket: DatasetBucket,
+    ) -> ShareObjectItem:
+        with db.scoped_session() as session:
+            share_item = ShareObjectItem(
+                shareUri=share.shareUri,
+                owner="alice",
+                itemUri=bucket.bucketUri,
+                itemType=ShareableType.S3Bucket.value,
+                itemName=bucket.name,
+                status=ShareItemStatus.Share_Approved.value,
+            )
+            session.add(share_item)
+            session.commit()
+            return share_item
+
+    yield factory
diff --git a/tests/modules/datasets/tasks/test_s3_share_manager.py b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py
similarity index 91%
rename from tests/modules/datasets/tasks/test_s3_share_manager.py
rename to tests/modules/datasets/tasks/test_s3_access_point_share_manager.py
index febea47f9..90bb932fa 100644
--- a/tests/modules/datasets/tasks/test_s3_share_manager.py
+++ b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py
@@ -11,7 +11,7 @@
 from dataall.modules.dataset_sharing.aws.s3_client import S3ControlClient
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem
-from dataall.modules.dataset_sharing.services.share_managers import S3ShareManager
+from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager
 from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset

 SOURCE_ENV_ACCOUNT = "111111111111"
@@ -127,7 +127,7 @@ def admin_ap_delegation_bucket_policy():
             "Resource": "arn:aws:s3:::dataall-iris-test-120922-4s47wv71",
         },
         {
-            "Sid": "AllowAllToAdmin",
+            "Sid": "DelegateAccessToAccessPoint",
             "Effect": "Allow",
             "Principal": "*",
             "Action": "s3:*",
@@ -143,7 +143,7 @@ def mock_s3_client(mocker):
     mock_client = MagicMock()
     mocker.patch(
-        'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.S3Client',
+        'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.S3Client',
         mock_client
     )
     mock_client.create_bucket_policy.return_value = None
@@ -153,7 +153,7 @@ def mock_s3_control_client(mocker):
     mock_client = MagicMock()
     mocker.patch(
-        'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.S3ControlClient',
+        'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.S3ControlClient',
         mock_client
     )
@@ -170,7 +170,7 @@ def mock_kms_client(mocker):
     mock_client
= MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.KmsClient', + 'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.KmsClient', mock_client ) mock_client.put_key_policy.return_value = None @@ -192,6 +192,15 @@ def target_dataset_access_control_policy(request): f"arn:aws:s3:datasetregion:{request.param[1]}:accesspoint/{request.param[2]}", f"arn:aws:s3:datasetregion:{request.param[1]}:accesspoint/{request.param[2]}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112" + ] } ], } @@ -229,7 +238,7 @@ def test_manage_bucket_policy_no_policy( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -250,7 +259,7 @@ def test_manage_bucket_policy_no_policy( # Then print(f"Bucket policy generated {created_bucket_policy}") - sid_list = [statement.get("Sid") for statement in + sid_list = [statement.get("Sid") for statement in created_bucket_policy["Statement"] if statement.get("Sid")] assert "AllowAllToAdmin" in sid_list @@ -278,7 +287,7 @@ def test_manage_bucket_policy_existing_policy( s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -296,7 +305,7 @@ def test_manage_bucket_policy_existing_policy( s3_client.create_bucket_policy.assert_not_called() -@pytest.mark.parametrize("target_dataset_access_control_policy", +@pytest.mark.parametrize("target_dataset_access_control_policy", ([("bucketname", "aws_account_id", "access_point_name")]), indirect=True) def test_grant_target_role_access_policy_existing_policy_bucket_not_included( @@ -326,8 +335,11 @@ def test_grant_target_role_access_policy_existing_policy_bucket_not_included( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -349,6 +361,9 @@ def test_grant_target_role_access_policy_existing_policy_bucket_not_included( # Assert that bucket_name is inside the resource array of policy object assert location1.S3BucketName in ",".join(policy_object["Statement"][0]["Resource"]) + assert f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key" in \ + iam_policy["Statement"][1]["Resource"] \ + and "kms:*" in iam_policy["Statement"][1]["Action"] @pytest.mark.parametrize("target_dataset_access_control_policy", ([("dataset1", SOURCE_ENV_ACCOUNT, "test")]), indirect=True) @@ -379,8 +394,11 @@ def test_grant_target_role_access_policy_existing_policy_bucket_included( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -395,7 +413,7 @@ def test_grant_target_role_access_policy_existing_policy_bucket_included( manager.grant_target_role_access_policy() # Then - iam_update_role_policy_mock.assert_not_called() + iam_update_role_policy_mock.assert_called() def test_grant_target_role_access_policy_test_no_policy( @@ -434,12 +452,24 @@ def test_grant_target_role_access_policy_test_no_policy( 
f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{share_item_folder1.S3AccessPointName}", f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{share_item_folder1.S3AccessPointName}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key" + ] } ], } + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -455,7 +485,7 @@ def test_grant_target_role_access_policy_test_no_policy( # Then iam_update_role_policy_mock.assert_called_with( - target_environment.AwsAccountId, share1.principalIAMRoleName, + target_environment.AwsAccountId, share1.principalIAMRoleName, "targetDatasetAccessControlPolicy", json.dumps(expected_policy) ) @@ -498,7 +528,7 @@ def test_update_dataset_bucket_key_policy_with_env_admin( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -614,7 +644,7 @@ def test_update_dataset_bucket_key_policy_without_env_admin( } with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -672,7 +702,7 @@ def test_manage_access_point_and_policy_1( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -740,7 +770,7 @@ def test_manage_access_point_and_policy_2( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -805,7 +835,7 @@ def test_manage_access_point_and_policy_3( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -869,7 +899,7 @@ def test_delete_access_point_policy_with_env_admin_one_prefix( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -928,7 +958,7 @@ def test_delete_access_point_policy_with_env_admin_multiple_prefix( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -974,7 +1004,7 @@ def test_dont_delete_access_point_with_policy( s3_control_client().get_access_point_policy.return_value = json.dumps(existing_ap_policy) # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1014,7 +1044,7 @@ def test_delete_access_point_without_policy( # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1055,9 +1085,18 @@ def test_delete_target_role_access_policy_no_remaining_statement( "Resource": [ f"arn:aws:s3:::{location1.S3BucketName}", f"arn:aws:s3:::{location1.S3BucketName}/*", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}/*", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}", + 
f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key" + ] } ], } @@ -1077,9 +1116,12 @@ def test_delete_target_role_access_policy_no_remaining_statement( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1122,9 +1164,19 @@ def test_delete_target_role_access_policy_with_remaining_statement( "arn:aws:s3:::UNRELATED_BUCKET_ARN", f"arn:aws:s3:::{location1.S3BucketName}", f"arn:aws:s3:::{location1.S3BucketName}/*", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}/*", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}/*", + ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key" + ] } ], } @@ -1136,6 +1188,15 @@ def test_delete_target_role_access_policy_with_remaining_statement( "Effect": "Allow", "Action": ["s3:*"], "Resource": ["arn:aws:s3:::UNRELATED_BUCKET_ARN"], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112" + ] } ], } @@ -1155,9 +1216,12 @@ def test_delete_target_role_access_policy_with_remaining_statement( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1245,7 +1309,7 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_additional_target ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1312,7 +1376,7 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_no_additional_tar ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1331,4 +1395,4 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_no_additional_tar kms_client().put_key_policy.assert_called_with( kms_client().get_key_id.return_value, json.dumps(remaining_policy) - ) + ) \ No newline at end of file diff --git a/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py new file mode 100644 index 000000000..39a3edb51 --- /dev/null +++ b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py @@ -0,0 +1,1606 @@ +import pytest +import json +from unittest.mock import MagicMock + +from typing import Callable + +from dataall.core.cognito_groups.db.cognito_group_models import Group +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from 
dataall.core.organizations.db.organization_models import Organization
+from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
+from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager
+from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket
+
+SOURCE_ENV_ACCOUNT = "111111111111"
+SOURCE_ENV_ROLE_NAME = "dataall-ProducerEnvironment-i6v1v1c2"
+
+TARGET_ACCOUNT_ENV = "222222222222"
+TARGET_ACCOUNT_ENV_ROLE_NAME = "dataall-ConsumersEnvironment-r71ucp4m"
+
+DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly"
+DATAALL_ALLOW_ALL_ADMINS_SID = "AllowAllToAdmin"
+
+
+@pytest.fixture(scope="module")
+def source_environment(env: Callable, org_fixture: Organization, group: Group):
+    source_environment = env(
+        org=org_fixture,
+        account=SOURCE_ENV_ACCOUNT,
+        envname="source_environment",
+        owner=group.owner,
+        group=group.name,
+        role=SOURCE_ENV_ROLE_NAME,
+    )
+    yield source_environment
+
+
+@pytest.fixture(scope="module")
+def source_environment_group(environment_group: Callable, source_environment: Environment, group: Group):
+    source_environment_group = environment_group(source_environment, group.name)
+    yield source_environment_group
+
+
+@pytest.fixture(scope="module")
+def target_environment(env: Callable, org_fixture: Organization, group2: Group):
+    target_environment = env(
+        org=org_fixture,
+        account=TARGET_ACCOUNT_ENV,
+        envname="target_environment",
+        owner=group2.owner,
+        group=group2.name,
+        role=TARGET_ACCOUNT_ENV_ROLE_NAME,
+    )
+    yield target_environment
+
+
+@pytest.fixture(scope="module")
+def target_environment_group(environment_group: Callable, target_environment: Environment, group2: Group):
+    target_environment_group = environment_group(target_environment, group2.name)
+    yield target_environment_group
+
+
+@pytest.fixture(scope="module")
+def dataset_imported(create_dataset: Callable, org_fixture: Organization, source_environment: Environment):
+    dataset_imported = create_dataset(org_fixture, source_environment, "dataset_imported", True)
+    yield dataset_imported
+
+
+@pytest.fixture(scope="module")
+def dataset2(create_dataset: Callable, org_fixture: Organization, source_environment: Environment):
+    dataset2 = create_dataset(org_fixture, source_environment, "dataset2")
+    yield dataset2
+
+
+@pytest.fixture(scope="module")
+def bucket2(bucket: Callable, dataset2: Dataset) -> DatasetBucket:
+    yield bucket(dataset2, "bucket2")
+
+
+@pytest.fixture(scope="module")
+def bucket3(bucket: Callable, dataset_imported: Dataset) -> DatasetBucket:
+    yield bucket(dataset_imported, "bucket3")
+
+
+@pytest.fixture(scope="module")
+def share2(share: Callable, dataset2: Dataset,
+           target_environment: Environment,
+           target_environment_group: EnvironmentGroup) -> ShareObject:
+    share2 = share(dataset2, target_environment, target_environment_group)
+    yield share2
+
+
+@pytest.fixture(scope="module")
+def share3(share: Callable, dataset_imported: Dataset,
+           target_environment: Environment,
+           target_environment_group: EnvironmentGroup) -> ShareObject:
+    share3 = share(dataset_imported, target_environment, target_environment_group)
+    yield share3
+
+
+@pytest.fixture(scope="function")
+def base_bucket_policy(dataset2):
+    bucket_policy = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Deny",
+                "Principal": {"AWS": "*"},
+                "Action": "s3:*",
+                "Resource": [f"arn:aws:s3:::{dataset2.S3BucketName}", f"arn:aws:s3:::{dataset2.S3BucketName}/*"],
+                "Condition": {"Bool": {"aws:SecureTransport": "false"}},
+            }
+        ],
+    }
+    return bucket_policy
+
+
+def base_kms_key_policy(target_environment_samlGrpName: str):
+    kms_policy = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Sid": f"{target_environment_samlGrpName}",
+                "Effect": "Allow",
+                "Principal": {"AWS": "*"},
+                "Action": "kms:Decrypt",
+                "Resource": "*",
+                "Condition": {"StringLike": {"aws:userId": f"{target_environment_samlGrpName}:*"}},
+            }
+        ],
+    }
+    return kms_policy
+
+
+def complete_access_bucket_policy(target_requester_arn, s3_bucket_name, owner_roleId):
+    bucket_policy = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Deny",
+                "Principal": {
+                    "AWS": "*"
+                },
+                "Sid": "RequiredSecureTransport",
+                "Action": "s3:*",
+                "Resource": [
+                    f"arn:aws:s3:::{s3_bucket_name}",
+                    f"arn:aws:s3:::{s3_bucket_name}/*"
+                ],
+                "Condition": {
+                    "Bool": {
+                        "aws:SecureTransport": "false"
+                    }
+                }
+            },
+            {
+                "Sid": f"{DATAALL_ALLOW_ALL_ADMINS_SID}",
+                "Effect": "Allow",
+                "Principal": "*",
+                "Action": "s3:*",
+                "Resource": [
+                    f"arn:aws:s3:::{s3_bucket_name}",
+                    f"arn:aws:s3:::{s3_bucket_name}/*"
+                ],
+                "Condition": {
+                    "StringLike": {
+                        "aws:userId": owner_roleId
+                    }
+                }
+            },
+            {
+                "Sid": f"{DATAALL_READ_ONLY_SID}",
+                "Effect": "Allow",
+                "Principal": {
+                    "AWS": [
+                        f"{target_requester_arn}"
+                    ]
+                },
+                "Action": [
+                    "s3:List*",
+                    "s3:GetObject"
+                ],
+                "Resource": [
+                    f"arn:aws:s3:::{s3_bucket_name}",
+                    f"arn:aws:s3:::{s3_bucket_name}/*"
+                ]
+            }
+        ]
+    }
+
+    return bucket_policy
+
+
+def mock_s3_client(mocker):
+    mock_client = MagicMock()
+    mocker.patch(
+        'dataall.modules.dataset_sharing.services.share_managers.s3_bucket_share_manager.S3Client',
+        mock_client
+    )
+    mock_client.create_bucket_policy.return_value = None
+    return mock_client
+
+
+def mock_kms_client(mocker):
+    mock_client = MagicMock()
+    mocker.patch(
+        'dataall.modules.dataset_sharing.services.share_managers.s3_bucket_share_manager.KmsClient',
+        mock_client
+    )
+    mock_client.put_key_policy.return_value = None
+    return mock_client
+
+
+# For the test cases below, dataset2, share2, the source/target environments and their environment groups remain the same
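The tests that follow pin down how the manager merges grants into an existing bucket policy: the target role ARN is appended to the `DataAll-Bucket-ReadOnly` statement's principals if the statement exists, or the whole statement is created if it doesn't. A standalone sketch of that merge, assuming the statement shape from `complete_access_bucket_policy()` above (illustrative, not the `S3BucketShareManager` code):

```python
# Illustrative merge: append the requester role to the read-only statement,
# creating the statement if absent; existing principals are never dropped.
def add_read_only_principal(policy: dict, role_arn: str, bucket_name: str) -> dict:
    statement = next(
        (s for s in policy['Statement'] if s.get('Sid') == DATAALL_READ_ONLY_SID),
        None,
    )
    if statement is None:
        policy['Statement'].append({
            'Sid': DATAALL_READ_ONLY_SID,
            'Effect': 'Allow',
            'Principal': {'AWS': [role_arn]},
            'Action': ['s3:List*', 's3:GetObject'],
            'Resource': [f'arn:aws:s3:::{bucket_name}', f'arn:aws:s3:::{bucket_name}/*'],
        })
    elif role_arn not in statement['Principal']['AWS']:
        statement['Principal']['AWS'].append(role_arn)
    return policy
```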
+def test_grant_role_bucket_policy_with_no_policy_present(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset2,
+    bucket2,
+    db,
+    share2: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given
+    # No bucket policy. A default bucket policy should be formed with the DataAll-Bucket-ReadOnly, AllowAllToAdmin & RequiredSecureTransport Sids
+    s3_client = mock_s3_client(mocker)
+    s3_client().get_bucket_policy.return_value = None
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn",
+        return_value="arn:role",
+    )
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_role_ids",
+        return_value=[1, 2, 3],
+    )
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset2,
+            share2,
+            bucket2,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        manager.grant_role_bucket_policy()
+
+        s3_client().create_bucket_policy.assert_called()
+
+        # Get the modified bucket policy
+        modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1])
+        # Check all the Sids are present
+        # Check that the S3 bucket resources are also present
+        assert f"{DATAALL_ALLOW_ALL_ADMINS_SID}" in modified_bucket_policy["Statement"][0]["Sid"]
+        assert modified_bucket_policy["Statement"][0]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}',
+                                                                      f'arn:aws:s3:::{dataset2.S3BucketName}/*']
+        assert modified_bucket_policy["Statement"][0]["Condition"]["StringLike"]["aws:userId"] == ['1:*', '2:*', '3:*']
+        assert "RequiredSecureTransport" in modified_bucket_policy["Statement"][1]["Sid"]
+        assert modified_bucket_policy["Statement"][1]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}',
+                                                                      f'arn:aws:s3:::{dataset2.S3BucketName}/*']
+        assert f"{DATAALL_READ_ONLY_SID}" in modified_bucket_policy["Statement"][2]["Sid"]
+        assert modified_bucket_policy["Statement"][2]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}',
+                                                                      f'arn:aws:s3:::{dataset2.S3BucketName}/*']
+        assert modified_bucket_policy["Statement"][2]["Principal"]["AWS"] == [
+            f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}"]
+
+
+def test_grant_role_bucket_policy_with_default_complete_policy(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset2,
+    db,
+    bucket2,
+    share2: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given
+    # Bucket policy containing the required "AllowAllToAdmin" and "DataAll-Bucket-ReadOnly" Sids
+    # Bucket policy shouldn't be modified after calling the "grant_role_bucket_policy" function
+
+    target_arn = f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}"
+
+    bucket_policy = complete_access_bucket_policy(target_arn,
+                                                  dataset2.S3BucketName, "ABNCSJ81982393")
+
+    s3_client = mock_s3_client(mocker)
+    s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy)
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset2,
+            share2,
+            bucket2,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        manager.grant_role_bucket_policy()
+
+        s3_client().create_bucket_policy.assert_called()
+
+        # Get the bucket policy: it should be the same as the existing one
+        created_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1])
+
+        # Check that nothing is removed from the policy and the policy remains the same
+        for policy in created_bucket_policy["Statement"]:
+            assert policy["Sid"] in json.dumps(bucket_policy)
+
+
+def test_grant_role_bucket_policy_with_policy_and_no_allow_owner_sid_and_no_read_only_sid(
+    mocker,
+    source_environment_group,
target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # base bucket policy + # Check if both "AllowAllToAdmin" and "DataAll-Bucket-ReadOnly" Sid's Statements are added to the policy + + bucket_policy = base_bucket_policy + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # AllowToAdmin, DataAll-Bucket-ReadOnly Sid's should be attached now + for policy in modified_bucket_policy["Statement"]: + if "Sid" in policy: + assert policy["Sid"] in [f"{DATAALL_ALLOW_ALL_ADMINS_SID}", f"{DATAALL_READ_ONLY_SID}"] + + +def test_grant_role_bucket_policy_with_another_read_only_role( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given base bucket policy with "DataAll-Bucket-ReadOnly" + bucket_policy = base_bucket_policy + + target_arn = f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + + # Append a policy for read only role + bucket_policy["Statement"].append( + { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + "SomeTargetResourceArn" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::someS3Bucket", + f"arn:aws:s3:::someS3Bucket/*" + ] + }) + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # AllowToAdmin Sid should be attached now. 
Also DataAll-Bucket-ReadOnly Sid should be present + for policy in modified_bucket_policy["Statement"]: + if "Sid" in policy: + assert policy["Sid"] in [f"{DATAALL_ALLOW_ALL_ADMINS_SID}", f"{DATAALL_READ_ONLY_SID}"] + + # Check if the principal was appended and not overridden into the DataAll-Bucket-ReadOnly + assert len(modified_bucket_policy["Statement"][1]["Principal"]["AWS"]) == 2 + assert modified_bucket_policy["Statement"][1]["Principal"]["AWS"][0] == "SomeTargetResourceArn" + + +def test_grant_s3_iam_access_with_no_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # There is not existing IAM policy in the requesters account for the dataset's S3bucket + # Check if the update_role_policy func is called and policy statements are added + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=None) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_s3_iam_access() + + iam_update_role_policy_mock.assert_called() + + iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + # Assert if the IAM role policy with S3 and KMS permissions was created + assert len(iam_policy["Statement"]) == 2 + assert len(iam_policy["Statement"][0]["Resource"]) == 2 + assert len(iam_policy["Statement"][1]["Resource"]) == 1 + assert f"arn:aws:s3:::{dataset2.S3BucketName}" in iam_policy["Statement"][0]["Resource"] and "s3:*" in iam_policy["Statement"][0]["Action"] + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" in \ + iam_policy["Statement"][1]["Resource"] \ + and "kms:*" in iam_policy["Statement"][1]["Action"] + + +def test_grant_s3_iam_access_with_policy_and_target_resources_not_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given policy with some other bucket as resource + # Check if the correct resource is attached/appended + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::S3Bucket", + f"arn:aws:s3:::S3Bucket/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:12121121121:key/some-kms-key" + ] + } + ] + } + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=policy) + + assert len(policy["Statement"]) == 2 + assert len(policy["Statement"][0]["Resource"]) == 2 + assert len(policy["Statement"][1]["Resource"]) == 1 + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_s3_iam_access() + + 
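The assertions that follow read the policy document back off the patched mock: `IAM.update_role_policy` is invoked positionally as (account id, role name, policy name, policy document), so the JSON document is the fourth positional argument. A small hypothetical test helper for that extraction, matching how these tests use `call_args`:

```python
# Hypothetical test helper: pull the JSON policy document out of the
# patched IAM.update_role_policy mock (4th positional argument).
import json
from unittest.mock import MagicMock


def last_policy_document(update_role_policy_mock: MagicMock) -> dict:
    account_id, role_name, policy_name, policy_document = update_role_policy_mock.call_args.args
    return json.loads(policy_document)
```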
iam_update_role_policy_mock.assert_called()
+
+    iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3])
+
+    # Assert that new resources were appended
+    assert len(policy["Statement"]) == 2
+    assert len(iam_policy["Statement"][0]["Resource"]) == 4
+    assert f'arn:aws:s3:::{dataset2.S3BucketName}' in iam_policy["Statement"][0]["Resource"]
+    assert len(iam_policy["Statement"][1]["Resource"]) == 2
+    assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" in iam_policy["Statement"][1]["Resource"]
+
+
+# Test to check that an existing complete policy, with the target S3 and KMS resources already present, is left unchanged
+def test_grant_s3_iam_access_with_complete_policy_present(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset2,
+    db,
+    bucket2,
+    share2: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given a complete policy present with the required target resources
+    # Check that the policy created after calling the function is the same as the existing policy
+
+    policy = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Action": [
+                    "s3:*"
+                ],
+                "Resource": [
+                    f"arn:aws:s3:::{dataset2.S3BucketName}",
+                    f"arn:aws:s3:::{dataset2.S3BucketName}/*"
+                ]
+            },
+            {
+                "Effect": "Allow",
+                "Action": [
+                    "kms:*"
+                ],
+                "Resource": [
+                    f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key"
+                ]
+            }
+        ]
+    }
+
+    mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=policy)
+
+    kms_client = mock_kms_client(mocker)
+    kms_client().get_key_id.return_value = "kms-key"
+
+    iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None)
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset2,
+            share2,
+            bucket2,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        manager.grant_s3_iam_access()
+
+        # Assert that the IAM policy is the same as the existing complete policy
+        iam_update_role_policy_mock.assert_called()
+
+        created_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3])
+
+        assert len(created_iam_policy["Statement"]) == 2
+        assert policy["Statement"][0]["Resource"] == created_iam_policy["Statement"][0]["Resource"] and policy["Statement"][0]["Action"] == created_iam_policy["Statement"][0]["Action"]
+        assert policy["Statement"][1]["Resource"] == created_iam_policy["Statement"][1]["Resource"] and policy["Statement"][1]["Action"] == \
+               created_iam_policy["Statement"][1]["Action"]
+
+
+def test_grant_dataset_bucket_key_policy_with_complete_policy_present(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset2,
+    db,
+    bucket2,
+    share2: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given a complete existing policy
+    # Check that KMS.put_key_policy is not called
+    kms_client = mock_kms_client(mocker)
+    kms_client().get_key_id.return_value = "kms-key"
+
+    existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName)
+
+    kms_client().get_key_policy.return_value = json.dumps(existing_key_policy)
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_role_id",
+        return_value=target_environment.SamlGroupName,
+    )
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset2,
+            share2,
+            bucket2,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        manager.grant_dataset_bucket_key_policy()
+
+        kms_client().put_key_policy.assert_not_called()
+
+
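The two neighbouring tests exercise the key-policy merge from both sides: an existing complete policy is left untouched, and a policy missing the requester's Sid gets a `kms:Decrypt` statement appended, scoped by an `aws:userId` condition. An illustrative sketch of that merge, following the statement shape of `base_kms_key_policy()` (not the manager implementation):

```python
# Illustrative sketch: append a Decrypt statement for the requester only if
# a statement with that Sid is not already present.
def ensure_decrypt_statement(key_policy: dict, requester_id: str) -> dict:
    if not any(s.get('Sid') == requester_id for s in key_policy['Statement']):
        key_policy['Statement'].append({
            'Sid': requester_id,
            'Effect': 'Allow',
            'Principal': {'AWS': '*'},
            'Action': 'kms:Decrypt',
            'Resource': '*',
            'Condition': {'StringLike': {'aws:userId': f'{requester_id}:*'}},
        })
    return key_policy
```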
+def test_grant_dataset_bucket_key_policy_with_target_requester_id_absent(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset2,
+    db,
+    bucket2,
+    share2: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given a policy where the target requester is not present
+    # Check that KMS.put_key_policy is called and that the policy is modified
+
+    kms_client = mock_kms_client(mocker)
+    kms_client().get_key_id.return_value = "kms-key"
+
+    existing_key_policy = base_kms_key_policy("OtherTargetSamlId")
+
+    kms_client().get_key_policy.return_value = json.dumps(existing_key_policy)
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_role_id",
+        return_value=target_environment.SamlGroupName,
+    )
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset2,
+            share2,
+            bucket2,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        manager.grant_dataset_bucket_key_policy()
+
+        kms_client().put_key_policy.assert_called()
+
+        kms_key_policy = json.loads(kms_client().put_key_policy.call_args.args[1])
+
+        assert len(kms_key_policy["Statement"]) == 2
+        assert kms_key_policy["Statement"][1]["Sid"] == target_environment.SamlGroupName
+        assert kms_key_policy["Statement"][1]["Action"] == "kms:Decrypt"
+        assert target_environment.SamlGroupName in kms_key_policy["Statement"][1]["Condition"]["StringLike"]["aws:userId"]
+
+
+# Test case to check that the KMS key policy functions are not called when an imported dataset has no imported KMS key
+def test_grant_dataset_bucket_key_policy_and_default_bucket_key_policy(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset_imported,
+    db,
+    share3: ShareObject,
+    bucket3,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given
+    # Dataset is imported and it doesn't have an imported KMS key
+    # Mocking KMS key functions -> check that they are not called
+    # Mocking KMS tag functions -> check that they are not called
+
+    existing_key_policy = base_kms_key_policy("OtherTargetSamlId")
+
+    kms_client = mock_kms_client(mocker)
+    kms_client().get_key_id.return_value = "kms-key"
+
+    kms_client().get_key_policy.return_value = json.dumps(existing_key_policy)
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_role_id",
+        return_value=target_environment.SamlGroupName,
+    )
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset_imported,
+            share3,
+            bucket3,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        # bucket3 should not have importedKmsKey, to simulate that no KMS key was added while importing the dataset
+        bucket3.importedKmsKey = False
+        session.add(bucket3)
+
+        manager.grant_dataset_bucket_key_policy()
+
+        # Assert that when a dataset is imported and doesn't have importedKmsKey, the KMS policy functions are not triggered
+        kms_client().get_key_policy.assert_not_called()
+        kms_client().put_key_policy.assert_not_called()
+
+        bucket3.importedKmsKey = True
+        session.add(bucket3)
+
+
+def test_grant_dataset_bucket_key_policy_with_imported(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset_imported,
+    bucket3,
+    db,
+    share3: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment
+):
+    # Given
+    # Dataset is imported and it has an imported KMS key
+    # Mocking KMS key functions
+    # Mocking KMS tag functions
+    # Check that the key policy is modified and the target requester Sid is added
+
+    existing_key_policy =
base_kms_key_policy("OtherTargetSamlId") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + # Assert that when a dataset is imported and has importedKey + # policy is fetched and the target requester id SID is attached to it + kms_client().get_key_policy.assert_called() + kms_client().put_key_policy.assert_called() + updated_bucket_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(updated_bucket_policy["Statement"]) == 2 + assert updated_bucket_policy["Statement"][1]["Sid"] == target_environment.SamlGroupName + assert target_environment.SamlGroupName in updated_bucket_policy["Statement"][1]["Condition"]["StringLike"][ + "aws:userId"] + + +def test_delete_target_role_bucket_policy_with_no_read_only_sid( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with no DataAll-Bucket-ReadOnly Sid + # S3 function to update bucket policy (create_bucket_policy) should not trigger + + bucket_policy = base_bucket_policy + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_not_called() + + +def test_delete_target_role_bucket_policy_with_multiple_principals_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with DataAll-Bucket-ReadOnly Sid And Multiple Principals + # Check if the appropriate AWS arn is removed and 'SomeotherArn' is retained + + bucket_policy = base_bucket_policy + + addition_to_policy = { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + "SomeotherArn", + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + } + + bucket_policy["Statement"].append(addition_to_policy) + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + modified_bucket_policy = 
json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check that the 'DataAll-Bucket-ReadOnly' Sid is still present + # Check that 'SomeotherArn' is still present and the target ARN is removed + assert modified_bucket_policy["Statement"][1]["Sid"] == f"{DATAALL_READ_ONLY_SID}" + assert len(modified_bucket_policy["Statement"][1]["Principal"]["AWS"]) == 1 + assert 'SomeotherArn' in modified_bucket_policy["Statement"][1]["Principal"]["AWS"] + assert f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" not in \ + modified_bucket_policy["Statement"][1]["Principal"]["AWS"] + + +def test_delete_target_role_bucket_policy_with_one_principal_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base bucket policy with the DataAll-Bucket-ReadOnly Sid and a single target principal + # Bucket policy should not have the DataAll-Bucket-ReadOnly Sid after delete_target_role_bucket_policy is called + + bucket_policy = base_bucket_policy + + addition_to_policy = { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + } + + bucket_policy["Statement"].append(addition_to_policy) + + assert len(bucket_policy["Statement"]) == 2 + + sid_list = [statement["Sid"] for statement in bucket_policy["Statement"] if "Sid" in statement] + assert f"{DATAALL_READ_ONLY_SID}" in sid_list + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check that the 'DataAll-Bucket-ReadOnly' Sid is removed completely + assert len(modified_bucket_policy["Statement"]) == 1 + sid_list = [statement["Sid"] for statement in modified_bucket_policy["Statement"] if "Sid" in statement] + assert f"{DATAALL_READ_ONLY_SID}" not in sid_list + + +def test_delete_target_role_access_policy_no_resource_of_datasets_s3_bucket( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM policy which doesn't contain the target S3 bucket resources + # IAM.delete_role_policy should not be called; IAM.update_role_policy is called but leaves the resources unchanged + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::someOtherBucket", + f"arn:aws:s3:::someOtherBucket/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + kms_client = mock_kms_client(mocker) +
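# mock_kms_client (test helper) replaces the manager's KMS client with a mock; calling + # kms_client() returns that mocked client instance, so the return values configured on it + # are exactly what S3BucketShareManager receives when it talks to KMS. +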
kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_called() + iam_delete_role_policy_mock.assert_not_called() + + # Get the updated IAM policy and compare it with the existing one + updated_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + assert len(updated_iam_policy["Statement"]) == 2 + assert "arn:aws:s3:::someOtherBucket,arn:aws:s3:::someOtherBucket/*" == ",".join(updated_iam_policy["Statement"][0]["Resource"]) + assert "arn:aws:kms:us-east-1:121231131212:key/some-key-2112" == ",".join( + updated_iam_policy["Statement"][1]["Resource"]) + + +def test_delete_target_role_access_policy_with_multiple_s3_buckets_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM policy with multiple bucket resources along with the target bucket's resources + # Check that IAM.update_role_policy is called and that only the target bucket's resources are removed + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::someOtherBucket", + f"arn:aws:s3:::someOtherBucket/*", + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*", + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_called() + iam_delete_role_policy_mock.assert_not_called() + + updated_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + assert f"arn:aws:s3:::{dataset2.S3BucketName}" not in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::{dataset2.S3BucketName}/*" not in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::someOtherBucket" in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::someOtherBucket/*" in updated_iam_policy["Statement"][0]["Resource"] + + assert 
f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" not in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112" in updated_iam_policy["Statement"][1]["Resource"] + + +def test_delete_target_role_access_policy_with_one_s3_bucket_and_one_kms_resource_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM policy with the target bucket's resources only + # Check that IAM.delete_role_policy is called + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*", + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_not_called() + iam_delete_role_policy_mock.assert_called() + + +def test_delete_target_role_bucket_key_policy_with_no_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # a complete existing KMS key policy with no target requester id in it + # Check that KMS.put_key_policy is not called + + existing_key_policy = base_kms_key_policy("Some_other_requester_id") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_not_called() + + +def test_delete_target_role_bucket_key_policy_with_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with target requester id in it + # Check that KMS.put_key_policy is called and that the statement corresponding to the target Sid is removed + + existing_key_policy = 
base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 0 + + +def test_delete_target_role_bucket_key_policy_with_multiple_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with multiple target requester ids + # Check that KMS.put_key_policy is called and that only the statement corresponding to the target Sid is removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + existing_key_policy["Statement"].append( + { + "Sid": "some_other_target_sid", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": "some_other_target_sid:*"}} + } + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 1 + assert new_kms_policy["Statement"][0]["Sid"] == "some_other_target_sid" + assert target_environment.SamlGroupName not in json.dumps(new_kms_policy) + + +# Test for delete_target_role_bucket_key_policy when dataset is imported +def test_delete_target_role_bucket_key_policy_with_target_requester_id_and_imported_dataset( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + db, + bucket3, + share3: ShareObject, + source_environment: Environment, + target_environment: Environment + ): + # Given complete existing KMS key policy with target requester id in it + # and that the dataset is imported and has an imported KMS key + # Check that KMS.put_key_policy is called and that the statement corresponding to the target Sid is removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + 
"dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share3, + target_bucket=bucket3, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 0 + + +# Test for delete_target_role_bucket_key_policy when dataset is imported and the imported KMS key is missing +def test_delete_target_role_bucket_key_policy_with_target_requester_id_and_imported_dataset_with_no_imported_kms_key( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + db, + bucket3, + share3: ShareObject, + source_environment: Environment, + target_environment: Environment + ): + # Given complete existing KMS key policy with target requester id in it + # and the dataset is imported but doesn't have importedKmsKey + # In that case, KMS.put_key_policy should not be called + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + # bucket3 should not have importedKmsKey, to simulate that no key was added while importing the dataset + bucket3.importedKmsKey = False + session.add(bucket3) + + manager.delete_target_role_bucket_key_policy( + share=share3, + target_bucket=bucket3, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_not_called() + + bucket3.importedKmsKey = True + session.add(bucket3) + + +def test_delete_target_role_bucket_key_policy_missing_sid( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy plus an additional statement that has no Sid + # Check that KMS.put_key_policy is called, the target Sid statement is removed, and the statement without a Sid is retained + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + missing_sid_statement = { + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": "some_other_target_sid:*"}} + } + existing_key_policy["Statement"].append( + missing_sid_statement + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + 
source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 1 + assert new_kms_policy["Statement"][0] == missing_sid_statement + assert target_environment.SamlGroupName not in json.dumps(new_kms_policy) diff --git a/tests/modules/datasets/test_share.py b/tests/modules/datasets/test_share.py index 60909a65b..5ff64b965 100644 --- a/tests/modules/datasets/test_share.py +++ b/tests/modules/datasets/test_share.py @@ -401,6 +401,10 @@ def create_share_object(client, username, group, groupUri, environmentUri, datas userRoleForShareObject requestPurpose rejectPurpose + dataset { + datasetUri + datasetName + } } } """
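A note for reviewers on the contract the delete_target_role_bucket_policy tests above pin down: the manager removes only the target role's ARN from the DataAll-Bucket-ReadOnly statement when other principals remain, and drops the statement entirely when the target role was its last principal. A minimal sketch of that behavior (prune_read_only_statement is an illustrative name, not the manager's actual API; it assumes the principal entry is a JSON list, as in the test fixtures):

def prune_read_only_statement(bucket_policy: dict, target_arn: str, sid: str = "DataAll-Bucket-ReadOnly") -> dict:
    statements = []
    for statement in bucket_policy["Statement"]:
        if statement.get("Sid") == sid:
            remaining = [arn for arn in statement["Principal"]["AWS"] if arn != target_arn]
            if not remaining:
                continue  # the target was the only principal -> drop the whole statement
            statement["Principal"]["AWS"] = remaining
        statements.append(statement)
    bucket_policy["Statement"] = statements
    return bucket_policy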
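Likewise, the delete_target_role_access_policy tests encode a two-way decision: the target bucket's ARNs and the dataset's KMS key ARN are filtered out of the requester role's inline policy, after which IAM.update_role_policy is used when other resources remain and IAM.delete_role_policy when nothing else was present. A sketch under those assumptions (illustrative helper, not the manager's real implementation):

def prune_target_resources(iam_policy: dict, bucket_name: str, kms_key_arn: str) -> bool:
    """Remove the shared bucket's ARNs and its KMS key ARN from every statement.
    Returns True if any resources remain (caller updates the inline policy),
    False if the policy is now empty (caller deletes the inline policy)."""
    target = {f"arn:aws:s3:::{bucket_name}", f"arn:aws:s3:::{bucket_name}/*", kms_key_arn}
    for statement in iam_policy["Statement"]:
        statement["Resource"] = [arn for arn in statement["Resource"] if arn not in target]
    return any(statement["Resource"] for statement in iam_policy["Statement"])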
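Finally, the delete_target_role_bucket_key_policy tests fix the KMS key policy behavior: the statement whose Sid matches the target requester's role id is removed, and every other statement, including ones with no Sid at all, survives (see test_delete_target_role_bucket_key_policy_missing_sid). A minimal sketch, assuming the manager matches purely on the Sid (helper name is illustrative):

def remove_statement_by_sid(key_policy: dict, target_sid: str) -> dict:
    # Statements without a Sid are preserved, since statement.get("Sid") is None for them
    key_policy["Statement"] = [
        statement for statement in key_policy["Statement"] if statement.get("Sid") != target_sid
    ]
    return key_policy

# Example mirroring test_delete_target_role_bucket_key_policy_with_multiple_target_requester_id:
policy = {"Statement": [{"Sid": "RequesterSamlGroup"}, {"Sid": "some_other_target_sid"}]}
assert [s["Sid"] for s in remove_statement_by_sid(policy, "RequesterSamlGroup")["Statement"]] == ["some_other_target_sid"]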