From bd254847dff3ac81ca083c9cb0b813b65fe1f0c6 Mon Sep 17 00:00:00 2001 From: Petros Kalos Date: Mon, 9 Dec 2024 11:39:47 +0200 Subject: [PATCH 1/5] fix test_get_dashboard_unauthorized (#1736) ### Feature or Bugfix - Bugfix ### Detail Before #1729 an unauthorized getDashboard query meant a global failure, after #1729 it returns partial data and only raises an error for `environment` and `restricted` fields. ```json {'data': {'getDashboard': {'SamlGroupName': 'testGroup1', 'created': '2024-12-06 15:27:15.811303', 'dashboardUri': 'obfuscated_uri', 'description': 'No description provided', 'environment': None, 'label': '2024-12-06T14:57:28.249142', 'name': '2024-12-06t14-57-28-249142', 'owner': 'testUser1@amazonaws.com', 'restricted': None, 'tags': ['2024-12-06T14:57:28.249142'], 'terms': {'count': 0, 'nodes': []}, 'userRoleForDashboard': 'Shared'}}, 'errors': [{'locations': [{'column': 17, 'line': 13}], 'message': '\n' ' An error occurred (UnauthorizedOperation) ' 'when calling GET_DASHBOARD operation:\n' ' User: aUser@amazonaws.com is not ' 'authorized to perform: GET_DASHBOARD on resource: ' 'obfuscated_uri\n' ' ', 'path': ['getDashboard', 'restricted']}, {'locations': [{'column': 17, 'line': 17}], 'message': '\n' ' An error occurred (UnauthorizedOperation) ' 'when calling GET_ENVIRONMENT operation:\n' ' User: aUser@amazonaws.com is not ' 'authorized to perform: GET_ENVIRONMENT on resource: ' 'obfuscated_uri\n' ' ', 'path': ['getDashboard', 'environment']}]} ``` ### Relates Issue introduced at #1729 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../modules/dashboards/test_dashboard.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests_new/integration_tests/modules/dashboards/test_dashboard.py b/tests_new/integration_tests/modules/dashboards/test_dashboard.py index 7fa48f768..1375b802f 100644 --- a/tests_new/integration_tests/modules/dashboards/test_dashboard.py +++ b/tests_new/integration_tests/modules/dashboards/test_dashboard.py @@ -1,6 +1,15 @@ import pytest from assertpy import assert_that +from integration_tests.core.environment.utils import set_env_params +from integration_tests.errors import GqlError +from integration_tests.modules.dashboards.conftest import create_dataall_dashboard +from integration_tests.modules.dashboards.mutations import ( + update_dashboard, + delete_dashboard, + approve_dashboard_share, + reject_dashboard_share, +) from integration_tests.modules.dashboards.queries import ( search_dashboards, get_dashboard, @@ -8,15 +17,6 @@ get_author_session, get_reader_session, ) -from integration_tests.modules.dashboards.mutations import ( - update_dashboard, - delete_dashboard, - approve_dashboard_share, - reject_dashboard_share, -) -from integration_tests.modules.dashboards.conftest import create_dataall_dashboard -from integration_tests.core.environment.utils import set_env_params -from integration_tests.errors import GqlError UPDATED_DESC = 'new description' @@ -45,9 +45,7 @@ def test_list_dashboards(client1, client2, session_id, dashboard1): def test_get_dashboard_unauthorized(client2, dashboard1): - assert_that(get_dashboard).raises(GqlError).when_called_with(client2, dashboard1.dashboardUri).contains( - 'UnauthorizedOperation', 'GET_DASHBOARD', dashboard1.dashboardUri - ) + assert_that(get_dashboard(client2, dashboard1.dashboardUri)).contains_entry(restricted=None, environment=None) def test_update_dashboard(client1, dashboard1): From 3d97d9ec47c811f349a3c8ebe34aab6779d106b2 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Tue, 10 Dec 2024 08:16:58 +0100 Subject: [PATCH 2/5] Consistent get_ permissions - S3_Datasets (#1727) ### Feature or Bugfix - Feature/Bugfix ### Detail This PR is the first part of a series and only handles S3_Datasets and its child components Tables and Folders #### Current implementation Most API calls on a particular resource are restricted by GET_RESOURCE permissions. But for resources that are indexed in the Catalog, some metadata is considered public as it is useful for data consumers to discover and understand the data assets. Users will click on these resources from the Catalog view and call one of the following API calls: - getDataset - getDatasetStorageLocation - getDatasetTable - getDashboard - getRedshiftDataset - getRedshiftTable From the above list, initially all are decorated with resource_permission checks except for getDataset and getDatasetTable. #### Target implementation - Public information should be available for data consumers to explore, that means that we first need to remove the resource_permission checks from the list of APIs. - Not all information is public, to get AWS information and other restricted metadata we still need to verify GET_X permissions on the resource. - For restricted metadata, we should provide default messages that do not break the frontend In addition in this PR some unused fields are removed and `syncTables` is simplified to return an integer with the count of synced tables ### Relates ### Testing For each of the following I tested with a user with GET permissions and without GET permissions. FE views render and show information or unauthorized to view info placeholder - [X] Dataset View, Dataset Edit Form and list Datasets - [x] Dataset Data tab with list of tables and folders - [X] Table view, Table Edit - [X] Folder view and Folder Edit Other checks - [x] Complete share request - [X] With requester check table and folder and view the details of the account... - [X] Worksheets query with owned table - [X] Worksheets query with shared table - [X] Crawl dataset - correct S3 path - [X] Sync tables - correct params ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../s3_datasets/api/dataset/resolvers.py | 6 ++ .../modules/s3_datasets/api/dataset/types.py | 45 +++++----- .../api/storage_location/resolvers.py | 6 ++ .../s3_datasets/api/storage_location/types.py | 48 +++-------- .../s3_datasets/api/table/mutations.py | 2 +- .../s3_datasets/api/table/resolvers.py | 6 ++ .../modules/s3_datasets/api/table/types.py | 17 +++- .../services/dataset_location_service.py | 10 ++- .../s3_datasets/services/dataset_service.py | 9 +- .../services/dataset_table_service.py | 12 +-- .../Folders/components/FolderOverview.js | 6 +- .../Folders/components/FolderS3Properties.js | 8 +- .../services/getDatasetStorageLocation.js | 10 ++- .../components/DatasetConsoleAccess.js | 15 ++-- .../S3_Datasets/components/DatasetFolders.js | 7 +- .../S3_Datasets/components/DatasetOverview.js | 4 +- .../components/DatasetStartCrawlerModal.js | 2 +- .../S3_Datasets/components/DatasetTables.js | 18 ++-- .../S3_Datasets/components/DatasetUpload.js | 2 +- .../services/listDatasetStorageLocations.js | 3 + .../S3_Datasets/services/startGlueCrawler.js | 2 - .../S3_Datasets/services/syncTables.js | 20 +---- .../S3_Datasets/views/DatasetEditForm.js | 2 +- .../Tables/services/getDatasetTable.js | 11 +-- .../src/modules/Tables/views/TableView.js | 6 +- .../modules/Worksheets/views/WorksheetView.js | 12 ++- .../services/graphql/Datasets/getDataset.js | 16 ++-- .../graphql/Datasets/listDatasetTables.js | 9 +- .../Datasets/listS3DatasetsOwnedByEnvGroup.js | 10 ++- .../utils/helpers/emptyPrintUnauthorized.js | 5 ++ frontend/src/utils/helpers/index.js | 1 + tests/modules/s3_datasets/conftest.py | 29 +++---- tests/modules/s3_datasets/test_dataset.py | 82 +++++++++---------- .../s3_datasets/test_dataset_glossary.py | 6 +- .../s3_datasets/test_dataset_location.py | 8 +- .../modules/s3_datasets/test_dataset_table.py | 8 +- tests/modules/s3_datasets_shares/conftest.py | 22 +++-- tests/permissions.py | 17 ++-- .../modules/catalog/conftest.py | 4 +- .../modules/s3_datasets/global_conftest.py | 24 ++++-- .../modules/s3_datasets/queries.py | 72 ++++++---------- .../modules/s3_datasets/test_s3_dataset.py | 4 +- .../modules/s3_datasets/test_s3_tables.py | 2 +- 43 files changed, 317 insertions(+), 291 deletions(-) create mode 100644 frontend/src/utils/helpers/emptyPrintUnauthorized.js diff --git a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py index ef8acdbb7..db2fad2df 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py @@ -112,6 +112,12 @@ def get_dataset_statistics(context: Context, source: S3Dataset, **kwargs): return DatasetService.get_dataset_statistics(source) +def get_dataset_restricted_information(context: Context, source: S3Dataset, **kwargs): + if not source: + return None + return DatasetService.get_dataset_restricted_information(uri=source.datasetUri, dataset=source) + + @is_feature_enabled('modules.s3_datasets.features.aws_actions') def get_dataset_assume_role_url(context: Context, source, datasetUri: str = None): return DatasetService.get_dataset_assume_role_url(uri=datasetUri) diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py index 2b257bb19..cc2e88139 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py @@ -11,6 +11,7 @@ get_dataset_statistics, get_dataset_glossary_terms, resolve_dataset_stack, + get_dataset_restricted_information, ) from dataall.core.environment.api.enums import EnvironmentPermission @@ -23,6 +24,22 @@ ], ) +DatasetRestrictedInformation = gql.ObjectType( + name='DatasetRestrictedInformation', + fields=[ + gql.Field(name='AwsAccountId', type=gql.String), + gql.Field(name='region', type=gql.String), + gql.Field(name='S3BucketName', type=gql.String), + gql.Field(name='GlueDatabaseName', type=gql.String), + gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String), + gql.Field(name='KmsAlias', type=gql.String), + gql.Field(name='importedS3Bucket', type=gql.Boolean), + gql.Field(name='importedGlueDatabase', type=gql.Boolean), + gql.Field(name='importedKmsKey', type=gql.Boolean), + gql.Field(name='importedAdminRole', type=gql.Boolean), + ], +) + Dataset = gql.ObjectType( name='Dataset', fields=[ @@ -35,29 +52,13 @@ gql.Field(name='created', type=gql.String), gql.Field(name='updated', type=gql.String), gql.Field(name='admins', type=gql.ArrayType(gql.String)), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), - gql.Field(name='S3BucketName', type=gql.String), - gql.Field(name='GlueDatabaseName', type=gql.String), - gql.Field(name='GlueCrawlerName', type=gql.String), - gql.Field(name='GlueCrawlerSchedule', type=gql.String), - gql.Field(name='GlueProfilingJobName', type=gql.String), - gql.Field(name='GlueProfilingTriggerSchedule', type=gql.String), - gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String), - gql.Field(name='KmsAlias', type=gql.String), - gql.Field(name='bucketCreated', type=gql.Boolean), - gql.Field(name='glueDatabaseCreated', type=gql.Boolean), - gql.Field(name='iamAdminRoleCreated', type=gql.Boolean), - gql.Field(name='lakeformationLocationCreated', type=gql.Boolean), - gql.Field(name='bucketPolicyCreated', type=gql.Boolean), gql.Field(name='SamlAdminGroupName', type=gql.String), - gql.Field(name='businessOwnerEmail', type=gql.String), - gql.Field(name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String)), - gql.Field(name='importedS3Bucket', type=gql.Boolean), - gql.Field(name='importedGlueDatabase', type=gql.Boolean), - gql.Field(name='importedKmsKey', type=gql.Boolean), - gql.Field(name='importedAdminRole', type=gql.Boolean), gql.Field(name='imported', type=gql.Boolean), + gql.Field( + name='restricted', + type=DatasetRestrictedInformation, + resolver=get_dataset_restricted_information, + ), gql.Field( name='environment', type=gql.Ref('EnvironmentSimplified'), @@ -130,8 +131,6 @@ name='GlueCrawler', fields=[ gql.Field(name='Name', type=gql.ID), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), gql.Field(name='status', type=gql.String), ], ) diff --git a/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py b/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py index 39aa2d8cf..928d0d4f8 100644 --- a/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py @@ -50,6 +50,12 @@ def resolve_dataset(context, source: DatasetStorageLocation, **kwargs): return DatasetService.find_dataset(uri=source.datasetUri) +def get_folder_restricted_information(context: Context, source: DatasetStorageLocation, **kwargs): + if not source: + return None + return DatasetLocationService.get_folder_restricted_information(uri=source.locationUri, folder=source) + + def resolve_glossary_terms(context: Context, source: DatasetStorageLocation, **kwargs): if not source: return None diff --git a/backend/dataall/modules/s3_datasets/api/storage_location/types.py b/backend/dataall/modules/s3_datasets/api/storage_location/types.py index 40070a287..14db04c06 100644 --- a/backend/dataall/modules/s3_datasets/api/storage_location/types.py +++ b/backend/dataall/modules/s3_datasets/api/storage_location/types.py @@ -1,5 +1,9 @@ from dataall.base.api import gql -from dataall.modules.s3_datasets.api.storage_location.resolvers import resolve_glossary_terms, resolve_dataset +from dataall.modules.s3_datasets.api.storage_location.resolvers import ( + resolve_glossary_terms, + resolve_dataset, + get_folder_restricted_information, +) DatasetStorageLocation = gql.ObjectType( name='DatasetStorageLocation', @@ -11,13 +15,15 @@ gql.Field(name='owner', type=gql.String), gql.Field(name='created', type=gql.String), gql.Field(name='updated', type=gql.String), - gql.Field(name='region', type=gql.String), gql.Field(name='tags', type=gql.ArrayType(gql.String)), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='S3BucketName', type=gql.String), gql.Field(name='S3Prefix', type=gql.String), gql.Field(name='locationCreated', type=gql.Boolean), gql.Field(name='dataset', type=gql.Ref('Dataset'), resolver=resolve_dataset), + gql.Field( + name='restricted', + type=gql.Ref('DatasetRestrictedInformation'), + resolver=get_folder_restricted_information, + ), gql.Field(name='userRoleForStorageLocation', type=gql.Ref('DatasetRole')), gql.Field(name='environmentEndPoint', type=gql.String), gql.Field( @@ -40,37 +46,3 @@ gql.Field(name='hasPrevious', type=gql.Boolean), ], ) - - -DatasetAccessPoint = gql.ObjectType( - name='DatasetAccessPoint', - fields=[ - gql.Field(name='accessPointUri', type=gql.ID), - gql.Field(name='location', type=DatasetStorageLocation), - gql.Field(name='dataset', type=gql.Ref('Dataset')), - gql.Field(name='name', type=gql.String), - gql.Field(name='description', type=gql.String), - gql.Field(name='owner', type=gql.String), - gql.Field(name='created', type=gql.String), - gql.Field(name='updated', type=gql.String), - gql.Field(name='region', type=gql.String), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='S3BucketName', type=gql.String), - gql.Field(name='S3Prefix', type=gql.String), - gql.Field(name='S3AccessPointName', type=gql.String), - ], -) - - -DatasetAccessPointSearchResult = gql.ObjectType( - name='DatasetAccessPointSearchResult', - fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='pageSize', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Integer), - gql.Field(name='hasPrevious', type=gql.Integer), - gql.Field(name='nodes', type=gql.ArrayType(DatasetAccessPoint)), - ], -) diff --git a/backend/dataall/modules/s3_datasets/api/table/mutations.py b/backend/dataall/modules/s3_datasets/api/table/mutations.py index 9c846ebca..386a6fa0a 100644 --- a/backend/dataall/modules/s3_datasets/api/table/mutations.py +++ b/backend/dataall/modules/s3_datasets/api/table/mutations.py @@ -28,7 +28,7 @@ syncTables = gql.MutationField( name='syncTables', args=[gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('DatasetTableSearchResult'), + type=gql.Integer, resolver=sync_tables, ) diff --git a/backend/dataall/modules/s3_datasets/api/table/resolvers.py b/backend/dataall/modules/s3_datasets/api/table/resolvers.py index b88c62405..63d0b0299 100644 --- a/backend/dataall/modules/s3_datasets/api/table/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/table/resolvers.py @@ -72,3 +72,9 @@ def list_table_data_filters(context: Context, source, tableUri: str = None, filt if not filter: filter = {'page': 1, 'pageSize': 5} return DatasetTableDataFilterService.list_table_data_filters(uri=tableUri, data=filter) + + +def get_dataset_table_restricted_information(context: Context, source: DatasetTable, **kwargs): + if not source: + return None + return DatasetTableService.get_table_restricted_information(uri=source.tableUri, table=source) diff --git a/backend/dataall/modules/s3_datasets/api/table/types.py b/backend/dataall/modules/s3_datasets/api/table/types.py index e91291f6f..8464ad77e 100644 --- a/backend/dataall/modules/s3_datasets/api/table/types.py +++ b/backend/dataall/modules/s3_datasets/api/table/types.py @@ -4,6 +4,7 @@ resolve_dataset, get_glue_table_properties, resolve_glossary_terms, + get_dataset_table_restricted_information, ) TablePermission = gql.ObjectType( @@ -21,6 +22,15 @@ gql.Field(name='nodes', type=gql.ArrayType(TablePermission)), ], ) +DatasetTableRestrictedInformation = gql.ObjectType( + name='DatasetTableRestrictedInformation', + fields=[ + gql.Field(name='AwsAccountId', type=gql.String), + gql.Field(name='GlueDatabaseName', type=gql.String), + gql.Field(name='GlueTableName', type=gql.String), + gql.Field(name='S3Prefix', type=gql.String), + ], +) DatasetTable = gql.ObjectType( name='DatasetTable', @@ -35,12 +45,11 @@ gql.Field(name='created', type=gql.String), gql.Field(name='updated', type=gql.String), gql.Field(name='admins', type=gql.ArrayType(gql.String)), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='GlueDatabaseName', type=gql.String), - gql.Field(name='GlueTableName', type=gql.String), gql.Field(name='LastGlueTableStatus', type=gql.String), - gql.Field(name='S3Prefix', type=gql.String), gql.Field(name='GlueTableConfig', type=gql.String), + gql.Field( + name='restricted', type=DatasetTableRestrictedInformation, resolver=get_dataset_table_restricted_information + ), gql.Field( name='GlueTableProperties', type=gql.String, diff --git a/backend/dataall/modules/s3_datasets/services/dataset_location_service.py b/backend/dataall/modules/s3_datasets/services/dataset_location_service.py index ee83d1c5f..13c12d144 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_location_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_location_service.py @@ -3,7 +3,7 @@ from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository -from dataall.base.db.exceptions import ResourceShared, ResourceAlreadyExists +from dataall.base.db.exceptions import ResourceAlreadyExists from dataall.modules.s3_datasets.services.dataset_service import DatasetService from dataall.modules.s3_datasets.aws.s3_location_client import S3LocationClient from dataall.modules.s3_datasets.db.dataset_location_repositories import DatasetLocationRepository @@ -59,7 +59,6 @@ def list_dataset_locations(uri: str, filter: dict = None): return DatasetLocationRepository.list_dataset_locations(session=session, uri=uri, data=filter) @staticmethod - @ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER) def get_storage_location(uri): with get_context().db_engine.scoped_session() as session: return DatasetLocationRepository.get_location_by_uri(session, uri) @@ -135,3 +134,10 @@ def _delete_dataset_folder_read_permission(session, dataset: S3Dataset, location } for group in permission_group: ResourcePolicyService.delete_resource_policy(session=session, group=group, resource_uri=location_uri) + + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER) + def get_folder_restricted_information(uri: str, folder: DatasetStorageLocation): + context = get_context() + with context.db_engine.scoped_session() as session: + return DatasetRepository.get_dataset_by_uri(session, folder.datasetUri) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 6d3010bf4..3575a4a21 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -38,8 +38,8 @@ DATASET_ALL, DATASET_READ, IMPORT_DATASET, - GET_DATASET, DATASET_TABLE_ALL, + GET_DATASET, ) from dataall.modules.datasets_base.services.dataset_list_permissions import LIST_ENVIRONMENT_DATASETS from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository @@ -344,6 +344,11 @@ def get_dataset_statistics(dataset: S3Dataset): 'upvotes': count_upvotes or 0, } + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_DATASET) + def get_dataset_restricted_information(uri: str, dataset: S3Dataset): + return dataset + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET) @@ -397,8 +402,6 @@ def start_crawler(uri: str, data: dict = None): return { 'Name': dataset.GlueCrawlerName, - 'AwsAccountId': dataset.AwsAccountId, - 'region': dataset.region, 'status': crawler.get('LastCrawl', {}).get('Status', 'N/A'), } diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index 386ab252e..21336c58b 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -1,5 +1,4 @@ import logging - from dataall.base.context import get_context from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService @@ -44,6 +43,11 @@ def get_table(uri: str): with get_context().db_engine.scoped_session() as session: return DatasetTableRepository.get_dataset_table_by_uri(session, uri) + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_DATASET_TABLE) + def get_table_restricted_information(uri: str, table: DatasetTable): + return table + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(UPDATE_DATASET_TABLE, parent_resource=_get_dataset_uri) @@ -127,11 +131,7 @@ def sync_tables_for_dataset(cls, uri): DatasetTableIndexer.upsert_all(session=session, dataset_uri=dataset.datasetUri) DatasetTableIndexer.remove_all_deleted(session=session, dataset_uri=dataset.datasetUri) DatasetIndexer.upsert(session=session, dataset_uri=dataset.datasetUri) - return DatasetRepository.paginated_dataset_tables( - session=session, - uri=uri, - data={'page': 1, 'pageSize': 10}, - ) + return DatasetRepository.count_dataset_tables(session, dataset.datasetUri) @staticmethod def sync_existing_tables(session, uri, glue_tables=None): diff --git a/frontend/src/modules/Folders/components/FolderOverview.js b/frontend/src/modules/Folders/components/FolderOverview.js index c1b6c469c..6c3ec47bc 100644 --- a/frontend/src/modules/Folders/components/FolderOverview.js +++ b/frontend/src/modules/Folders/components/FolderOverview.js @@ -23,12 +23,14 @@ export const FolderOverview = (props) => { } /> - + {folder.restricted && ( + + )} { S3 URI - {`s3://${folder.dataset.S3BucketName}/${folder.S3Prefix}/`} + {`s3://${folder.restricted.S3BucketName}/${folder.S3Prefix}/`} @@ -27,7 +27,7 @@ export const FolderS3Properties = (props) => { S3 ARN - {`arn:aws:s3:::${folder.dataset.S3BucketName}/${folder.S3Prefix}/`} + {`arn:aws:s3:::${folder.restricted.S3BucketName}/${folder.S3Prefix}/`} @@ -35,7 +35,7 @@ export const FolderS3Properties = (props) => { Region - {folder.dataset.region} + {folder.restricted.region} @@ -43,7 +43,7 @@ export const FolderS3Properties = (props) => { Account - {folder.dataset.AwsAccountId} + {folder.restricted.AwsAccountId} diff --git a/frontend/src/modules/Folders/services/getDatasetStorageLocation.js b/frontend/src/modules/Folders/services/getDatasetStorageLocation.js index 292603f8f..42d91dd9b 100644 --- a/frontend/src/modules/Folders/services/getDatasetStorageLocation.js +++ b/frontend/src/modules/Folders/services/getDatasetStorageLocation.js @@ -7,14 +7,17 @@ export const getDatasetStorageLocation = (locationUri) => ({ query: gql` query getDatasetStorageLocation($locationUri: String!) { getDatasetStorageLocation(locationUri: $locationUri) { + restricted { + AwsAccountId + region + S3BucketName + } dataset { datasetUri name + label userRoleForDataset - region SamlAdminGroupName - S3BucketName - AwsAccountId owner environment { environmentUri @@ -31,7 +34,6 @@ export const getDatasetStorageLocation = (locationUri) => ({ created tags locationUri - AwsAccountId label name S3Prefix diff --git a/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js b/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js index d3545e713..d24dca471 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js @@ -19,7 +19,7 @@ export const DatasetConsoleAccess = (props) => { Account - {dataset.AwsAccountId} + {dataset.restricted.AwsAccountId} @@ -28,7 +28,7 @@ export const DatasetConsoleAccess = (props) => { arn:aws:s3::: - {dataset.S3BucketName} + {dataset.restricted.S3BucketName} @@ -36,7 +36,7 @@ export const DatasetConsoleAccess = (props) => { Glue database - {`arn:aws:glue:${dataset.region}:${dataset.AwsAccountId}/database:${dataset.GlueDatabaseName}`} + {`arn:aws:glue:${dataset.restricted.region}:${dataset.restricted.AwsAccountId}/database:${dataset.restricted.GlueDatabaseName}`} @@ -44,16 +44,17 @@ export const DatasetConsoleAccess = (props) => { IAM role - {dataset.IAMDatasetAdminRoleArn} + {dataset.restricted.IAMDatasetAdminRoleArn} - {dataset.KmsAlias === 'SSE-S3' || dataset.KmsAlias === 'Undefined' ? ( + {dataset.restricted.KmsAlias === 'SSE-S3' || + dataset.restricted.KmsAlias === 'Undefined' ? ( S3 Encryption - {`${dataset.KmsAlias}`} + {`${dataset.restricted.KmsAlias}`} ) : ( @@ -62,7 +63,7 @@ export const DatasetConsoleAccess = (props) => { S3 Encryption SSE-KMS - {`arn:aws:kms:${dataset.region}:${dataset.AwsAccountId}/alias:${dataset.KmsAlias}`} + {`arn:aws:kms:${dataset.restricted.region}:${dataset.restricted.AwsAccountId}/alias:${dataset.restricted.KmsAlias}`} )} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetFolders.js b/frontend/src/modules/S3_Datasets/components/DatasetFolders.js index 7cefeefee..ab5e4a446 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetFolders.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetFolders.js @@ -37,6 +37,7 @@ import { } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { deleteDatasetStorageLocation, useClient } from 'services'; +import { emptyPrintUnauthorized } from 'utils'; import { listDatasetStorageLocations } from '../services'; import { FolderCreateModal } from './FolderCreateModal'; @@ -69,7 +70,7 @@ export const DatasetFolders = (props) => { const response = await client.query( listDatasetStorageLocations(dataset.datasetUri, filter) ); - if (!response.errors) { + if (response.data.getDataset != null) { setItems({ ...response.data.getDataset.locations }); } else { dispatch({ type: SET_ERROR, error: response.errors[0].message }); @@ -224,7 +225,9 @@ export const DatasetFolders = (props) => { - {`s3://${dataset.S3BucketName}/${folder.S3Prefix}`} + {`s3://${emptyPrintUnauthorized( + folder.restricted?.S3BucketName + )}/${folder.S3Prefix}`} {folder.description} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetOverview.js b/frontend/src/modules/S3_Datasets/components/DatasetOverview.js index 0599d33b2..3e61c26dd 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetOverview.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetOverview.js @@ -33,7 +33,9 @@ export const DatasetOverview = (props) => { objectType="dataset" /> - {isAdmin && } + {isAdmin && dataset.restricted && ( + + )} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js b/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js index ba5b3ac47..8196760fc 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js @@ -132,7 +132,7 @@ export const DatasetStartCrawlerModal = (props) => { error={Boolean(touched.prefix && errors.prefix)} fullWidth helperText={touched.prefix && errors.prefix} - label={`s3://${dataset.S3BucketName}/`} + label={`s3://${dataset.restricted.S3BucketName}/`} name="prefix" onBlur={handleBlur} onChange={handleChange} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetTables.js b/frontend/src/modules/S3_Datasets/components/DatasetTables.js index 46b3603e3..32f7774cc 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetTables.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetTables.js @@ -40,7 +40,7 @@ import { listDatasetTables, deleteDatasetTable, useClient } from 'services'; import { syncTables } from '../services'; import { DatasetStartCrawlerModal } from './DatasetStartCrawlerModal'; -import { isFeatureEnabled } from 'utils'; +import { emptyPrintUnauthorized, isFeatureEnabled } from 'utils'; export const DatasetTables = (props) => { const { dataset, isAdmin } = props; @@ -80,7 +80,7 @@ export const DatasetTables = (props) => { filter: { ...filter } }) ); - if (!response.errors) { + if (response.data.getDataset != null) { setItems({ ...response.data.getDataset.tables }); } else { dispatch({ type: SET_ERROR, error: response.errors[0].message }); @@ -95,7 +95,7 @@ export const DatasetTables = (props) => { fetchItems().catch((e) => dispatch({ type: SET_ERROR, error: e.message }) ); - enqueueSnackbar(`Retrieved ${response.data.syncTables.count} tables`, { + enqueueSnackbar(`Retrieved ${response.data.syncTables} tables`, { anchorOrigin: { horizontal: 'right', vertical: 'top' @@ -257,11 +257,17 @@ export const DatasetTables = (props) => { to={`/console/s3-datasets/table/${table.tableUri}`} variant="subtitle2" > - {table.GlueTableName} + {table.name} - {table.GlueDatabaseName} - {table.S3Prefix} + + {emptyPrintUnauthorized( + table.restricted?.GlueDatabaseName + )} + + + {emptyPrintUnauthorized(table.restricted?.S3Prefix)} + {isAdmin && ( { ({ description created userRoleForStorageLocation + restricted { + S3BucketName + } } } } diff --git a/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js b/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js index 2c110baa5..feb59d959 100644 --- a/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js +++ b/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js @@ -9,8 +9,6 @@ export const startGlueCrawler = ({ datasetUri, input }) => ({ mutation StartGlueCrawler($datasetUri: String, $input: CrawlerInput) { startGlueCrawler(datasetUri: $datasetUri, input: $input) { Name - AwsAccountId - region status } } diff --git a/frontend/src/modules/S3_Datasets/services/syncTables.js b/frontend/src/modules/S3_Datasets/services/syncTables.js index dd0f991ec..608335d01 100644 --- a/frontend/src/modules/S3_Datasets/services/syncTables.js +++ b/frontend/src/modules/S3_Datasets/services/syncTables.js @@ -6,25 +6,7 @@ export const syncTables = (datasetUri) => ({ }, mutation: gql` mutation SyncTables($datasetUri: String!) { - syncTables(datasetUri: $datasetUri) { - count - nodes { - tableUri - GlueTableName - GlueDatabaseName - description - name - label - created - S3Prefix - dataset { - datasetUri - name - GlueDatabaseName - userRoleForDataset - } - } - } + syncTables(datasetUri: $datasetUri) } ` }); diff --git a/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js b/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js index 6026b1823..73f5543de 100644 --- a/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js +++ b/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js @@ -333,7 +333,7 @@ const DatasetEditForm = (props) => { terms: dataset.terms || [], stewards: dataset.stewards, confidentiality: dataset.confidentiality, - KmsAlias: dataset.KmsAlias, + KmsAlias: dataset.restricted.KmsAlias, autoApprovalEnabled: dataset.autoApprovalEnabled, expirationSetting: dataset.expirySetting, minValidity: dataset.expiryMinDuration, diff --git a/frontend/src/modules/Tables/services/getDatasetTable.js b/frontend/src/modules/Tables/services/getDatasetTable.js index 1491871b0..0384c9d91 100644 --- a/frontend/src/modules/Tables/services/getDatasetTable.js +++ b/frontend/src/modules/Tables/services/getDatasetTable.js @@ -11,7 +11,6 @@ export const getDatasetTable = (tableUri) => ({ datasetUri name userRoleForDataset - region SamlAdminGroupName owner confidentiality @@ -31,13 +30,15 @@ export const getDatasetTable = (tableUri) => ({ created tags tableUri - AwsAccountId - GlueTableName - GlueDatabaseName LastGlueTableStatus label name - S3Prefix + restricted { + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + } terms { count nodes { diff --git a/frontend/src/modules/Tables/views/TableView.js b/frontend/src/modules/Tables/views/TableView.js index 85ce3c0a6..ee3da9d1a 100644 --- a/frontend/src/modules/Tables/views/TableView.js +++ b/frontend/src/modules/Tables/views/TableView.js @@ -59,7 +59,7 @@ function TablePageHeader(props) { - Table {table.GlueTableName} + Table {table.label} - {table.GlueTableName} + {table.label} @@ -222,7 +222,7 @@ const TableView = () => { const fetchItem = useCallback(async () => { setLoading(true); const response = await client.query(getDatasetTable(params.uri)); - if (!response.errors && response.data.getDatasetTable !== null) { + if (response.data.getDatasetTable !== null) { setTable(response.data.getDatasetTable); handleUserRole( response.data.getDatasetTable.dataset.userRoleForDataset, diff --git a/frontend/src/modules/Worksheets/views/WorksheetView.js b/frontend/src/modules/Worksheets/views/WorksheetView.js index 545c3f190..f3c409aef 100644 --- a/frontend/src/modules/Worksheets/views/WorksheetView.js +++ b/frontend/src/modules/Worksheets/views/WorksheetView.js @@ -136,7 +136,7 @@ const WorksheetView = () => { (d) => ({ ...d, value: d.datasetUri, - label: d.GlueDatabaseName + label: d.restricted.GlueDatabaseName }) ); } @@ -198,7 +198,7 @@ const WorksheetView = () => { response.data.getDataset.tables.nodes.map((t) => ({ ...t, value: t.tableUri, - label: t.GlueTableName + label: t.restricted.GlueTableName })) ); } else { @@ -379,7 +379,11 @@ const WorksheetView = () => { dispatch({ type: SET_ERROR, error: e.message }) ); setSqlBody( - `SELECT * FROM "${selectedDatabase.label}"."${event.target.value.GlueTableName}" limit 10;` + `SELECT * FROM "${selectedDatabase.label}"."${ + event.target.value.restricted + ? event.target.value.restricted.GlueTableName + : event.target.value.GlueTableName + }" limit 10;` ); } @@ -512,7 +516,7 @@ const WorksheetView = () => { {tableOptions.length > 0 ? ( tableOptions.map((table) => ( - {table.GlueTableName} + {table.label} )) ) : ( diff --git a/frontend/src/services/graphql/Datasets/getDataset.js b/frontend/src/services/graphql/Datasets/getDataset.js index 322b5189d..7dabc1b20 100644 --- a/frontend/src/services/graphql/Datasets/getDataset.js +++ b/frontend/src/services/graphql/Datasets/getDataset.js @@ -12,20 +12,20 @@ export const getDataset = (datasetUri) => ({ description label name - region created imported userRoleForDataset SamlAdminGroupName - AwsAccountId - KmsAlias - S3BucketName - GlueDatabaseName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tags - businessOwnerEmail stewards - IAMDatasetAdminRoleArn - businessOwnerDelegationEmails stack { stack status diff --git a/frontend/src/services/graphql/Datasets/listDatasetTables.js b/frontend/src/services/graphql/Datasets/listDatasetTables.js index 343e80461..eee1db27f 100644 --- a/frontend/src/services/graphql/Datasets/listDatasetTables.js +++ b/frontend/src/services/graphql/Datasets/listDatasetTables.js @@ -26,11 +26,14 @@ export const listDatasetTables = ({ datasetUri, filter }) => ({ tableUri name created - GlueTableName - GlueDatabaseName + restricted { + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + } description stage - S3Prefix userRoleForTable } } diff --git a/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js b/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js index 589d4cf59..cbe5e41f9 100644 --- a/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js +++ b/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js @@ -29,14 +29,16 @@ export const listS3DatasetsOwnedByEnvGroup = ({ nodes { datasetUri label - AwsAccountId - region - GlueDatabaseName SamlAdminGroupName name - S3BucketName created owner + restricted { + AwsAccountId + region + S3BucketName + GlueDatabaseName + } stack { status } diff --git a/frontend/src/utils/helpers/emptyPrintUnauthorized.js b/frontend/src/utils/helpers/emptyPrintUnauthorized.js new file mode 100644 index 000000000..9713c753d --- /dev/null +++ b/frontend/src/utils/helpers/emptyPrintUnauthorized.js @@ -0,0 +1,5 @@ +function emptyPrintUnauthorized(param) { + return param ? param : '**********'; +} + +export { emptyPrintUnauthorized }; diff --git a/frontend/src/utils/helpers/index.js b/frontend/src/utils/helpers/index.js index 5eec0fd36..b00b552f5 100644 --- a/frontend/src/utils/helpers/index.js +++ b/frontend/src/utils/helpers/index.js @@ -1,5 +1,6 @@ export * from './bytesToSize'; export * from './dayjs'; +export * from './emptyPrintUnauthorized'; export * from './listToTree'; export * from './moduleUtils'; export * from './linkMarkup'; diff --git a/tests/modules/s3_datasets/conftest.py b/tests/modules/s3_datasets/conftest.py index 99a4dcdcf..8295b4be2 100644 --- a/tests/modules/s3_datasets/conftest.py +++ b/tests/modules/s3_datasets/conftest.py @@ -76,19 +76,20 @@ def factory( datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName - GlueCrawlerName enableExpiration expirySetting expiryMinDuration expiryMaxDuration + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tables{ nodes{ tableUri @@ -180,11 +181,11 @@ def factory(dataset: S3Dataset, name, username) -> DatasetTable: label=name, owner=username, datasetUri=dataset.datasetUri, - GlueDatabaseName=dataset.GlueDatabaseName, + GlueDatabaseName=dataset.restricted.GlueDatabaseName, GlueTableName=name, - region=dataset.region, - AWSAccountId=dataset.AwsAccountId, - S3BucketName=dataset.S3BucketName, + region=dataset.restricted.region, + AWSAccountId=dataset.restricted.AwsAccountId, + S3BucketName=dataset.restricted.S3BucketName, S3Prefix=f'{name}', ) session.add(table) @@ -325,9 +326,9 @@ def factory(dataset: S3Dataset, name, username) -> DatasetStorageLocation: label=name, owner=username, datasetUri=dataset.datasetUri, - S3BucketName=dataset.S3BucketName, - region=dataset.region, - AWSAccountId=dataset.AwsAccountId, + S3BucketName=dataset.restricted.S3BucketName, + region=dataset.restricted.region, + AWSAccountId=dataset.restricted.AwsAccountId, S3Prefix=f'{name}', ) session.add(ds_location) diff --git a/tests/modules/s3_datasets/test_dataset.py b/tests/modules/s3_datasets/test_dataset.py index 69b1de0f6..c3801edfa 100644 --- a/tests/modules/s3_datasets/test_dataset.py +++ b/tests/modules/s3_datasets/test_dataset.py @@ -69,13 +69,15 @@ def test_get_dataset(client, dataset1, env_fixture, group): query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId description - region - imported - importedS3Bucket stewards owners + imported + restricted { + AwsAccountId + region + importedS3Bucket + } } } """, @@ -83,11 +85,11 @@ def test_get_dataset(client, dataset1, env_fixture, group): username='alice', groups=[group.name], ) - assert response.data.getDataset.AwsAccountId == env_fixture.AwsAccountId - assert response.data.getDataset.region == env_fixture.region + assert response.data.getDataset.restricted.AwsAccountId == env_fixture.AwsAccountId + assert response.data.getDataset.restricted.region == env_fixture.region assert response.data.getDataset.label == 'dataset1' assert response.data.getDataset.imported is False - assert response.data.getDataset.importedS3Bucket is False + assert response.data.getDataset.restricted.importedS3Bucket is False def test_list_datasets(client, dataset1, group): @@ -194,8 +196,6 @@ def test_start_crawler(org_fixture, env_fixture, dataset1, client, group, module mutation StartGlueCrawler($datasetUri:String, $input:CrawlerInput){ startGlueCrawler(datasetUri:$datasetUri,input:$input){ Name - AwsAccountId - region status } } @@ -209,7 +209,7 @@ def test_start_crawler(org_fixture, env_fixture, dataset1, client, group, module 'prefix': 'raw', }, ) - assert response.data.startGlueCrawler.Name == dataset1.GlueCrawlerName + assert response.data.Name == dataset1.restricted.GlueCrawlerName def test_update_dataset_unauthorized(dataset1, client, group): @@ -309,9 +309,11 @@ def test_list_dataset_tables(client, dataset1, group): tableUri name label - GlueDatabaseName - GlueTableName - S3Prefix + restricted{ + GlueDatabaseName + GlueTableName + S3Prefix + } } } } @@ -391,9 +393,11 @@ def test_delete_dataset(client, dataset, env_fixture, org_fixture, db, module_mo query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId + restricted { + AwsAccountId + region + } description - region } } """, @@ -428,17 +432,15 @@ def test_import_dataset(org_fixture, env_fixture, dataset1, client, group): mutation importDataset($input:ImportDatasetInput){ importDataset(input:$input){ label - AwsAccountId - region imported - importedS3Bucket - importedGlueDatabase - importedKmsKey - importedAdminRole - S3BucketName - GlueDatabaseName - IAMDatasetAdminRoleArn - KmsAlias + restricted { + AwsAccountId + region + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + KmsAlias + } } } """, @@ -457,17 +459,13 @@ def test_import_dataset(org_fixture, env_fixture, dataset1, client, group): }, ) assert response.data.importDataset.label == 'datasetImported' - assert response.data.importDataset.AwsAccountId == env_fixture.AwsAccountId - assert response.data.importDataset.region == env_fixture.region + assert response.data.importDataset.restricted.AwsAccountId == env_fixture.AwsAccountId + assert response.data.importDataset.restricted.region == env_fixture.region assert response.data.importDataset.imported is True - assert response.data.importDataset.importedS3Bucket is True - assert response.data.importDataset.importedGlueDatabase is True - assert response.data.importDataset.importedKmsKey is True - assert response.data.importDataset.importedAdminRole is True - assert response.data.importDataset.S3BucketName == 'dhimportedbucket' - assert response.data.importDataset.GlueDatabaseName == 'dhimportedGlueDB' - assert response.data.importDataset.KmsAlias == '1234-YYEY' - assert 'dhimportedRole' in response.data.importDataset.IAMDatasetAdminRoleArn + assert response.data.importDataset.restricted.S3BucketName == 'dhimportedbucket' + assert response.data.importDataset.restricted.GlueDatabaseName == 'dhimportedGlueDB' + assert response.data.importDataset.restricted.KmsAlias == '1234-YYEY' + assert 'dhimportedRole' in response.data.importDataset.restricted.IAMDatasetAdminRoleArn def test_get_dataset_by_prefix(db, env_fixture, org_fixture): @@ -512,13 +510,15 @@ def test_stewardship(client, dataset, env_fixture, org_fixture, db, group2, grou datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName stewards diff --git a/tests/modules/s3_datasets/test_dataset_glossary.py b/tests/modules/s3_datasets/test_dataset_glossary.py index 543d20f91..5a25d1b34 100644 --- a/tests/modules/s3_datasets/test_dataset_glossary.py +++ b/tests/modules/s3_datasets/test_dataset_glossary.py @@ -14,12 +14,12 @@ def _columns(db, dataset_fixture, table_fixture) -> List[DatasetTableColumn]: datasetUri=dataset_fixture.datasetUri, tableUri=table_fixture.tableUri, label=f'c{i+1}', - AWSAccountId=dataset_fixture.AwsAccountId, - region=dataset_fixture.region, + AWSAccountId=dataset_fixture.restricted.AwsAccountId, + region=dataset_fixture.restricted.region, GlueTableName='table', typeName='String', owner='user', - GlueDatabaseName=dataset_fixture.GlueDatabaseName, + GlueDatabaseName=dataset_fixture.restricted.GlueDatabaseName, ) session.add(c) cols.append(c) diff --git a/tests/modules/s3_datasets/test_dataset_location.py b/tests/modules/s3_datasets/test_dataset_location.py index 3d388e641..d74f863da 100644 --- a/tests/modules/s3_datasets/test_dataset_location.py +++ b/tests/modules/s3_datasets/test_dataset_location.py @@ -51,11 +51,11 @@ def test_manage_dataset_location(client, dataset1, user, group): query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId description - region - imported - importedS3Bucket + restricted { + AwsAccountId + region + } locations{ nodes{ locationUri diff --git a/tests/modules/s3_datasets/test_dataset_table.py b/tests/modules/s3_datasets/test_dataset_table.py index e20af5b86..1736c4958 100644 --- a/tests/modules/s3_datasets/test_dataset_table.py +++ b/tests/modules/s3_datasets/test_dataset_table.py @@ -81,9 +81,11 @@ def test_list_dataset_tables(client, dataset_fixture): tableUri name label - GlueDatabaseName - GlueTableName - S3Prefix + restricted { + GlueDatabaseName + GlueTableName + S3Prefix + } } } } diff --git a/tests/modules/s3_datasets_shares/conftest.py b/tests/modules/s3_datasets_shares/conftest.py index 8fc050487..7a5ac19ba 100644 --- a/tests/modules/s3_datasets_shares/conftest.py +++ b/tests/modules/s3_datasets_shares/conftest.py @@ -83,15 +83,16 @@ def factory( datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName - GlueCrawlerName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tables{ nodes{ tableUri @@ -253,7 +254,12 @@ def dataset_confidential_fixture(env_fixture, org_fixture, dataset, group) -> S3 @pytest.fixture(scope='module') def table_fixture(db, dataset_fixture, table, group, user): - table1 = table(dataset=dataset_fixture, name='table1', username=user.username) + dataset = dataset_fixture + dataset.GlueDatabaseName = dataset_fixture.restricted.GlueDatabaseName + dataset.region = dataset_fixture.restricted.region + dataset.S3BucketName = dataset_fixture.restricted.S3BucketName + dataset.AwsAccountId = dataset_fixture.restricted.AwsAccountId + table1 = table(dataset=dataset, name='table1', username=user.username) with db.scoped_session() as session: ResourcePolicyService.attach_resource_policy( diff --git a/tests/permissions.py b/tests/permissions.py index 9f3a29785..910681e53 100644 --- a/tests/permissions.py +++ b/tests/permissions.py @@ -246,6 +246,7 @@ def __post_init__(self): field_id('Dataset', 'owners'): TestData( resource_ignore=IgnoreReason.INTRAMODULE, tenant_ignore=IgnoreReason.NOTREQUIRED ), + field_id('Dataset', 'restricted'): TestData(resource_perm=GET_DATASET, tenant_ignore=IgnoreReason.NOTREQUIRED), field_id('Dataset', 'stack'): TestData(resource_perm=GET_DATASET, tenant_ignore=IgnoreReason.NOTREQUIRED), field_id('Dataset', 'statistics'): TestData( resource_ignore=IgnoreReason.INTRAMODULE, tenant_ignore=IgnoreReason.NOTREQUIRED @@ -285,6 +286,9 @@ def __post_init__(self): field_id('DatasetStorageLocation', 'dataset'): TestData( resource_perm=GET_DATASET, tenant_ignore=IgnoreReason.NOTREQUIRED ), + field_id('DatasetStorageLocation', 'restricted'): TestData( + resource_perm=GET_DATASET_FOLDER, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('DatasetStorageLocation', 'terms'): TestData( resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED ), @@ -297,6 +301,9 @@ def __post_init__(self): field_id('DatasetTable', 'dataset'): TestData( resource_ignore=IgnoreReason.INTRAMODULE, tenant_ignore=IgnoreReason.NOTREQUIRED ), + field_id('DatasetTable', 'restricted'): TestData( + resource_perm=GET_DATASET_TABLE, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('DatasetTable', 'terms'): TestData( resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED ), @@ -725,8 +732,8 @@ def __post_init__(self): resource_perm=CREDENTIALS_PIPELINE, tenant_perm=MANAGE_PIPELINES ), field_id('Query', 'getDataset'): TestData( - resource_ignore=IgnoreReason.NOTREQUIRED, tenant_ignore=IgnoreReason.NOTREQUIRED - ), # TODO Review + resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('Query', 'getDatasetAssumeRoleUrl'): TestData( tenant_perm=MANAGE_DATASETS, resource_perm=CREDENTIALS_DATASET ), @@ -737,11 +744,11 @@ def __post_init__(self): tenant_perm=MANAGE_DATASETS, resource_perm=CREDENTIALS_DATASET ), field_id('Query', 'getDatasetStorageLocation'): TestData( - resource_perm=GET_DATASET_FOLDER, tenant_ignore=IgnoreReason.NOTREQUIRED + resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED ), field_id('Query', 'getDatasetTable'): TestData( - resource_ignore=IgnoreReason.NOTREQUIRED, tenant_ignore=IgnoreReason.NOTREQUIRED - ), # TODO Review + resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('Query', 'getDatasetTableProfilingRun'): TestData( resource_ignore=IgnoreReason.CUSTOM, tenant_ignore=IgnoreReason.CUSTOM ), diff --git a/tests_new/integration_tests/modules/catalog/conftest.py b/tests_new/integration_tests/modules/catalog/conftest.py index fa93399fe..3476afad8 100644 --- a/tests_new/integration_tests/modules/catalog/conftest.py +++ b/tests_new/integration_tests/modules/catalog/conftest.py @@ -85,7 +85,7 @@ def dataset_association1(client1, group1, glossary1, glossary_term1, session_s3_ datasetUri=session_s3_dataset1.datasetUri, input={ 'terms': [glossary_term1.nodeUri], - 'KmsAlias': session_s3_dataset1.KmsAlias, + 'KmsAlias': session_s3_dataset1.restricted.KmsAlias, }, ) response = list_glossary_associations(client1, node_uri=glossary1.nodeUri) @@ -100,7 +100,7 @@ def dataset_association1(client1, group1, glossary1, glossary_term1, session_s3_ datasetUri=session_s3_dataset1.datasetUri, input={ 'terms': [], - 'KmsAlias': session_s3_dataset1.KmsAlias, + 'KmsAlias': session_s3_dataset1.restricted.KmsAlias, }, ) diff --git a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py index e05861e73..ea8341ee5 100644 --- a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py +++ b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py @@ -181,14 +181,22 @@ def create_tables(client, dataset): file_path = os.path.join(os.path.dirname(__file__), 'sample_data/csv_table/csv_sample.csv') s3_client = S3Client(dataset_session, dataset.region) glue_client = GlueClient(dataset_session, dataset.region) - s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.S3BucketName}/integrationtest1') + s3_client.upload_file_to_prefix( + local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest1' + ) glue_client.create_table( - database_name=dataset.GlueDatabaseName, table_name='integrationtest1', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, + table_name='integrationtest1', + bucket=dataset.restricted.S3BucketName, ) - s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.S3BucketName}/integrationtest2') + s3_client.upload_file_to_prefix( + local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest2' + ) glue_client.create_table( - database_name=dataset.GlueDatabaseName, table_name='integrationtest2', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, + table_name='integrationtest2', + bucket=dataset.restricted.S3BucketName, ) response = sync_tables(client, datasetUri=dataset.datasetUri) return [table for table in response.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] @@ -238,7 +246,9 @@ def session_s3_dataset1(client1, group1, org1, session_env1, session_id, testdat finally: if ds: delete_s3_dataset(client1, session_env1['environmentUri'], ds) - delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.S3BucketName) + delete_aws_dataset_resources( + aws_client=session_env1_aws_client, env=session_env1, bucket=ds.restricted.S3BucketName + ) @pytest.fixture(scope='session') @@ -394,7 +404,9 @@ def temp_s3_dataset1(client1, group1, org1, session_env1, session_id, testdata, if ds: delete_s3_dataset(client1, session_env1['environmentUri'], ds) - delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.S3BucketName) + delete_aws_dataset_resources( + aws_client=session_env1_aws_client, env=session_env1, bucket=ds.restricted.S3BucketName + ) """ diff --git a/tests_new/integration_tests/modules/s3_datasets/queries.py b/tests_new/integration_tests/modules/s3_datasets/queries.py index e80ddbf7d..a92ba6828 100644 --- a/tests_new/integration_tests/modules/s3_datasets/queries.py +++ b/tests_new/integration_tests/modules/s3_datasets/queries.py @@ -10,24 +10,16 @@ created updated admins -AwsAccountId -region -S3BucketName -GlueDatabaseName -GlueCrawlerName -GlueCrawlerSchedule -GlueProfilingJobName -GlueProfilingTriggerSchedule -IAMDatasetAdminRoleArn -KmsAlias SamlAdminGroupName -businessOwnerEmail -businessOwnerDelegationEmails -importedS3Bucket -importedGlueDatabase -importedKmsKey -importedAdminRole imported +restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn +} environment { environmentUri label @@ -265,8 +257,6 @@ def start_glue_crawler(client, datasetUri, input): mutation StartGlueCrawler($datasetUri: String, $input: CrawlerInput) {{ startGlueCrawler(datasetUri: $datasetUri, input: $input) {{ Name - AwsAccountId - region status }} }} @@ -299,12 +289,14 @@ def list_s3_datasets_owned_by_env_group(client, environment_uri, group_uri, term nodes {{ datasetUri label - AwsAccountId - region - GlueDatabaseName + restricted {{ + AwsAccountId + region + S3BucketName + GlueDatabaseName + }} SamlAdminGroupName name - S3BucketName created owner stack {{ @@ -360,8 +352,6 @@ def update_folder(client, locationUri, input): mutation updateDatasetStorageLocation($locationUri: String!, $input: ModifyDatasetStorageLocationInput!) {{ updateDatasetStorageLocation(locationUri: $locationUri, input: $input) {{ locationUri - S3Prefix - label }} }} """, @@ -431,25 +421,7 @@ def sync_tables(client, datasetUri): 'variables': {'datasetUri': datasetUri}, 'query': f""" mutation SyncTables($datasetUri: String!) {{ - syncTables(datasetUri: $datasetUri) {{ - count - nodes {{ - tableUri - GlueTableName - GlueDatabaseName - description - name - label - created - S3Prefix - dataset {{ - datasetUri - name - GlueDatabaseName - userRoleForDataset - }} - }} - }} + syncTables(datasetUri: $datasetUri) }} """, } @@ -500,13 +472,15 @@ def get_dataset_table(client, tableUri): created tags tableUri - AwsAccountId - GlueTableName - GlueDatabaseName + restricted {{ + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + }} LastGlueTableStatus label name - S3Prefix }} }} """, @@ -526,7 +500,9 @@ def list_dataset_tables(client, datasetUri): tables {{ count nodes {{ - GlueTableName + restricted {{ + GlueTableName + }} }} }} }} diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py index e1882ac0d..fcdcc2865 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py @@ -159,7 +159,7 @@ def test_update_dataset(client1, dataset_fixture_name, request): test_description = f'a test description {datetime.utcnow().isoformat()}' dataset_uri = dataset.datasetUri updated_dataset = update_dataset( - client1, dataset_uri, {'description': test_description, 'KmsAlias': dataset.KmsAlias} + client1, dataset_uri, {'description': test_description, 'KmsAlias': dataset.restricted.KmsAlias} ) assert_that(updated_dataset).contains_entry(datasetUri=dataset_uri, description=test_description) env = get_dataset(client1, dataset_uri) @@ -175,7 +175,7 @@ def test_update_dataset_unauthorized(client1, client2, dataset_fixture_name, req test_description = f'unauthorized {datetime.utcnow().isoformat()}' dataset_uri = dataset.datasetUri assert_that(update_dataset).raises(GqlError).when_called_with( - client2, dataset_uri, {'description': test_description, 'KmsAlias': dataset.KmsAlias} + client2, dataset_uri, {'description': test_description, 'KmsAlias': dataset.restricted.KmsAlias} ).contains('UnauthorizedOperation', dataset_uri) response = get_dataset(client1, dataset_uri) assert_that(response).contains_entry(datasetUri=dataset_uri).does_not_contain_entry(description=test_description) diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py index 934cac9d4..798303905 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py @@ -117,7 +117,7 @@ def test_delete_table(client1, dataset_fixture_name, request): aws_session_token=creds['sessionToken'], ) GlueClient(dataset_session, dataset.region).create_table( - database_name=dataset.GlueDatabaseName, table_name='todelete', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, table_name='todelete', bucket=dataset.restricted.S3BucketName ) response = sync_tables(client1, datasetUri=dataset.datasetUri) table_uri = [table.tableUri for table in response.get('nodes', []) if table.label == 'todelete'][0] From 9432a4e17745eed76ec85d445de39cf55ef1a050 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Thu, 19 Dec 2024 11:06:38 +0000 Subject: [PATCH 3/5] Integrational Tests fixes (#1744) ### Feature or Bugfix - Bugfix ### Detail - Fixed attributes' names for new 'restricted' section in queries - Don't expect GQL exceptions - `test_get_folder_unauthorized` removed, since we have no access control for this query ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --------- Co-authored-by: Sofia Sazonova --- .../modules/s3_datasets/api/dataset/types.py | 1 + .../modules/s3_datasets/global_conftest.py | 15 ++++-- .../modules/s3_datasets/queries.py | 4 ++ .../modules/s3_datasets/test_s3_dataset.py | 2 +- .../modules/s3_datasets/test_s3_folders.py | 12 ----- .../modules/s3_datasets/test_s3_tables.py | 13 +++-- .../s3_datasets/test_s3_tables_profiling.py | 7 +-- .../shares/s3_datasets_shares/conftest.py | 14 +++--- .../shared_test_functions.py | 13 ++--- .../test_new_crossacc_s3_share.py | 34 ++++++++----- .../test_persistent_crossacc_share.py | 48 +++++++++++-------- .../integration_tests/modules/shares/types.py | 5 -- 12 files changed, 95 insertions(+), 73 deletions(-) diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py index cc2e88139..6bb344b2e 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py @@ -31,6 +31,7 @@ gql.Field(name='region', type=gql.String), gql.Field(name='S3BucketName', type=gql.String), gql.Field(name='GlueDatabaseName', type=gql.String), + gql.Field(name='GlueCrawlerName', type=gql.String), gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String), gql.Field(name='KmsAlias', type=gql.String), gql.Field(name='importedS3Bucket', type=gql.Boolean), diff --git a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py index ea8341ee5..4f17212c7 100644 --- a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py +++ b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py @@ -17,7 +17,9 @@ sync_tables, create_folder, create_table_data_filter, + list_dataset_tables, ) + from tests_new.integration_tests.modules.datasets_base.queries import list_datasets from integration_tests.aws_clients.s3 import S3Client as S3CommonClient from integration_tests.modules.s3_datasets.aws_clients import S3Client, KMSClient, GlueClient, LakeFormationClient @@ -179,8 +181,8 @@ def create_tables(client, dataset): aws_session_token=creds['sessionToken'], ) file_path = os.path.join(os.path.dirname(__file__), 'sample_data/csv_table/csv_sample.csv') - s3_client = S3Client(dataset_session, dataset.region) - glue_client = GlueClient(dataset_session, dataset.region) + s3_client = S3Client(dataset_session, dataset.restricted.region) + glue_client = GlueClient(dataset_session, dataset.restricted.region) s3_client.upload_file_to_prefix( local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest1' ) @@ -198,8 +200,13 @@ def create_tables(client, dataset): table_name='integrationtest2', bucket=dataset.restricted.S3BucketName, ) - response = sync_tables(client, datasetUri=dataset.datasetUri) - return [table for table in response.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] + sync_tables(client, datasetUri=dataset.datasetUri) + response = list_dataset_tables(client, datasetUri=dataset.datasetUri) + return [ + table + for table in response.tables.get('nodes', []) + if table.restricted.GlueTableName.startswith('integrationtest') + ] def create_folders(client, dataset): diff --git a/tests_new/integration_tests/modules/s3_datasets/queries.py b/tests_new/integration_tests/modules/s3_datasets/queries.py index a92ba6828..abf3245ba 100644 --- a/tests_new/integration_tests/modules/s3_datasets/queries.py +++ b/tests_new/integration_tests/modules/s3_datasets/queries.py @@ -18,6 +18,7 @@ KmsAlias S3BucketName GlueDatabaseName + GlueCrawlerName IAMDatasetAdminRoleArn } environment { @@ -352,6 +353,7 @@ def update_folder(client, locationUri, input): mutation updateDatasetStorageLocation($locationUri: String!, $input: ModifyDatasetStorageLocationInput!) {{ updateDatasetStorageLocation(locationUri: $locationUri, input: $input) {{ locationUri + label }} }} """, @@ -500,6 +502,8 @@ def list_dataset_tables(client, datasetUri): tables {{ count nodes {{ + tableUri + label restricted {{ GlueTableName }} diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py index fcdcc2865..6fb71bbca 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py @@ -223,7 +223,7 @@ def test_start_crawler(client1, dataset_fixture_name, request): dataset = request.getfixturevalue(dataset_fixture_name) dataset_uri = dataset.datasetUri response = start_glue_crawler(client1, datasetUri=dataset_uri, input={}) - assert_that(response.Name).is_equal_to(dataset.GlueCrawlerName) + assert_that(response.Name).is_equal_to(dataset.restricted.GlueCrawlerName) # TODO: check it can run successfully + check sending prefix - We should first implement it in API diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py index 606b236da..7d25becf4 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py @@ -42,18 +42,6 @@ def test_get_folder(client1, folders_fixture_name, request): assert_that(response.label).is_equal_to('labelSessionFolderA') -@pytest.mark.parametrize( - 'folders_fixture_name', - ['session_s3_dataset1_folders'], -) -def test_get_folder_unauthorized(client2, folders_fixture_name, request): - folders = request.getfixturevalue(folders_fixture_name) - folder = folders[0] - assert_that(get_folder).raises(GqlError).when_called_with(client2, locationUri=folder.locationUri).contains( - 'UnauthorizedOperation', 'GET_DATASET_FOLDER', folder.locationUri - ) - - @pytest.mark.parametrize(*FOLDERS_FIXTURES_PARAMS) def test_update_folder(client1, folders_fixture_name, request): folders = request.getfixturevalue(folders_fixture_name) diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py index 798303905..4e0146b83 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py @@ -59,7 +59,11 @@ def test_list_dataset_tables(client1, dataset_fixture_name, request): dataset = request.getfixturevalue(dataset_fixture_name) response = list_dataset_tables(client1, dataset.datasetUri) assert_that(response.tables.count).is_greater_than_or_equal_to(2) - tables = [table for table in response.tables.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] + tables = [ + table + for table in response.tables.get('nodes', []) + if table.restricted.GlueTableName.startswith('integrationtest') + ] assert_that(len(tables)).is_equal_to(2) @@ -116,11 +120,12 @@ def test_delete_table(client1, dataset_fixture_name, request): aws_secret_access_key=creds['SessionKey'], aws_session_token=creds['sessionToken'], ) - GlueClient(dataset_session, dataset.region).create_table( + GlueClient(dataset_session, dataset.restricted.region).create_table( database_name=dataset.restricted.GlueDatabaseName, table_name='todelete', bucket=dataset.restricted.S3BucketName ) - response = sync_tables(client1, datasetUri=dataset.datasetUri) - table_uri = [table.tableUri for table in response.get('nodes', []) if table.label == 'todelete'][0] + sync_tables(client1, datasetUri=dataset.datasetUri) + response = list_dataset_tables(client1, datasetUri=dataset.datasetUri) + table_uri = [table.tableUri for table in response.tables.get('nodes', []) if table.label == 'todelete'][0] response = delete_table(client1, table_uri) assert_that(response).is_true() diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py index 6a605a58a..260cc0f58 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py @@ -44,10 +44,11 @@ def test_start_table_profiling(client1, dataset_fixture_name, tables_fixture_nam table = tables[0] dataset_uri = dataset.datasetUri response = start_dataset_profiling_run( - client1, input={'datasetUri': dataset_uri, 'tableUri': table.tableUri, 'GlueTableName': table.GlueTableName} + client1, + input={'datasetUri': dataset_uri, 'tableUri': table.tableUri, 'GlueTableName': table.restricted.GlueTableName}, ) assert_that(response.datasetUri).is_equal_to(dataset_uri) - assert_that(response.GlueTableName).is_equal_to(table.GlueTableName) + assert_that(response.GlueTableName).is_equal_to(table.restricted.GlueTableName) @pytest.mark.parametrize('dataset_fixture_name', ['session_s3_dataset1']) @@ -90,7 +91,7 @@ def test_get_table_profiling_run_by_confidentiality(client2, tables_fixture_name table_uri = tables[0].tableUri if confidentiality in ['Unclassified']: response = get_table_profiling_run(client2, tableUri=table_uri) - assert_that(response.GlueTableName).is_equal_to(tables[0].GlueTableName) + assert_that(response.GlueTableName).is_equal_to(tables[0].restricted.GlueTableName) else: assert_that(get_table_profiling_run).raises(GqlError).when_called_with(client2, table_uri).contains( 'UnauthorizedOperation', 'GET_TABLE_PROFILING_METRICS' diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py index 535480897..60c24f143 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py @@ -194,11 +194,11 @@ def principal1(request, group5, session_consumption_role_1): @pytest.fixture(params=['Group', 'ConsumptionRole']) -def share_params_main(request, session_share_1, session_share_consrole_1, session_s3_dataset1): +def share_params_main(request, session_share_1, session_cross_acc_env_1, session_share_consrole_1, session_s3_dataset1): if request.param == 'Group': - yield session_share_1, session_s3_dataset1 + yield session_share_1, session_s3_dataset1, session_cross_acc_env_1 else: - yield session_share_consrole_1, session_s3_dataset1 + yield session_share_consrole_1, session_s3_dataset1, session_cross_acc_env_1 @pytest.fixture(params=[(False, 'Group'), (True, 'Group'), (False, 'ConsumptionRole'), (True, 'ConsumptionRole')]) @@ -315,8 +315,10 @@ def persistent_role_share_1( @pytest.fixture(params=['Group', 'ConsumptionRole']) -def persistent_share_params_main(request, persistent_role_share_1, persistent_group_share_1): +def persistent_share_params_main( + request, persistent_cross_acc_env_1, persistent_role_share_1, persistent_group_share_1 +): if request.param == 'Group': - yield persistent_group_share_1 + yield persistent_group_share_1, persistent_cross_acc_env_1 else: - yield persistent_role_share_1 + yield persistent_role_share_1, persistent_cross_acc_env_1 diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py index b28a5bdeb..b7e101ca3 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py @@ -124,19 +124,20 @@ def check_bucket_access(client, s3_client, bucket_name, should_have_access): def check_accesspoint_access(client, s3_client, access_point_arn, item_uri, should_have_access): + folder = get_folder(client, item_uri) if should_have_access: - folder = get_folder(client, item_uri) assert_that(s3_client.list_accesspoint_folder_objects(access_point_arn, folder.S3Prefix + '/')).is_not_none() else: - assert_that(get_folder).raises(Exception).when_called_with(client, item_uri).contains( - 'is not authorized to perform: GET_DATASET_FOLDER' - ) + assert_that(s3_client.list_accesspoint_folder_objects).raises(ClientError).when_called_with( + access_point_arn, folder.S3Prefix + '/' + ).contains('AccessDenied') def check_share_items_access( client, group, shareUri, + share_environment, consumption_role, env_client, ): @@ -144,7 +145,7 @@ def check_share_items_access( dataset = share.dataset principal_type = share.principal.principalType if principal_type == 'Group': - credentials_str = get_environment_access_token(client, share.environment.environmentUri, group) + credentials_str = get_environment_access_token(client, share_environment.environmentUri, group) credentials = json.loads(credentials_str) session = boto3.Session( aws_access_key_id=credentials['AccessKey'], @@ -169,7 +170,7 @@ def check_share_items_access( f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{consumption_data.s3AccessPointName}' ) if principal_type == 'Group': - workgroup = athena_client.get_env_work_group(share.environment.label) + workgroup = athena_client.get_env_work_group(share_environment.label) athena_workgroup_output_location = None else: workgroup = 'primary' diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py index e54e04db4..5ba7c402e 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py @@ -137,7 +137,7 @@ def test_reject_share(client1, client5, session_cross_acc_env_1, session_s3_data def test_change_share_purpose(client5, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main change_request_purpose = update_share_request_reason(client5, share.shareUri, 'new purpose') assert_that(change_request_purpose).is_true() updated_share = get_share_object(client5, share.shareUri) @@ -153,19 +153,19 @@ def test_submit_object(client5, share_params_all): @pytest.mark.dependency(name='share_approved', depends=['share_submitted']) def test_approve_share(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_approve_share_object(client1, share.shareUri) @pytest.mark.dependency(name='share_succeeded', depends=['share_approved']) def test_share_succeeded(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_share_succeeded(client1, share.shareUri, check_contains_all_item_types=True) @pytest.mark.dependency(name='share_verified', depends=['share_succeeded']) def test_verify_share_items(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_verify_share_items(client1, share.shareUri) @@ -173,9 +173,14 @@ def test_verify_share_items(client1, share_params_main): def test_check_item_access( client5, session_cross_acc_env_1_aws_client, share_params_main, group5, session_consumption_role_1 ): - share, dataset = share_params_main + share, dataset, share_environment = share_params_main check_share_items_access( - client5, group5, share.shareUri, session_consumption_role_1, session_cross_acc_env_1_aws_client + client5, + group5, + share.shareUri, + share_environment, + session_consumption_role_1, + session_cross_acc_env_1_aws_client, ) @@ -183,7 +188,7 @@ def test_check_item_access( def test_unhealthy_items( client5, session_cross_acc_env_1_aws_client, session_cross_acc_env_1_integration_role_arn, share_params_main ): - share, _ = share_params_main + share, _, _ = share_params_main iam = session_cross_acc_env_1_aws_client.resource('iam') principal_role = iam.Role(share.principal.principalRoleName) # break s3 by removing policies @@ -209,7 +214,7 @@ def test_unhealthy_items( @pytest.mark.dependency(depends=['share_approved']) def test_reapply_unauthoried(client5, share_params_main): - share, _ = share_params_main + share, _, _ = share_params_main share_uri = share.shareUri share_object = get_share_object(client5, share_uri) item_uris = [item.shareItemUri for item in share_object['items'].nodes] @@ -220,7 +225,7 @@ def test_reapply_unauthoried(client5, share_params_main): @pytest.mark.dependency(depends=['share_approved']) def test_reapply(client1, share_params_main): - share, _ = share_params_main + share, _, _ = share_params_main share_uri = share.shareUri share_object = get_share_object(client1, share_uri) item_uris = [item.shareItemUri for item in share_object['items'].nodes] @@ -233,7 +238,7 @@ def test_reapply(client1, share_params_main): @pytest.mark.dependency(name='share_revoked', depends=['share_succeeded']) def test_revoke_share(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_share_ready(client1, share.shareUri) revoke_and_check_all_shared_items(client1, share.shareUri, check_contains_all_item_types=True) @@ -242,8 +247,13 @@ def test_revoke_share(client1, share_params_main): def test_revoke_succeeded( client1, client5, session_cross_acc_env_1_aws_client, share_params_main, group5, session_consumption_role_1 ): - share, dataset = share_params_main + share, dataset, share_environment = share_params_main check_all_items_revoke_job_succeeded(client1, share.shareUri, check_contains_all_item_types=True) check_share_items_access( - client5, group5, share.shareUri, session_consumption_role_1, session_cross_acc_env_1_aws_client + client5, + group5, + share.shareUri, + share_environment, + session_consumption_role_1, + session_cross_acc_env_1_aws_client, ) diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py index a7567db82..ad5a09f4f 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py @@ -25,26 +25,28 @@ def test_verify_share_items(client5, persistent_share_params_main): - check_verify_share_items(client5, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_verify_share_items(client5, share.shareUri) def test_check_share_items_access( client5, group5, persistent_share_params_main, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client ): + share, env = persistent_share_params_main check_share_items_access( client5, group5, - persistent_share_params_main.shareUri, + share.shareUri, + env, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ) def test_revoke_share(client1, persistent_share_params_main): - check_share_ready(client1, persistent_share_params_main.shareUri) - revoke_and_check_all_shared_items( - client1, persistent_share_params_main.shareUri, check_contains_all_item_types=True - ) + share, _ = persistent_share_params_main + check_share_ready(client1, share.shareUri) + revoke_and_check_all_shared_items(client1, share.shareUri, check_contains_all_item_types=True) def test_revoke_succeeded( @@ -55,45 +57,51 @@ def test_revoke_succeeded( persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ): - check_all_items_revoke_job_succeeded( - client1, persistent_share_params_main.shareUri, check_contains_all_item_types=True - ) + share, env = persistent_share_params_main + check_all_items_revoke_job_succeeded(client1, share.shareUri, check_contains_all_item_types=True) check_share_items_access( client5, group5, - persistent_share_params_main.shareUri, + share.shareUri, + env, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ) def test_delete_all_nonshared_items(client5, persistent_share_params_main): - check_share_ready(client5, persistent_share_params_main.shareUri) - delete_all_non_shared_items(client5, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_share_ready(client5, share.shareUri) + delete_all_non_shared_items(client5, share.shareUri) def test_add_items_back_to_share(client5, persistent_share_params_main): - check_share_ready(client5, persistent_share_params_main.shareUri) - add_all_items_to_share(client5, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_share_ready(client5, share.shareUri) + add_all_items_to_share(client5, share.shareUri) def test_submit_share(client5, persistent_share_params_main, persistent_s3_dataset1): - check_submit_share_object(client5, persistent_share_params_main.shareUri, persistent_s3_dataset1) + share, _ = persistent_share_params_main + check_submit_share_object(client5, share.shareUri, persistent_s3_dataset1) def test_approve_share(client1, persistent_share_params_main): - check_approve_share_object(client1, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_approve_share_object(client1, share.shareUri) def test_re_share_succeeded( client5, persistent_share_params_main, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client ): - check_share_succeeded(client5, persistent_share_params_main.shareUri, check_contains_all_item_types=True) - check_verify_share_items(client5, persistent_share_params_main.shareUri) + share, env = persistent_share_params_main + check_share_succeeded(client5, share.shareUri, check_contains_all_item_types=True) + check_verify_share_items(client5, share.shareUri) check_share_items_access( client5, - persistent_share_params_main.group, - persistent_share_params_main.shareUri, + share.group, + share.shareUri, + env, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ) diff --git a/tests_new/integration_tests/modules/shares/types.py b/tests_new/integration_tests/modules/shares/types.py index 844e8a3b7..099a4aad0 100644 --- a/tests_new/integration_tests/modules/shares/types.py +++ b/tests_new/integration_tests/modules/shares/types.py @@ -69,11 +69,6 @@ items(filter: $filter){{ {SharedItemSearchResult} }}, -environment{{ - environmentUri - label - region -}} canViewLogs, userRoleForShareObject, """ From 959e2a088519a11d1368310c17c1b95e3604088e Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Thu, 19 Dec 2024 09:51:28 -0500 Subject: [PATCH 4/5] Bump deps and fix snyk workflow (#1745) ### Feature or Bugfix - Bugfix ### Detail - Bump deps for `nanoid` and `path-to-regexp` npm packages - remove `with:` arg in Snyk Workflow for weekly runs ### Relates - https://github.com/data-dot-all/dataall/pull/1743 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .github/workflows/snyk.yaml | 2 -- frontend/package-lock.json | 14 ++++++++------ frontend/package.json | 7 ++++--- frontend/yarn.lock | 16 ++++++++-------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/.github/workflows/snyk.yaml b/.github/workflows/snyk.yaml index 41986f7df..6193beedd 100644 --- a/.github/workflows/snyk.yaml +++ b/.github/workflows/snyk.yaml @@ -28,5 +28,3 @@ jobs: run: snyk test --all-projects --detection-depth=5 --severity-threshold=high env: SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} - with: - args: --all-projects --detection-depth=5 --severity-threshold=high diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 96a830cfc..35ced25d0 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -39,6 +39,7 @@ "graphql-tag": "^2.12.6", "json5": "^2.2.2", "jwt-decode": "^3.1.2", + "nanoid": "^3.3.8", "notistack": "^2.0.3", "nprogress": "^0.2.0", "nth-check": "^2.0.1", @@ -26836,15 +26837,16 @@ "integrity": "sha512-wynEP02LmIbLpcYw8uBKpcfF6dmg2vcpKqxeH5UcoKEYdExslsdUA4ugFauuaeYdTB76ez6gJW8XAZ6CgkXYxA==" }, "node_modules/nanoid": { - "version": "3.3.7", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", - "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", + "version": "3.3.8", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", + "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", "funding": [ { "type": "github", "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "bin": { "nanoid": "bin/nanoid.cjs" }, @@ -27600,9 +27602,9 @@ } }, "node_modules/path-to-regexp": { - "version": "0.1.10", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz", - "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==", + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz", + "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==", "license": "MIT" }, "node_modules/path-type": { diff --git a/frontend/package.json b/frontend/package.json index 36777d00d..ef210e710 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -71,7 +71,8 @@ "uuid": "^10.0.0", "web-vitals": "^2.1.4", "yup": "^0.32.11", - "webpack": "^5.94.0" + "webpack": "^5.94.0", + "nanoid": "^3.3.8" }, "overrides": { "aws-amplify": { @@ -97,7 +98,7 @@ "express": "4.20.0", "ejs": "3.1.10", "fast-xml-parser": "4.4.1", - "path-to-regexp": "0.1.10", + "path-to-regexp": "0.1.12", "body-parser": "^1.20.3", "send": "0.19.0", "rollup": "3.29.5", @@ -119,7 +120,7 @@ "ejs": "3.1.10", "ws": "^8.17.1", "fast-xml-parser": "4.4.1", - "path-to-regexp": "0.1.10", + "path-to-regexp": "0.1.12", "body-parser": "^1.20.3", "send": "0.19.0", "rollup": "3.29.5", diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 48954a56d..4de9415d5 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -11238,10 +11238,10 @@ nanoclone@^0.2.1: resolved "https://registry.npmjs.org/nanoclone/-/nanoclone-0.2.1.tgz" integrity sha512-wynEP02LmIbLpcYw8uBKpcfF6dmg2vcpKqxeH5UcoKEYdExslsdUA4ugFauuaeYdTB76ez6gJW8XAZ6CgkXYxA== -nanoid@^3.3.6: - version "3.3.7" - resolved "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz" - integrity sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g== +nanoid@^3.3.6, nanoid@^3.3.8: + version "3.3.8" + resolved "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz" + integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w== natural-compare-lite@^1.4.0: version "1.4.0" @@ -11739,10 +11739,10 @@ path-scurry@^1.11.1: lru-cache "^10.2.0" minipass "^5.0.0 || ^6.0.2 || ^7.0.0" -path-to-regexp@0.1.10: - version "0.1.10" - resolved "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz" - integrity sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w== +path-to-regexp@0.1.12: + version "0.1.12" + resolved "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz" + integrity sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ== path-type@^4.0.0: version "4.0.0" From 59ce9ae0ce4c3398085a99e12b5e2eaad688c482 Mon Sep 17 00:00:00 2001 From: Sofia Sazonova Date: Mon, 23 Dec 2024 08:35:12 +0000 Subject: [PATCH 5/5] CICD Integration tests: new shares for pre-existing datasets (#1611) ### Feature or Bugfix - Feature ### Detail For group share and consumption role shares perform - [x] Create Share request - [x] Submit request without Items (not allowed) - [x] Add items to request - [x] Submit request - [x] Change request purpose - [x] Reject request - [x] Change reject purpose - [x] Approve request - [x] Request succeeded - [x] Item health verification - [x] Folder sharing validation (cal list objects via s3 accesspoint) - [x] Bucket sharing validation (cal list objects in s3 bucket ) - [x] Table sharing validation (cal perform athena query: select * from db.table ) - [x] Revoke items - [x] Folder sharing validation (no more access) - [x] Bucket sharing validation (no more access) - [x] Table sharing validation (no more access) - [x] Delete share Tests are the same as for new shares for new datasets Fixtures are parametrised ### Relates - #1376 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --------- Co-authored-by: Sofia Sazonova --- .../core/environment/cdk/environment_stack.py | 2 +- .../integration_tests/aws_clients/athena.py | 18 +- .../integration_tests/aws_clients/iam.py | 2 +- tests_new/integration_tests/conftest.py | 18 ++ .../core/environment/global_conftest.py | 31 ++- .../shares/s3_datasets_shares/conftest.py | 253 ++++++++++++++++-- .../shared_test_functions.py | 2 +- .../test_new_crossacc_s3_share.py | 159 +++++------ .../test_persistent_crossacc_share.py | 2 +- 9 files changed, 367 insertions(+), 120 deletions(-) diff --git a/backend/dataall/core/environment/cdk/environment_stack.py b/backend/dataall/core/environment/cdk/environment_stack.py index df6c8cc17..85694cda1 100644 --- a/backend/dataall/core/environment/cdk/environment_stack.py +++ b/backend/dataall/core/environment/cdk/environment_stack.py @@ -697,7 +697,7 @@ def create_integration_tests_role(self): ], effect=iam.Effect.ALLOW, resources=[ - f'arn:aws:iam::{self.account}:role/dataall-test-*', + f'arn:aws:iam::{self.account}:role/dataall-test*', f'arn:aws:iam::{self.account}:role/dataall-session*', ], ), diff --git a/tests_new/integration_tests/aws_clients/athena.py b/tests_new/integration_tests/aws_clients/athena.py index 5adaf1cd5..58040a855 100644 --- a/tests_new/integration_tests/aws_clients/athena.py +++ b/tests_new/integration_tests/aws_clients/athena.py @@ -29,9 +29,19 @@ def list_work_groups(self): result = self._client.list_work_groups() return [x['Name'] for x in result['WorkGroups']] - def get_env_work_group(self, env_name): + def get_work_group(self, env_name, group): workgroups = self.list_work_groups() + group_snakify = group.lower().replace('-', '_') + env_name_snakify = env_name.lower().replace('-', '_') + default_workgroup = 'primary' + env_workgroup, group_workgroup = None, None for workgroup in workgroups: - if env_name in workgroup: - return workgroup - return workgroups[0] if workgroups else None + if env_name_snakify in workgroup: + env_workgroup = workgroup + if group_snakify in workgroup: + group_workgroup = workgroup + if group_workgroup: + return group_workgroup + elif env_workgroup: + return env_workgroup + return default_workgroup diff --git a/tests_new/integration_tests/aws_clients/iam.py b/tests_new/integration_tests/aws_clients/iam.py index d85ae06b1..98536ee76 100644 --- a/tests_new/integration_tests/aws_clients/iam.py +++ b/tests_new/integration_tests/aws_clients/iam.py @@ -66,7 +66,7 @@ def put_consumption_role_policy(self, role_name): "Version": "2012-10-17", "Statement": [ { - "Sid": "VisualEditor0", + "Sid": "TestPolicyDoc", "Effect": "Allow", "Action": [ "s3:*", diff --git a/tests_new/integration_tests/conftest.py b/tests_new/integration_tests/conftest.py index 23e8f76a3..b231e98a3 100644 --- a/tests_new/integration_tests/conftest.py +++ b/tests_new/integration_tests/conftest.py @@ -108,6 +108,12 @@ def user5(userdata): yield userdata['testUser5'] +@pytest.fixture(scope='session', autouse=True) +def user6(userdata): + # Existing user with name and password + yield userdata['testUser6'] + + @pytest.fixture(scope='session', autouse=True) def group1(): # Existing Cognito group with name testGroup1 @@ -143,6 +149,13 @@ def group5(): yield 'testGroup5' +@pytest.fixture(scope='session', autouse=True) +def group6(): + # Existing Cognito group with name testGroup5 + # Add user5 + yield 'testGroup6' + + @pytest.fixture(scope='session') def client1(user1) -> Client: yield Client(user1.username, user1.password) @@ -168,6 +181,11 @@ def client5(user5) -> Client: yield Client(user5.username, user5.password) +@pytest.fixture(scope='session') +def client6(user6) -> Client: + yield Client(user6.username, user6.password) + + @pytest.fixture(scope='session') def clientTenant(userTenant) -> Client: yield Client(userTenant.username, userTenant.password) diff --git a/tests_new/integration_tests/core/environment/global_conftest.py b/tests_new/integration_tests/core/environment/global_conftest.py index 47f58b8e5..173645ba1 100644 --- a/tests_new/integration_tests/core/environment/global_conftest.py +++ b/tests_new/integration_tests/core/environment/global_conftest.py @@ -1,7 +1,7 @@ +import pytest import logging from contextlib import contextmanager -import pytest from assertpy import assert_that from integration_tests.aws_clients.sts import STSClient @@ -13,7 +13,11 @@ list_environments, invite_group_on_env, ) -from integration_tests.core.organizations.queries import create_organization +from integration_tests.core.organizations.queries import ( + create_organization, + list_organizations, + invite_team_to_organization, +) from integration_tests.core.stack.utils import check_stack_ready from tests_new.integration_tests.aws_clients.s3 import S3Client from tests_new.integration_tests.core.environment.utils import update_env_stack @@ -192,8 +196,29 @@ def persistent_env1(client1, group1, testdata): @pytest.fixture(scope='session') -def persistent_cross_acc_env_1(client5, group5, testdata): +def persistent_cross_acc_env_1(client5, group5, client6, group6, testdata): with get_or_create_persistent_env('persistent_cross_acc_env_1', client5, group5, testdata) as env: + orgs = [org.organizationUri for org in list_organizations(client6).nodes] + envs = [org.environmentUri for org in list_environments(client6).nodes] + if env.organization.organizationUri not in orgs: + invite_team_to_organization(client5, env.organization.organizationUri, group6) + if env.environmentUri not in envs: + invite_group_on_env( + client5, + env.environmentUri, + group6, + [ + 'UPDATE_ENVIRONMENT', + 'GET_ENVIRONMENT', + 'ADD_ENVIRONMENT_CONSUMPTION_ROLES', + 'LIST_ENVIRONMENT_CONSUMPTION_ROLES', + 'LIST_ENVIRONMENT_GROUPS', + 'CREDENTIALS_ENVIRONMENT', + 'CREATE_SHARE_OBJECT', + 'LIST_ENVIRONMENT_SHARED_WITH_OBJECTS', + ], + ) + update_env_stack(client5, env) yield env diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py index 60c24f143..69aae09fe 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py @@ -18,6 +18,7 @@ from tests_new.integration_tests.modules.shares.utils import check_share_ready test_session_cons_role_name = 'dataall-test-ShareTestConsumptionRole' +test_session_cons_role_name_2 = 'dataall-test-ShareTestConsumptionRole2' test_persistent_cons_role_name = 'dataall-test-PersistentConsumptionRole' @@ -77,6 +78,22 @@ def session_consumption_role_1(client5, group5, session_cross_acc_env_1, session iam_client.delete_consumption_role(test_session_cons_role_name) +@pytest.fixture(scope='session') +def session_consumption_role_2(client6, group6, persistent_cross_acc_env_1, persistent_cross_acc_env_1_aws_client): + consumption_role = create_consumption_role( + client6, + group6, + persistent_cross_acc_env_1, + persistent_cross_acc_env_1_aws_client, + test_session_cons_role_name_2, + 'SessionConsRole2', + ) + yield consumption_role + remove_consumption_role(client6, persistent_cross_acc_env_1.environmentUri, consumption_role.consumptionRoleUri) + iam_client = IAMClient(session=persistent_cross_acc_env_1_aws_client, region=persistent_cross_acc_env_1['region']) + iam_client.delete_consumption_role(test_session_cons_role_name_2) + + @pytest.fixture(scope='session') def session_share_1( client5, @@ -130,6 +147,31 @@ def session_share_2( clean_up_share(client5, share2.shareUri) +@pytest.fixture(scope='session') +def session_share_3( + client6, + client1, + persistent_env1, + persistent_cross_acc_env_1, + persistent_s3_dataset1, + group6, +): + share3 = create_share_object( + client=client6, + dataset_or_item_params={'datasetUri': persistent_s3_dataset1.datasetUri}, + environmentUri=persistent_cross_acc_env_1.environmentUri, + groupUri=group6, + principalId=group6, + principalType='Group', + requestPurpose='test create share object', + attachMissingPolicies=True, + permissions=['Read'], + ) + share3 = get_share_object(client6, share3.shareUri) + yield share3 + clean_up_share(client6, share3.shareUri) + + @pytest.fixture(scope='session') def session_share_consrole_1( client5, @@ -185,43 +227,208 @@ def session_share_consrole_2( clean_up_share(client5, share2cr.shareUri) -@pytest.fixture(params=['Group', 'ConsumptionRole']) -def principal1(request, group5, session_consumption_role_1): - if request.param == 'Group': - yield group5, request.param - else: - yield session_consumption_role_1.consumptionRoleUri, request.param +@pytest.fixture(scope='session') +def session_share_consrole_3( + client6, + client1, + persistent_env1, + persistent_cross_acc_env_1, + persistent_s3_dataset1, + group6, + session_consumption_role_2, +): + share3cr = create_share_object( + client=client6, + dataset_or_item_params={'datasetUri': persistent_s3_dataset1.datasetUri}, + environmentUri=persistent_cross_acc_env_1.environmentUri, + groupUri=group6, + principalId=session_consumption_role_2.consumptionRoleUri, + principalType='ConsumptionRole', + requestPurpose='test create share object', + attachMissingPolicies=True, + permissions=['Read'], + ) + share3cr = get_share_object(client6, share3cr.shareUri) + yield share3cr + clean_up_share(client6, share3cr.shareUri) -@pytest.fixture(params=['Group', 'ConsumptionRole']) -def share_params_main(request, session_share_1, session_cross_acc_env_1, session_share_consrole_1, session_s3_dataset1): - if request.param == 'Group': - yield session_share_1, session_s3_dataset1, session_cross_acc_env_1 - else: - yield session_share_consrole_1, session_s3_dataset1, session_cross_acc_env_1 +@pytest.fixture( + params=[ + ('session_dataset', 'Group'), + ('session_persistent_dataset', 'Group'), + ('session_dataset', 'ConsumptionRole'), + ('session_persistent_dataset', 'ConsumptionRole'), + ] +) +def new_share_param( + request, + group5, + group6, + client5, + client6, + session_consumption_role_1, + session_consumption_role_2, + session_s3_dataset1, + persistent_s3_dataset1, + session_cross_acc_env_1, + persistent_cross_acc_env_1, +): # return: client, group, dataset, env, principal_id, principal_type + share_type, principal_type = request.param + if principal_type == 'Group': + if share_type == 'session_dataset': + yield client5, group5, session_s3_dataset1, session_cross_acc_env_1, group5, principal_type + if share_type == 'session_persistent_dataset': + yield ( + client6, + group6, + persistent_s3_dataset1, + persistent_cross_acc_env_1, + group6, + principal_type, + ) + else: + if share_type == 'session_dataset': + yield ( + client5, + group5, + session_s3_dataset1, + session_cross_acc_env_1, + session_consumption_role_1.consumptionRoleUri, + principal_type, + ) + if share_type == 'session_persistent_dataset': + yield ( + client6, + group6, + persistent_s3_dataset1, + persistent_cross_acc_env_1, + session_consumption_role_2.consumptionRoleUri, + principal_type, + ) + + +@pytest.fixture( + params=[ + ('session_dataset', 'Group'), + ('session_persistent_dataset', 'Group'), + ('session_dataset', 'ConsumptionRole'), + ('session_persistent_dataset', 'ConsumptionRole'), + ] +) +def share_params_main( + request, + group5, + group6, + client5, + client6, + session_share_1, + session_share_consrole_1, + session_share_3, + session_share_consrole_3, + session_s3_dataset1, + persistent_s3_dataset1, + session_cross_acc_env_1_aws_client, + persistent_cross_acc_env_1_aws_client, + persistent_cross_acc_env_1, + session_cross_acc_env_1_integration_role_arn, + persistent_cross_acc_env_1_integration_role_arn, + session_cross_acc_env_1, + session_consumption_role_2, + session_consumption_role_1, +): # return: client, group, env_client, role, share, dataset + share_type, principal_type = request.param + if principal_type == 'Group': + if share_type == 'session_dataset': + yield ( + client5, + group5, + session_cross_acc_env_1, + session_cross_acc_env_1_aws_client, + session_consumption_role_1, + session_share_1, + session_s3_dataset1, + session_cross_acc_env_1_integration_role_arn, + ) + if share_type == 'session_persistent_dataset': + yield ( + client6, + group6, + persistent_cross_acc_env_1, + persistent_cross_acc_env_1_aws_client, + session_consumption_role_2, + session_share_3, + persistent_s3_dataset1, + persistent_cross_acc_env_1_integration_role_arn, + ) -@pytest.fixture(params=[(False, 'Group'), (True, 'Group'), (False, 'ConsumptionRole'), (True, 'ConsumptionRole')]) + else: + if share_type == 'session_dataset': + yield ( + client5, + group5, + session_cross_acc_env_1, + session_cross_acc_env_1_aws_client, + session_consumption_role_1, + session_share_consrole_1, + session_s3_dataset1, + session_cross_acc_env_1_integration_role_arn, + ) + if share_type == 'session_persistent_dataset': + yield ( + client6, + group6, + persistent_cross_acc_env_1, + persistent_cross_acc_env_1_aws_client, + session_consumption_role_2, + session_share_consrole_3, + persistent_s3_dataset1, + persistent_cross_acc_env_1_integration_role_arn, + ) + + +@pytest.fixture( + params=[ + (False, 'Group', 'session_dataset'), + (False, 'Group', 'session_persistent_dataset'), + (True, 'Group', None), + (False, 'ConsumptionRole', 'session_dataset'), + (False, 'ConsumptionRole', 'session_persistent_dataset'), + (True, 'ConsumptionRole', None), + ] +) def share_params_all( request, + client5, + client6, session_share_1, session_share_consrole_1, + session_share_3, + session_share_consrole_3, session_s3_dataset1, session_share_2, session_share_consrole_2, session_imported_sse_s3_dataset1, -): - autoapproval, principal_type = request.param + persistent_s3_dataset1, +): # return client, share, dataset + autoapproval, principal_type, share_type = request.param if autoapproval: if principal_type == 'Group': - yield session_share_2, session_imported_sse_s3_dataset1 + yield client5, session_share_2, session_imported_sse_s3_dataset1 else: - yield session_share_consrole_2, session_imported_sse_s3_dataset1 + yield client5, session_share_consrole_2, session_imported_sse_s3_dataset1 else: if principal_type == 'Group': - yield session_share_1, session_s3_dataset1 + if share_type == 'session_dataset': + yield client5, session_share_1, session_s3_dataset1 + if share_type == 'session_persistent_dataset': + yield client6, session_share_3, persistent_s3_dataset1 else: - yield session_share_consrole_1, session_s3_dataset1 + if share_type == 'session_dataset': + yield client5, session_share_consrole_1, session_s3_dataset1 + if share_type == 'session_persistent_dataset': + yield client6, session_share_consrole_3, persistent_s3_dataset1 # --------------PERSISTENT FIXTURES---------------------------- @@ -255,12 +462,12 @@ def persistent_group_share_1( client1, persistent_env1, persistent_cross_acc_env_1, - updated_persistent_s3_dataset1, + persistent_s3_dataset1, group5, ): share1 = create_share_object( client=client5, - dataset_or_item_params={'datasetUri': updated_persistent_s3_dataset1.datasetUri}, + dataset_or_item_params={'datasetUri': persistent_s3_dataset1.datasetUri}, environmentUri=persistent_cross_acc_env_1.environmentUri, groupUri=group5, principalId=group5, @@ -287,13 +494,13 @@ def persistent_role_share_1( client1, persistent_env1, persistent_cross_acc_env_1, - updated_persistent_s3_dataset1, + persistent_s3_dataset1, group5, persistent_consumption_role_1, ): share1 = create_share_object( client=client5, - dataset_or_item_params={'datasetUri': updated_persistent_s3_dataset1.datasetUri}, + dataset_or_item_params={'datasetUri': persistent_s3_dataset1.datasetUri}, environmentUri=persistent_cross_acc_env_1.environmentUri, groupUri=group5, principalId=persistent_consumption_role_1.consumptionRoleUri, diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py index b7e101ca3..035d339d6 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py @@ -170,7 +170,7 @@ def check_share_items_access( f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{consumption_data.s3AccessPointName}' ) if principal_type == 'Group': - workgroup = athena_client.get_env_work_group(share_environment.label) + workgroup = athena_client.get_work_group(share_environment.label, group) athena_workgroup_output_location = None else: workgroup = 'primary' diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py index 5ba7c402e..11d87859e 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py @@ -31,13 +31,13 @@ ) -def test_create_and_delete_share_object(client5, session_cross_acc_env_1, session_s3_dataset1, principal1, group5): - principal_id, principal_type = principal1 +def test_create_and_delete_share_object(new_share_param): + client, group, dataset, env, principal_id, principal_type = new_share_param share = create_share_object( - client=client5, - dataset_or_item_params={'datasetUri': session_s3_dataset1.datasetUri}, - environmentUri=session_cross_acc_env_1.environmentUri, - groupUri=group5, + client=client, + dataset_or_item_params={'datasetUri': dataset.datasetUri}, + environmentUri=env.environmentUri, + groupUri=group, principalId=principal_id, principalType=principal_type, requestPurpose='test create share object', @@ -45,84 +45,84 @@ def test_create_and_delete_share_object(client5, session_cross_acc_env_1, sessio permissions=['Read'], ) assert_that(share.status).is_equal_to('Draft') - delete_share_object(client5, share.shareUri) + delete_share_object(client, share.shareUri) -def test_submit_empty_object(client5, session_cross_acc_env_1, session_s3_dataset1, group5, principal1): +def test_submit_empty_object(new_share_param): # here Exception is not recognized as GqlError, so we use base class # toDo: back to GqlError - principal_id, principal_type = principal1 + client, group, dataset, env, principal_id, principal_type = new_share_param share = create_share_object( - client=client5, - dataset_or_item_params={'datasetUri': session_s3_dataset1.datasetUri}, - environmentUri=session_cross_acc_env_1.environmentUri, - groupUri=group5, + client=client, + dataset_or_item_params={'datasetUri': dataset.datasetUri}, + environmentUri=env.environmentUri, + groupUri=group, principalId=principal_id, principalType=principal_type, requestPurpose='test create share object', attachMissingPolicies=True, permissions=['Read'], ) - assert_that(submit_share_object).raises(Exception).when_called_with(client5, share.shareUri).contains( + assert_that(submit_share_object).raises(Exception).when_called_with(client, share.shareUri).contains( 'ShareItemsFound', 'The request is empty' ) - clean_up_share(client5, share.shareUri) + clean_up_share(client, share.shareUri) -def test_add_share_items(client5, session_cross_acc_env_1, session_s3_dataset1, group5, principal1): - principal_id, principal_type = principal1 +def test_add_share_items(new_share_param): + client, group, dataset, env, principal_id, principal_type = new_share_param share = create_share_object( - client=client5, - dataset_or_item_params={'datasetUri': session_s3_dataset1.datasetUri}, - environmentUri=session_cross_acc_env_1.environmentUri, - groupUri=group5, + client=client, + dataset_or_item_params={'datasetUri': dataset.datasetUri}, + environmentUri=env.environmentUri, + groupUri=group, principalId=principal_id, principalType=principal_type, requestPurpose='test create share object', attachMissingPolicies=True, permissions=['Read'], ) - share = get_share_object(client5, share.shareUri) + share = get_share_object(client, share.shareUri) items = share['items'].nodes assert_that(len(items)).is_greater_than(0) assert_that(items[0].status).is_none() item_to_add = items[0] - share_item_uri = add_share_item(client5, share.shareUri, item_to_add.itemUri, item_to_add.itemType) + share_item_uri = add_share_item(client, share.shareUri, item_to_add.itemUri, item_to_add.itemType) assert_that(share_item_uri).is_not_none() - updated_share = get_share_object(client5, share.shareUri, {'isShared': True}) + updated_share = get_share_object(client, share.shareUri, {'isShared': True}) items = updated_share['items'].nodes assert_that(items).is_length(1) assert_that(items[0].shareItemUri).is_equal_to(share_item_uri) assert_that(items[0].status).is_equal_to('PendingApproval') - clean_up_share(client5, share.shareUri) + clean_up_share(client, share.shareUri) -def test_reject_share(client1, client5, session_cross_acc_env_1, session_s3_dataset1, group5, principal1): - principal_id, principal_type = principal1 +def test_reject_share(client1, new_share_param): + client, group, dataset, env, principal_id, principal_type = new_share_param share = create_share_object( - client=client5, - dataset_or_item_params={'datasetUri': session_s3_dataset1.datasetUri}, - environmentUri=session_cross_acc_env_1.environmentUri, - groupUri=group5, + client=client, + dataset_or_item_params={'datasetUri': dataset.datasetUri}, + environmentUri=env.environmentUri, + groupUri=group, principalId=principal_id, principalType=principal_type, requestPurpose='test create share object', attachMissingPolicies=True, permissions=['Read'], ) - share = get_share_object(client5, share.shareUri) + share = get_share_object(client, share.shareUri) items = share['items'].nodes assert_that(len(items)).is_greater_than(0) assert_that(items[0].status).is_none() item_to_add = items[0] - add_share_item(client5, share.shareUri, item_to_add.itemUri, item_to_add.itemType) - submit_share_object(client5, share.shareUri) + add_share_item(client, share.shareUri, item_to_add.itemUri, item_to_add.itemType) + submit_share_object(client, share.shareUri) reject_share_object(client1, share.shareUri) updated_share = get_share_object(client1, share.shareUri) @@ -130,79 +130,68 @@ def test_reject_share(client1, client5, session_cross_acc_env_1, session_s3_data change_request_purpose = update_share_reject_reason(client1, share.shareUri, 'new purpose') assert_that(change_request_purpose).is_true() - updated_share = get_share_object(client5, share.shareUri) + updated_share = get_share_object(client, share.shareUri) assert_that(updated_share.rejectPurpose).is_equal_to('new purpose') - clean_up_share(client5, share.shareUri) + clean_up_share(client, share.shareUri) -def test_change_share_purpose(client5, share_params_main): - share, dataset, _ = share_params_main - change_request_purpose = update_share_request_reason(client5, share.shareUri, 'new purpose') +def test_change_share_purpose(share_params_main): + client, _, _, _, _, share, _, _ = share_params_main + change_request_purpose = update_share_request_reason(client, share.shareUri, 'new purpose') assert_that(change_request_purpose).is_true() - updated_share = get_share_object(client5, share.shareUri) + updated_share = get_share_object(client, share.shareUri) assert_that(updated_share.requestPurpose).is_equal_to('new purpose') @pytest.mark.dependency(name='share_submitted') -def test_submit_object(client5, share_params_all): - share, dataset = share_params_all - add_all_items_to_share(client5, share.shareUri) - check_submit_share_object(client5, share.shareUri, dataset) +def test_submit_object(share_params_all): + client, share, dataset = share_params_all + add_all_items_to_share(client, share.shareUri) + check_submit_share_object(client, share.shareUri, dataset) @pytest.mark.dependency(name='share_approved', depends=['share_submitted']) def test_approve_share(client1, share_params_main): - share, dataset, _ = share_params_main + client, _, _, _, _, share, _, _ = share_params_main check_approve_share_object(client1, share.shareUri) @pytest.mark.dependency(name='share_succeeded', depends=['share_approved']) def test_share_succeeded(client1, share_params_main): - share, dataset, _ = share_params_main + client, _, _, _, _, share, _, _ = share_params_main check_share_succeeded(client1, share.shareUri, check_contains_all_item_types=True) @pytest.mark.dependency(name='share_verified', depends=['share_succeeded']) def test_verify_share_items(client1, share_params_main): - share, dataset, _ = share_params_main + client, _, _, _, _, share, _, _ = share_params_main check_verify_share_items(client1, share.shareUri) @pytest.mark.dependency(depends=['share_verified']) -def test_check_item_access( - client5, session_cross_acc_env_1_aws_client, share_params_main, group5, session_consumption_role_1 -): - share, dataset, share_environment = share_params_main - check_share_items_access( - client5, - group5, - share.shareUri, - share_environment, - session_consumption_role_1, - session_cross_acc_env_1_aws_client, - ) +def test_check_item_access(share_params_main): + client, group, env, env_client, role, share, _, _ = share_params_main + check_share_items_access(client, group, share.shareUri, env, role, env_client) @pytest.mark.dependency(name='unhealthy_items', depends=['share_verified']) -def test_unhealthy_items( - client5, session_cross_acc_env_1_aws_client, session_cross_acc_env_1_integration_role_arn, share_params_main -): - share, _, _ = share_params_main - iam = session_cross_acc_env_1_aws_client.resource('iam') +def test_unhealthy_items(share_params_main): + client, group, env, env_client, role, share, _, integration_role_arn = share_params_main + iam = env_client.resource('iam') principal_role = iam.Role(share.principal.principalRoleName) # break s3 by removing policies for policy in principal_role.attached_policies.all(): if '/dataall-env-' in policy.arn and 'share-policy' in policy.arn: principal_role.detach_policy(PolicyArn=policy.arn) # break lf by removing DESCRIBE perms from principal - lf_client = LakeFormationClient(session_cross_acc_env_1_aws_client, session_cross_acc_env_1_aws_client.region_name) - lf_client.add_role_to_datalake_admin(session_cross_acc_env_1_integration_role_arn) + lf_client = LakeFormationClient(env_client, env_client.region_name) + lf_client.add_role_to_datalake_admin(integration_role_arn) db_name = f'dataall_{share.dataset.datasetName}_{share.dataset.datasetUri}_shared'.replace('-', '_') lf_client.revoke_db_perms(principal_role.arn, db_name, ['DESCRIBE']) # verify all items are `Unhealthy` check_verify_share_items( - client5, + client, share.shareUri, expected_health_status=['Unhealthy'], expected_health_msg=[ @@ -213,19 +202,19 @@ def test_unhealthy_items( @pytest.mark.dependency(depends=['share_approved']) -def test_reapply_unauthoried(client5, share_params_main): - share, _, _ = share_params_main +def test_reapply_unauthoried(share_params_main): + client, _, _, _, _, share, _, _ = share_params_main share_uri = share.shareUri - share_object = get_share_object(client5, share_uri) + share_object = get_share_object(client, share_uri) item_uris = [item.shareItemUri for item in share_object['items'].nodes] - assert_that(reapply_items_share_object).raises(GqlError).when_called_with(client5, share_uri, item_uris).contains( + assert_that(reapply_items_share_object).raises(GqlError).when_called_with(client, share_uri, item_uris).contains( 'UnauthorizedOperation' ) @pytest.mark.dependency(depends=['share_approved']) def test_reapply(client1, share_params_main): - share, _, _ = share_params_main + _, _, _, _, _, share, _, _ = share_params_main share_uri = share.shareUri share_object = get_share_object(client1, share_uri) item_uris = [item.shareItemUri for item in share_object['items'].nodes] @@ -237,23 +226,21 @@ def test_reapply(client1, share_params_main): @pytest.mark.dependency(name='share_revoked', depends=['share_succeeded']) -def test_revoke_share(client1, share_params_main): - share, dataset, _ = share_params_main - check_share_ready(client1, share.shareUri) - revoke_and_check_all_shared_items(client1, share.shareUri, check_contains_all_item_types=True) +def test_revoke_share(share_params_main): + client, _, _, _, _, share, _, _ = share_params_main + check_share_ready(client, share.shareUri) + revoke_and_check_all_shared_items(client, share.shareUri, check_contains_all_item_types=True) @pytest.mark.dependency(name='share_revoke_succeeded', depends=['share_revoked']) -def test_revoke_succeeded( - client1, client5, session_cross_acc_env_1_aws_client, share_params_main, group5, session_consumption_role_1 -): - share, dataset, share_environment = share_params_main - check_all_items_revoke_job_succeeded(client1, share.shareUri, check_contains_all_item_types=True) +def test_revoke_succeeded(client1, share_params_main): + client, group, env, env_client, role, share, dataset, _ = share_params_main + check_all_items_revoke_job_succeeded(client, share.shareUri, check_contains_all_item_types=True) check_share_items_access( - client5, - group5, + client, + group, share.shareUri, - share_environment, - session_consumption_role_1, - session_cross_acc_env_1_aws_client, + env, + role, + env_client, ) diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py index ad5a09f4f..1e2e7683d 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py @@ -99,7 +99,7 @@ def test_re_share_succeeded( check_verify_share_items(client5, share.shareUri) check_share_items_access( client5, - share.group, + share.principal.SamlGroupName, share.shareUri, env, persistent_consumption_role_1,