diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py index cc2e88139..6bb344b2e 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py @@ -31,6 +31,7 @@ gql.Field(name='region', type=gql.String), gql.Field(name='S3BucketName', type=gql.String), gql.Field(name='GlueDatabaseName', type=gql.String), + gql.Field(name='GlueCrawlerName', type=gql.String), gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String), gql.Field(name='KmsAlias', type=gql.String), gql.Field(name='importedS3Bucket', type=gql.Boolean), diff --git a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py index ea8341ee5..4f17212c7 100644 --- a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py +++ b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py @@ -17,7 +17,9 @@ sync_tables, create_folder, create_table_data_filter, + list_dataset_tables, ) + from tests_new.integration_tests.modules.datasets_base.queries import list_datasets from integration_tests.aws_clients.s3 import S3Client as S3CommonClient from integration_tests.modules.s3_datasets.aws_clients import S3Client, KMSClient, GlueClient, LakeFormationClient @@ -179,8 +181,8 @@ def create_tables(client, dataset): aws_session_token=creds['sessionToken'], ) file_path = os.path.join(os.path.dirname(__file__), 'sample_data/csv_table/csv_sample.csv') - s3_client = S3Client(dataset_session, dataset.region) - glue_client = GlueClient(dataset_session, dataset.region) + s3_client = S3Client(dataset_session, dataset.restricted.region) + glue_client = GlueClient(dataset_session, dataset.restricted.region) s3_client.upload_file_to_prefix( local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest1' ) @@ -198,8 +200,13 @@ def create_tables(client, dataset): table_name='integrationtest2', bucket=dataset.restricted.S3BucketName, ) - response = sync_tables(client, datasetUri=dataset.datasetUri) - return [table for table in response.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] + sync_tables(client, datasetUri=dataset.datasetUri) + response = list_dataset_tables(client, datasetUri=dataset.datasetUri) + return [ + table + for table in response.tables.get('nodes', []) + if table.restricted.GlueTableName.startswith('integrationtest') + ] def create_folders(client, dataset): diff --git a/tests_new/integration_tests/modules/s3_datasets/queries.py b/tests_new/integration_tests/modules/s3_datasets/queries.py index a92ba6828..abf3245ba 100644 --- a/tests_new/integration_tests/modules/s3_datasets/queries.py +++ b/tests_new/integration_tests/modules/s3_datasets/queries.py @@ -18,6 +18,7 @@ KmsAlias S3BucketName GlueDatabaseName + GlueCrawlerName IAMDatasetAdminRoleArn } environment { @@ -352,6 +353,7 @@ def update_folder(client, locationUri, input): mutation updateDatasetStorageLocation($locationUri: String!, $input: ModifyDatasetStorageLocationInput!) {{ updateDatasetStorageLocation(locationUri: $locationUri, input: $input) {{ locationUri + label }} }} """, @@ -500,6 +502,8 @@ def list_dataset_tables(client, datasetUri): tables {{ count nodes {{ + tableUri + label restricted {{ GlueTableName }} diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py index fcdcc2865..6fb71bbca 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py @@ -223,7 +223,7 @@ def test_start_crawler(client1, dataset_fixture_name, request): dataset = request.getfixturevalue(dataset_fixture_name) dataset_uri = dataset.datasetUri response = start_glue_crawler(client1, datasetUri=dataset_uri, input={}) - assert_that(response.Name).is_equal_to(dataset.GlueCrawlerName) + assert_that(response.Name).is_equal_to(dataset.restricted.GlueCrawlerName) # TODO: check it can run successfully + check sending prefix - We should first implement it in API diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py index 606b236da..7d25becf4 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_folders.py @@ -42,18 +42,6 @@ def test_get_folder(client1, folders_fixture_name, request): assert_that(response.label).is_equal_to('labelSessionFolderA') -@pytest.mark.parametrize( - 'folders_fixture_name', - ['session_s3_dataset1_folders'], -) -def test_get_folder_unauthorized(client2, folders_fixture_name, request): - folders = request.getfixturevalue(folders_fixture_name) - folder = folders[0] - assert_that(get_folder).raises(GqlError).when_called_with(client2, locationUri=folder.locationUri).contains( - 'UnauthorizedOperation', 'GET_DATASET_FOLDER', folder.locationUri - ) - - @pytest.mark.parametrize(*FOLDERS_FIXTURES_PARAMS) def test_update_folder(client1, folders_fixture_name, request): folders = request.getfixturevalue(folders_fixture_name) diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py index 798303905..4e0146b83 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py @@ -59,7 +59,11 @@ def test_list_dataset_tables(client1, dataset_fixture_name, request): dataset = request.getfixturevalue(dataset_fixture_name) response = list_dataset_tables(client1, dataset.datasetUri) assert_that(response.tables.count).is_greater_than_or_equal_to(2) - tables = [table for table in response.tables.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] + tables = [ + table + for table in response.tables.get('nodes', []) + if table.restricted.GlueTableName.startswith('integrationtest') + ] assert_that(len(tables)).is_equal_to(2) @@ -116,11 +120,12 @@ def test_delete_table(client1, dataset_fixture_name, request): aws_secret_access_key=creds['SessionKey'], aws_session_token=creds['sessionToken'], ) - GlueClient(dataset_session, dataset.region).create_table( + GlueClient(dataset_session, dataset.restricted.region).create_table( database_name=dataset.restricted.GlueDatabaseName, table_name='todelete', bucket=dataset.restricted.S3BucketName ) - response = sync_tables(client1, datasetUri=dataset.datasetUri) - table_uri = [table.tableUri for table in response.get('nodes', []) if table.label == 'todelete'][0] + sync_tables(client1, datasetUri=dataset.datasetUri) + response = list_dataset_tables(client1, datasetUri=dataset.datasetUri) + table_uri = [table.tableUri for table in response.tables.get('nodes', []) if table.label == 'todelete'][0] response = delete_table(client1, table_uri) assert_that(response).is_true() diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py index 6a605a58a..260cc0f58 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables_profiling.py @@ -44,10 +44,11 @@ def test_start_table_profiling(client1, dataset_fixture_name, tables_fixture_nam table = tables[0] dataset_uri = dataset.datasetUri response = start_dataset_profiling_run( - client1, input={'datasetUri': dataset_uri, 'tableUri': table.tableUri, 'GlueTableName': table.GlueTableName} + client1, + input={'datasetUri': dataset_uri, 'tableUri': table.tableUri, 'GlueTableName': table.restricted.GlueTableName}, ) assert_that(response.datasetUri).is_equal_to(dataset_uri) - assert_that(response.GlueTableName).is_equal_to(table.GlueTableName) + assert_that(response.GlueTableName).is_equal_to(table.restricted.GlueTableName) @pytest.mark.parametrize('dataset_fixture_name', ['session_s3_dataset1']) @@ -90,7 +91,7 @@ def test_get_table_profiling_run_by_confidentiality(client2, tables_fixture_name table_uri = tables[0].tableUri if confidentiality in ['Unclassified']: response = get_table_profiling_run(client2, tableUri=table_uri) - assert_that(response.GlueTableName).is_equal_to(tables[0].GlueTableName) + assert_that(response.GlueTableName).is_equal_to(tables[0].restricted.GlueTableName) else: assert_that(get_table_profiling_run).raises(GqlError).when_called_with(client2, table_uri).contains( 'UnauthorizedOperation', 'GET_TABLE_PROFILING_METRICS' diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py index 535480897..60c24f143 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/conftest.py @@ -194,11 +194,11 @@ def principal1(request, group5, session_consumption_role_1): @pytest.fixture(params=['Group', 'ConsumptionRole']) -def share_params_main(request, session_share_1, session_share_consrole_1, session_s3_dataset1): +def share_params_main(request, session_share_1, session_cross_acc_env_1, session_share_consrole_1, session_s3_dataset1): if request.param == 'Group': - yield session_share_1, session_s3_dataset1 + yield session_share_1, session_s3_dataset1, session_cross_acc_env_1 else: - yield session_share_consrole_1, session_s3_dataset1 + yield session_share_consrole_1, session_s3_dataset1, session_cross_acc_env_1 @pytest.fixture(params=[(False, 'Group'), (True, 'Group'), (False, 'ConsumptionRole'), (True, 'ConsumptionRole')]) @@ -315,8 +315,10 @@ def persistent_role_share_1( @pytest.fixture(params=['Group', 'ConsumptionRole']) -def persistent_share_params_main(request, persistent_role_share_1, persistent_group_share_1): +def persistent_share_params_main( + request, persistent_cross_acc_env_1, persistent_role_share_1, persistent_group_share_1 +): if request.param == 'Group': - yield persistent_group_share_1 + yield persistent_group_share_1, persistent_cross_acc_env_1 else: - yield persistent_role_share_1 + yield persistent_role_share_1, persistent_cross_acc_env_1 diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py index b28a5bdeb..b7e101ca3 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/shared_test_functions.py @@ -124,19 +124,20 @@ def check_bucket_access(client, s3_client, bucket_name, should_have_access): def check_accesspoint_access(client, s3_client, access_point_arn, item_uri, should_have_access): + folder = get_folder(client, item_uri) if should_have_access: - folder = get_folder(client, item_uri) assert_that(s3_client.list_accesspoint_folder_objects(access_point_arn, folder.S3Prefix + '/')).is_not_none() else: - assert_that(get_folder).raises(Exception).when_called_with(client, item_uri).contains( - 'is not authorized to perform: GET_DATASET_FOLDER' - ) + assert_that(s3_client.list_accesspoint_folder_objects).raises(ClientError).when_called_with( + access_point_arn, folder.S3Prefix + '/' + ).contains('AccessDenied') def check_share_items_access( client, group, shareUri, + share_environment, consumption_role, env_client, ): @@ -144,7 +145,7 @@ def check_share_items_access( dataset = share.dataset principal_type = share.principal.principalType if principal_type == 'Group': - credentials_str = get_environment_access_token(client, share.environment.environmentUri, group) + credentials_str = get_environment_access_token(client, share_environment.environmentUri, group) credentials = json.loads(credentials_str) session = boto3.Session( aws_access_key_id=credentials['AccessKey'], @@ -169,7 +170,7 @@ def check_share_items_access( f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{consumption_data.s3AccessPointName}' ) if principal_type == 'Group': - workgroup = athena_client.get_env_work_group(share.environment.label) + workgroup = athena_client.get_env_work_group(share_environment.label) athena_workgroup_output_location = None else: workgroup = 'primary' diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py index e54e04db4..5ba7c402e 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_new_crossacc_s3_share.py @@ -137,7 +137,7 @@ def test_reject_share(client1, client5, session_cross_acc_env_1, session_s3_data def test_change_share_purpose(client5, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main change_request_purpose = update_share_request_reason(client5, share.shareUri, 'new purpose') assert_that(change_request_purpose).is_true() updated_share = get_share_object(client5, share.shareUri) @@ -153,19 +153,19 @@ def test_submit_object(client5, share_params_all): @pytest.mark.dependency(name='share_approved', depends=['share_submitted']) def test_approve_share(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_approve_share_object(client1, share.shareUri) @pytest.mark.dependency(name='share_succeeded', depends=['share_approved']) def test_share_succeeded(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_share_succeeded(client1, share.shareUri, check_contains_all_item_types=True) @pytest.mark.dependency(name='share_verified', depends=['share_succeeded']) def test_verify_share_items(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_verify_share_items(client1, share.shareUri) @@ -173,9 +173,14 @@ def test_verify_share_items(client1, share_params_main): def test_check_item_access( client5, session_cross_acc_env_1_aws_client, share_params_main, group5, session_consumption_role_1 ): - share, dataset = share_params_main + share, dataset, share_environment = share_params_main check_share_items_access( - client5, group5, share.shareUri, session_consumption_role_1, session_cross_acc_env_1_aws_client + client5, + group5, + share.shareUri, + share_environment, + session_consumption_role_1, + session_cross_acc_env_1_aws_client, ) @@ -183,7 +188,7 @@ def test_check_item_access( def test_unhealthy_items( client5, session_cross_acc_env_1_aws_client, session_cross_acc_env_1_integration_role_arn, share_params_main ): - share, _ = share_params_main + share, _, _ = share_params_main iam = session_cross_acc_env_1_aws_client.resource('iam') principal_role = iam.Role(share.principal.principalRoleName) # break s3 by removing policies @@ -209,7 +214,7 @@ def test_unhealthy_items( @pytest.mark.dependency(depends=['share_approved']) def test_reapply_unauthoried(client5, share_params_main): - share, _ = share_params_main + share, _, _ = share_params_main share_uri = share.shareUri share_object = get_share_object(client5, share_uri) item_uris = [item.shareItemUri for item in share_object['items'].nodes] @@ -220,7 +225,7 @@ def test_reapply_unauthoried(client5, share_params_main): @pytest.mark.dependency(depends=['share_approved']) def test_reapply(client1, share_params_main): - share, _ = share_params_main + share, _, _ = share_params_main share_uri = share.shareUri share_object = get_share_object(client1, share_uri) item_uris = [item.shareItemUri for item in share_object['items'].nodes] @@ -233,7 +238,7 @@ def test_reapply(client1, share_params_main): @pytest.mark.dependency(name='share_revoked', depends=['share_succeeded']) def test_revoke_share(client1, share_params_main): - share, dataset = share_params_main + share, dataset, _ = share_params_main check_share_ready(client1, share.shareUri) revoke_and_check_all_shared_items(client1, share.shareUri, check_contains_all_item_types=True) @@ -242,8 +247,13 @@ def test_revoke_share(client1, share_params_main): def test_revoke_succeeded( client1, client5, session_cross_acc_env_1_aws_client, share_params_main, group5, session_consumption_role_1 ): - share, dataset = share_params_main + share, dataset, share_environment = share_params_main check_all_items_revoke_job_succeeded(client1, share.shareUri, check_contains_all_item_types=True) check_share_items_access( - client5, group5, share.shareUri, session_consumption_role_1, session_cross_acc_env_1_aws_client + client5, + group5, + share.shareUri, + share_environment, + session_consumption_role_1, + session_cross_acc_env_1_aws_client, ) diff --git a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py index a7567db82..ad5a09f4f 100644 --- a/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py +++ b/tests_new/integration_tests/modules/shares/s3_datasets_shares/test_persistent_crossacc_share.py @@ -25,26 +25,28 @@ def test_verify_share_items(client5, persistent_share_params_main): - check_verify_share_items(client5, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_verify_share_items(client5, share.shareUri) def test_check_share_items_access( client5, group5, persistent_share_params_main, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client ): + share, env = persistent_share_params_main check_share_items_access( client5, group5, - persistent_share_params_main.shareUri, + share.shareUri, + env, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ) def test_revoke_share(client1, persistent_share_params_main): - check_share_ready(client1, persistent_share_params_main.shareUri) - revoke_and_check_all_shared_items( - client1, persistent_share_params_main.shareUri, check_contains_all_item_types=True - ) + share, _ = persistent_share_params_main + check_share_ready(client1, share.shareUri) + revoke_and_check_all_shared_items(client1, share.shareUri, check_contains_all_item_types=True) def test_revoke_succeeded( @@ -55,45 +57,51 @@ def test_revoke_succeeded( persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ): - check_all_items_revoke_job_succeeded( - client1, persistent_share_params_main.shareUri, check_contains_all_item_types=True - ) + share, env = persistent_share_params_main + check_all_items_revoke_job_succeeded(client1, share.shareUri, check_contains_all_item_types=True) check_share_items_access( client5, group5, - persistent_share_params_main.shareUri, + share.shareUri, + env, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ) def test_delete_all_nonshared_items(client5, persistent_share_params_main): - check_share_ready(client5, persistent_share_params_main.shareUri) - delete_all_non_shared_items(client5, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_share_ready(client5, share.shareUri) + delete_all_non_shared_items(client5, share.shareUri) def test_add_items_back_to_share(client5, persistent_share_params_main): - check_share_ready(client5, persistent_share_params_main.shareUri) - add_all_items_to_share(client5, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_share_ready(client5, share.shareUri) + add_all_items_to_share(client5, share.shareUri) def test_submit_share(client5, persistent_share_params_main, persistent_s3_dataset1): - check_submit_share_object(client5, persistent_share_params_main.shareUri, persistent_s3_dataset1) + share, _ = persistent_share_params_main + check_submit_share_object(client5, share.shareUri, persistent_s3_dataset1) def test_approve_share(client1, persistent_share_params_main): - check_approve_share_object(client1, persistent_share_params_main.shareUri) + share, _ = persistent_share_params_main + check_approve_share_object(client1, share.shareUri) def test_re_share_succeeded( client5, persistent_share_params_main, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client ): - check_share_succeeded(client5, persistent_share_params_main.shareUri, check_contains_all_item_types=True) - check_verify_share_items(client5, persistent_share_params_main.shareUri) + share, env = persistent_share_params_main + check_share_succeeded(client5, share.shareUri, check_contains_all_item_types=True) + check_verify_share_items(client5, share.shareUri) check_share_items_access( client5, - persistent_share_params_main.group, - persistent_share_params_main.shareUri, + share.group, + share.shareUri, + env, persistent_consumption_role_1, persistent_cross_acc_env_1_aws_client, ) diff --git a/tests_new/integration_tests/modules/shares/types.py b/tests_new/integration_tests/modules/shares/types.py index 844e8a3b7..099a4aad0 100644 --- a/tests_new/integration_tests/modules/shares/types.py +++ b/tests_new/integration_tests/modules/shares/types.py @@ -69,11 +69,6 @@ items(filter: $filter){{ {SharedItemSearchResult} }}, -environment{{ - environmentUri - label - region -}} canViewLogs, userRoleForShareObject, """