diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py index eee06304b..cc2e88139 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py @@ -131,8 +131,6 @@ name='GlueCrawler', fields=[ gql.Field(name='Name', type=gql.ID), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), gql.Field(name='status', type=gql.String), ], ) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 811ba9d28..356d5608a 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -430,8 +430,6 @@ def start_crawler(uri: str, data: dict = None): return { 'Name': dataset.GlueCrawlerName, - 'AwsAccountId': dataset.AwsAccountId, - 'region': dataset.region, 'status': crawler.get('LastCrawl', {}).get('Status', 'N/A'), } diff --git a/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js b/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js index 2c110baa5..feb59d959 100644 --- a/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js +++ b/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js @@ -9,8 +9,6 @@ export const startGlueCrawler = ({ datasetUri, input }) => ({ mutation StartGlueCrawler($datasetUri: String, $input: CrawlerInput) { startGlueCrawler(datasetUri: $datasetUri, input: $input) { Name - AwsAccountId - region status } } diff --git a/tests/modules/s3_datasets/conftest.py b/tests/modules/s3_datasets/conftest.py index 99a4dcdcf..8295b4be2 100644 --- a/tests/modules/s3_datasets/conftest.py +++ b/tests/modules/s3_datasets/conftest.py @@ -76,19 +76,20 @@ def factory( datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName - GlueCrawlerName enableExpiration expirySetting expiryMinDuration expiryMaxDuration + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tables{ nodes{ tableUri @@ -180,11 +181,11 @@ def factory(dataset: S3Dataset, name, username) -> DatasetTable: label=name, owner=username, datasetUri=dataset.datasetUri, - GlueDatabaseName=dataset.GlueDatabaseName, + GlueDatabaseName=dataset.restricted.GlueDatabaseName, GlueTableName=name, - region=dataset.region, - AWSAccountId=dataset.AwsAccountId, - S3BucketName=dataset.S3BucketName, + region=dataset.restricted.region, + AWSAccountId=dataset.restricted.AwsAccountId, + S3BucketName=dataset.restricted.S3BucketName, S3Prefix=f'{name}', ) session.add(table) @@ -325,9 +326,9 @@ def factory(dataset: S3Dataset, name, username) -> DatasetStorageLocation: label=name, owner=username, datasetUri=dataset.datasetUri, - S3BucketName=dataset.S3BucketName, - region=dataset.region, - AWSAccountId=dataset.AwsAccountId, + S3BucketName=dataset.restricted.S3BucketName, + region=dataset.restricted.region, + AWSAccountId=dataset.restricted.AwsAccountId, S3Prefix=f'{name}', ) session.add(ds_location) diff --git a/tests/modules/s3_datasets/test_dataset.py b/tests/modules/s3_datasets/test_dataset.py index 69b1de0f6..c3801edfa 100644 --- a/tests/modules/s3_datasets/test_dataset.py +++ b/tests/modules/s3_datasets/test_dataset.py @@ -69,13 +69,15 @@ def test_get_dataset(client, dataset1, env_fixture, group): query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId description - region - imported - importedS3Bucket stewards owners + imported + restricted { + AwsAccountId + region + importedS3Bucket + } } } """, @@ -83,11 +85,11 @@ def test_get_dataset(client, dataset1, env_fixture, group): username='alice', groups=[group.name], ) - assert response.data.getDataset.AwsAccountId == env_fixture.AwsAccountId - assert response.data.getDataset.region == env_fixture.region + assert response.data.getDataset.restricted.AwsAccountId == env_fixture.AwsAccountId + assert response.data.getDataset.restricted.region == env_fixture.region assert response.data.getDataset.label == 'dataset1' assert response.data.getDataset.imported is False - assert response.data.getDataset.importedS3Bucket is False + assert response.data.getDataset.restricted.importedS3Bucket is False def test_list_datasets(client, dataset1, group): @@ -194,8 +196,6 @@ def test_start_crawler(org_fixture, env_fixture, dataset1, client, group, module mutation StartGlueCrawler($datasetUri:String, $input:CrawlerInput){ startGlueCrawler(datasetUri:$datasetUri,input:$input){ Name - AwsAccountId - region status } } @@ -209,7 +209,7 @@ def test_start_crawler(org_fixture, env_fixture, dataset1, client, group, module 'prefix': 'raw', }, ) - assert response.data.startGlueCrawler.Name == dataset1.GlueCrawlerName + assert response.data.Name == dataset1.restricted.GlueCrawlerName def test_update_dataset_unauthorized(dataset1, client, group): @@ -309,9 +309,11 @@ def test_list_dataset_tables(client, dataset1, group): tableUri name label - GlueDatabaseName - GlueTableName - S3Prefix + restricted{ + GlueDatabaseName + GlueTableName + S3Prefix + } } } } @@ -391,9 +393,11 @@ def test_delete_dataset(client, dataset, env_fixture, org_fixture, db, module_mo query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId + restricted { + AwsAccountId + region + } description - region } } """, @@ -428,17 +432,15 @@ def test_import_dataset(org_fixture, env_fixture, dataset1, client, group): mutation importDataset($input:ImportDatasetInput){ importDataset(input:$input){ label - AwsAccountId - region imported - importedS3Bucket - importedGlueDatabase - importedKmsKey - importedAdminRole - S3BucketName - GlueDatabaseName - IAMDatasetAdminRoleArn - KmsAlias + restricted { + AwsAccountId + region + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + KmsAlias + } } } """, @@ -457,17 +459,13 @@ def test_import_dataset(org_fixture, env_fixture, dataset1, client, group): }, ) assert response.data.importDataset.label == 'datasetImported' - assert response.data.importDataset.AwsAccountId == env_fixture.AwsAccountId - assert response.data.importDataset.region == env_fixture.region + assert response.data.importDataset.restricted.AwsAccountId == env_fixture.AwsAccountId + assert response.data.importDataset.restricted.region == env_fixture.region assert response.data.importDataset.imported is True - assert response.data.importDataset.importedS3Bucket is True - assert response.data.importDataset.importedGlueDatabase is True - assert response.data.importDataset.importedKmsKey is True - assert response.data.importDataset.importedAdminRole is True - assert response.data.importDataset.S3BucketName == 'dhimportedbucket' - assert response.data.importDataset.GlueDatabaseName == 'dhimportedGlueDB' - assert response.data.importDataset.KmsAlias == '1234-YYEY' - assert 'dhimportedRole' in response.data.importDataset.IAMDatasetAdminRoleArn + assert response.data.importDataset.restricted.S3BucketName == 'dhimportedbucket' + assert response.data.importDataset.restricted.GlueDatabaseName == 'dhimportedGlueDB' + assert response.data.importDataset.restricted.KmsAlias == '1234-YYEY' + assert 'dhimportedRole' in response.data.importDataset.restricted.IAMDatasetAdminRoleArn def test_get_dataset_by_prefix(db, env_fixture, org_fixture): @@ -512,13 +510,15 @@ def test_stewardship(client, dataset, env_fixture, org_fixture, db, group2, grou datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName stewards diff --git a/tests/modules/s3_datasets/test_dataset_glossary.py b/tests/modules/s3_datasets/test_dataset_glossary.py index 543d20f91..5a25d1b34 100644 --- a/tests/modules/s3_datasets/test_dataset_glossary.py +++ b/tests/modules/s3_datasets/test_dataset_glossary.py @@ -14,12 +14,12 @@ def _columns(db, dataset_fixture, table_fixture) -> List[DatasetTableColumn]: datasetUri=dataset_fixture.datasetUri, tableUri=table_fixture.tableUri, label=f'c{i+1}', - AWSAccountId=dataset_fixture.AwsAccountId, - region=dataset_fixture.region, + AWSAccountId=dataset_fixture.restricted.AwsAccountId, + region=dataset_fixture.restricted.region, GlueTableName='table', typeName='String', owner='user', - GlueDatabaseName=dataset_fixture.GlueDatabaseName, + GlueDatabaseName=dataset_fixture.restricted.GlueDatabaseName, ) session.add(c) cols.append(c) diff --git a/tests/modules/s3_datasets/test_dataset_location.py b/tests/modules/s3_datasets/test_dataset_location.py index 3d388e641..d74f863da 100644 --- a/tests/modules/s3_datasets/test_dataset_location.py +++ b/tests/modules/s3_datasets/test_dataset_location.py @@ -51,11 +51,11 @@ def test_manage_dataset_location(client, dataset1, user, group): query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId description - region - imported - importedS3Bucket + restricted { + AwsAccountId + region + } locations{ nodes{ locationUri diff --git a/tests/modules/s3_datasets/test_dataset_table.py b/tests/modules/s3_datasets/test_dataset_table.py index e20af5b86..1736c4958 100644 --- a/tests/modules/s3_datasets/test_dataset_table.py +++ b/tests/modules/s3_datasets/test_dataset_table.py @@ -81,9 +81,11 @@ def test_list_dataset_tables(client, dataset_fixture): tableUri name label - GlueDatabaseName - GlueTableName - S3Prefix + restricted { + GlueDatabaseName + GlueTableName + S3Prefix + } } } } diff --git a/tests/modules/s3_datasets_shares/conftest.py b/tests/modules/s3_datasets_shares/conftest.py index 8fc050487..7a5ac19ba 100644 --- a/tests/modules/s3_datasets_shares/conftest.py +++ b/tests/modules/s3_datasets_shares/conftest.py @@ -83,15 +83,16 @@ def factory( datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName - GlueCrawlerName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tables{ nodes{ tableUri @@ -253,7 +254,12 @@ def dataset_confidential_fixture(env_fixture, org_fixture, dataset, group) -> S3 @pytest.fixture(scope='module') def table_fixture(db, dataset_fixture, table, group, user): - table1 = table(dataset=dataset_fixture, name='table1', username=user.username) + dataset = dataset_fixture + dataset.GlueDatabaseName = dataset_fixture.restricted.GlueDatabaseName + dataset.region = dataset_fixture.restricted.region + dataset.S3BucketName = dataset_fixture.restricted.S3BucketName + dataset.AwsAccountId = dataset_fixture.restricted.AwsAccountId + table1 = table(dataset=dataset, name='table1', username=user.username) with db.scoped_session() as session: ResourcePolicyService.attach_resource_policy( diff --git a/tests_new/integration_tests/modules/catalog/conftest.py b/tests_new/integration_tests/modules/catalog/conftest.py index fa93399fe..3476afad8 100644 --- a/tests_new/integration_tests/modules/catalog/conftest.py +++ b/tests_new/integration_tests/modules/catalog/conftest.py @@ -85,7 +85,7 @@ def dataset_association1(client1, group1, glossary1, glossary_term1, session_s3_ datasetUri=session_s3_dataset1.datasetUri, input={ 'terms': [glossary_term1.nodeUri], - 'KmsAlias': session_s3_dataset1.KmsAlias, + 'KmsAlias': session_s3_dataset1.restricted.KmsAlias, }, ) response = list_glossary_associations(client1, node_uri=glossary1.nodeUri) @@ -100,7 +100,7 @@ def dataset_association1(client1, group1, glossary1, glossary_term1, session_s3_ datasetUri=session_s3_dataset1.datasetUri, input={ 'terms': [], - 'KmsAlias': session_s3_dataset1.KmsAlias, + 'KmsAlias': session_s3_dataset1.restricted.KmsAlias, }, ) diff --git a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py index e05861e73..4d339959e 100644 --- a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py +++ b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py @@ -181,14 +181,14 @@ def create_tables(client, dataset): file_path = os.path.join(os.path.dirname(__file__), 'sample_data/csv_table/csv_sample.csv') s3_client = S3Client(dataset_session, dataset.region) glue_client = GlueClient(dataset_session, dataset.region) - s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.S3BucketName}/integrationtest1') + s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest1') glue_client.create_table( - database_name=dataset.GlueDatabaseName, table_name='integrationtest1', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, table_name='integrationtest1', bucket=dataset.restricted.S3BucketName ) - s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.S3BucketName}/integrationtest2') + s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest2') glue_client.create_table( - database_name=dataset.GlueDatabaseName, table_name='integrationtest2', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, table_name='integrationtest2', bucket=dataset.restricted.S3BucketName ) response = sync_tables(client, datasetUri=dataset.datasetUri) return [table for table in response.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] @@ -238,7 +238,7 @@ def session_s3_dataset1(client1, group1, org1, session_env1, session_id, testdat finally: if ds: delete_s3_dataset(client1, session_env1['environmentUri'], ds) - delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.S3BucketName) + delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.restricted.S3BucketName) @pytest.fixture(scope='session') @@ -394,7 +394,7 @@ def temp_s3_dataset1(client1, group1, org1, session_env1, session_id, testdata, if ds: delete_s3_dataset(client1, session_env1['environmentUri'], ds) - delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.S3BucketName) + delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.restricted.S3BucketName) """ diff --git a/tests_new/integration_tests/modules/s3_datasets/queries.py b/tests_new/integration_tests/modules/s3_datasets/queries.py index e80ddbf7d..a92ba6828 100644 --- a/tests_new/integration_tests/modules/s3_datasets/queries.py +++ b/tests_new/integration_tests/modules/s3_datasets/queries.py @@ -10,24 +10,16 @@ created updated admins -AwsAccountId -region -S3BucketName -GlueDatabaseName -GlueCrawlerName -GlueCrawlerSchedule -GlueProfilingJobName -GlueProfilingTriggerSchedule -IAMDatasetAdminRoleArn -KmsAlias SamlAdminGroupName -businessOwnerEmail -businessOwnerDelegationEmails -importedS3Bucket -importedGlueDatabase -importedKmsKey -importedAdminRole imported +restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn +} environment { environmentUri label @@ -265,8 +257,6 @@ def start_glue_crawler(client, datasetUri, input): mutation StartGlueCrawler($datasetUri: String, $input: CrawlerInput) {{ startGlueCrawler(datasetUri: $datasetUri, input: $input) {{ Name - AwsAccountId - region status }} }} @@ -299,12 +289,14 @@ def list_s3_datasets_owned_by_env_group(client, environment_uri, group_uri, term nodes {{ datasetUri label - AwsAccountId - region - GlueDatabaseName + restricted {{ + AwsAccountId + region + S3BucketName + GlueDatabaseName + }} SamlAdminGroupName name - S3BucketName created owner stack {{ @@ -360,8 +352,6 @@ def update_folder(client, locationUri, input): mutation updateDatasetStorageLocation($locationUri: String!, $input: ModifyDatasetStorageLocationInput!) {{ updateDatasetStorageLocation(locationUri: $locationUri, input: $input) {{ locationUri - S3Prefix - label }} }} """, @@ -431,25 +421,7 @@ def sync_tables(client, datasetUri): 'variables': {'datasetUri': datasetUri}, 'query': f""" mutation SyncTables($datasetUri: String!) {{ - syncTables(datasetUri: $datasetUri) {{ - count - nodes {{ - tableUri - GlueTableName - GlueDatabaseName - description - name - label - created - S3Prefix - dataset {{ - datasetUri - name - GlueDatabaseName - userRoleForDataset - }} - }} - }} + syncTables(datasetUri: $datasetUri) }} """, } @@ -500,13 +472,15 @@ def get_dataset_table(client, tableUri): created tags tableUri - AwsAccountId - GlueTableName - GlueDatabaseName + restricted {{ + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + }} LastGlueTableStatus label name - S3Prefix }} }} """, @@ -526,7 +500,9 @@ def list_dataset_tables(client, datasetUri): tables {{ count nodes {{ - GlueTableName + restricted {{ + GlueTableName + }} }} }} }} diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py index e1882ac0d..fcdcc2865 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py @@ -159,7 +159,7 @@ def test_update_dataset(client1, dataset_fixture_name, request): test_description = f'a test description {datetime.utcnow().isoformat()}' dataset_uri = dataset.datasetUri updated_dataset = update_dataset( - client1, dataset_uri, {'description': test_description, 'KmsAlias': dataset.KmsAlias} + client1, dataset_uri, {'description': test_description, 'KmsAlias': dataset.restricted.KmsAlias} ) assert_that(updated_dataset).contains_entry(datasetUri=dataset_uri, description=test_description) env = get_dataset(client1, dataset_uri) @@ -175,7 +175,7 @@ def test_update_dataset_unauthorized(client1, client2, dataset_fixture_name, req test_description = f'unauthorized {datetime.utcnow().isoformat()}' dataset_uri = dataset.datasetUri assert_that(update_dataset).raises(GqlError).when_called_with( - client2, dataset_uri, {'description': test_description, 'KmsAlias': dataset.KmsAlias} + client2, dataset_uri, {'description': test_description, 'KmsAlias': dataset.restricted.KmsAlias} ).contains('UnauthorizedOperation', dataset_uri) response = get_dataset(client1, dataset_uri) assert_that(response).contains_entry(datasetUri=dataset_uri).does_not_contain_entry(description=test_description) diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py index 934cac9d4..798303905 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py @@ -117,7 +117,7 @@ def test_delete_table(client1, dataset_fixture_name, request): aws_session_token=creds['sessionToken'], ) GlueClient(dataset_session, dataset.region).create_table( - database_name=dataset.GlueDatabaseName, table_name='todelete', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, table_name='todelete', bucket=dataset.restricted.S3BucketName ) response = sync_tables(client1, datasetUri=dataset.datasetUri) table_uri = [table.tableUri for table in response.get('nodes', []) if table.label == 'todelete'][0]