Commit c8292e2
Merge branch 'main' into feat/integ-tests-notifications
dlpzx committed Dec 23, 2024
2 parents ac6bc24 + 59ce9ae
Showing 60 changed files with 782 additions and 488 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/snyk.yaml
@@ -28,5 +28,3 @@ jobs:
run: snyk test --all-projects --detection-depth=5 --severity-threshold=high
env:
SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
with:
args: --all-projects --detection-depth=5 --severity-threshold=high
2 changes: 1 addition & 1 deletion backend/dataall/core/environment/cdk/environment_stack.py
@@ -697,7 +697,7 @@ def create_integration_tests_role(self):
],
effect=iam.Effect.ALLOW,
resources=[
f'arn:aws:iam::{self.account}:role/dataall-test-*',
f'arn:aws:iam::{self.account}:role/dataall-test*',
f'arn:aws:iam::{self.account}:role/dataall-session*',
],
),
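Dropping the hyphen broadens the wildcard: the integration-tests role may now assume roles named dataall-test<anything>, not only dataall-test-<anything>. A quick sketch of the difference (IAM resource patterns use simple * globbing, which Python's fnmatch approximates; the role names below are hypothetical):

from fnmatch import fnmatch

# Hypothetical role names, only to illustrate the two patterns
for name in ['dataall-test-role1', 'dataall-testRole1']:
    print(name, fnmatch(name, 'dataall-test-*'), fnmatch(name, 'dataall-test*'))
# dataall-test-role1 True True
# dataall-testRole1 False True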
6 changes: 6 additions & 0 deletions backend/dataall/modules/s3_datasets/api/dataset/resolvers.py
@@ -112,6 +112,12 @@ def get_dataset_statistics(context: Context, source: S3Dataset, **kwargs):
return DatasetService.get_dataset_statistics(source)


def get_dataset_restricted_information(context: Context, source: S3Dataset, **kwargs):
if not source:
return None
return DatasetService.get_dataset_restricted_information(uri=source.datasetUri, dataset=source)


@is_feature_enabled('modules.s3_datasets.features.aws_actions')
def get_dataset_assume_role_url(context: Context, source, datasetUri: str = None):
return DatasetService.get_dataset_assume_role_url(uri=datasetUri)
46 changes: 23 additions & 23 deletions backend/dataall/modules/s3_datasets/api/dataset/types.py
@@ -11,6 +11,7 @@
get_dataset_statistics,
get_dataset_glossary_terms,
resolve_dataset_stack,
get_dataset_restricted_information,
)
from dataall.core.environment.api.enums import EnvironmentPermission

@@ -23,6 +24,23 @@
],
)

DatasetRestrictedInformation = gql.ObjectType(
name='DatasetRestrictedInformation',
fields=[
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='region', type=gql.String),
gql.Field(name='S3BucketName', type=gql.String),
gql.Field(name='GlueDatabaseName', type=gql.String),
gql.Field(name='GlueCrawlerName', type=gql.String),
gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String),
gql.Field(name='KmsAlias', type=gql.String),
gql.Field(name='importedS3Bucket', type=gql.Boolean),
gql.Field(name='importedGlueDatabase', type=gql.Boolean),
gql.Field(name='importedKmsKey', type=gql.Boolean),
gql.Field(name='importedAdminRole', type=gql.Boolean),
],
)

Dataset = gql.ObjectType(
name='Dataset',
fields=[
@@ -35,29 +53,13 @@
gql.Field(name='created', type=gql.String),
gql.Field(name='updated', type=gql.String),
gql.Field(name='admins', type=gql.ArrayType(gql.String)),
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='region', type=gql.String),
gql.Field(name='S3BucketName', type=gql.String),
gql.Field(name='GlueDatabaseName', type=gql.String),
gql.Field(name='GlueCrawlerName', type=gql.String),
gql.Field(name='GlueCrawlerSchedule', type=gql.String),
gql.Field(name='GlueProfilingJobName', type=gql.String),
gql.Field(name='GlueProfilingTriggerSchedule', type=gql.String),
gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String),
gql.Field(name='KmsAlias', type=gql.String),
gql.Field(name='bucketCreated', type=gql.Boolean),
gql.Field(name='glueDatabaseCreated', type=gql.Boolean),
gql.Field(name='iamAdminRoleCreated', type=gql.Boolean),
gql.Field(name='lakeformationLocationCreated', type=gql.Boolean),
gql.Field(name='bucketPolicyCreated', type=gql.Boolean),
gql.Field(name='SamlAdminGroupName', type=gql.String),
gql.Field(name='businessOwnerEmail', type=gql.String),
gql.Field(name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String)),
gql.Field(name='importedS3Bucket', type=gql.Boolean),
gql.Field(name='importedGlueDatabase', type=gql.Boolean),
gql.Field(name='importedKmsKey', type=gql.Boolean),
gql.Field(name='importedAdminRole', type=gql.Boolean),
gql.Field(name='imported', type=gql.Boolean),
gql.Field(
name='restricted',
type=DatasetRestrictedInformation,
resolver=get_dataset_restricted_information,
),
gql.Field(
name='environment',
type=gql.Ref('EnvironmentSimplified'),
@@ -130,8 +132,6 @@
name='GlueCrawler',
fields=[
gql.Field(name='Name', type=gql.ID),
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='region', type=gql.String),
gql.Field(name='status', type=gql.String),
],
)
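With these type changes, account- and infrastructure-level fields (AwsAccountId, region, S3BucketName, GlueDatabaseName, KmsAlias, and the imported* flags) move off Dataset and behind the nested restricted field. A sketch of what an adjusted client-side selection might look like, written as a Python query string (the getDataset operation name and argument are assumptions; the field names come from the types above):

# Sketch only: operation name and argument are assumed, not taken from this diff
GET_DATASET = """
query getDataset($datasetUri: String!) {
  getDataset(datasetUri: $datasetUri) {
    datasetUri
    label
    restricted {
      AwsAccountId
      region
      S3BucketName
      GlueDatabaseName
      KmsAlias
    }
  }
}
"""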
backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py
@@ -50,6 +50,12 @@ def resolve_dataset(context, source: DatasetStorageLocation, **kwargs):
return DatasetService.find_dataset(uri=source.datasetUri)


def get_folder_restricted_information(context: Context, source: DatasetStorageLocation, **kwargs):
if not source:
return None
return DatasetLocationService.get_folder_restricted_information(uri=source.locationUri, folder=source)


def resolve_glossary_terms(context: Context, source: DatasetStorageLocation, **kwargs):
if not source:
return None
48 changes: 10 additions & 38 deletions backend/dataall/modules/s3_datasets/api/storage_location/types.py
@@ -1,5 +1,9 @@
from dataall.base.api import gql
from dataall.modules.s3_datasets.api.storage_location.resolvers import resolve_glossary_terms, resolve_dataset
from dataall.modules.s3_datasets.api.storage_location.resolvers import (
resolve_glossary_terms,
resolve_dataset,
get_folder_restricted_information,
)

DatasetStorageLocation = gql.ObjectType(
name='DatasetStorageLocation',
@@ -11,13 +15,15 @@
gql.Field(name='owner', type=gql.String),
gql.Field(name='created', type=gql.String),
gql.Field(name='updated', type=gql.String),
gql.Field(name='region', type=gql.String),
gql.Field(name='tags', type=gql.ArrayType(gql.String)),
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='S3BucketName', type=gql.String),
gql.Field(name='S3Prefix', type=gql.String),
gql.Field(name='locationCreated', type=gql.Boolean),
gql.Field(name='dataset', type=gql.Ref('Dataset'), resolver=resolve_dataset),
gql.Field(
name='restricted',
type=gql.Ref('DatasetRestrictedInformation'),
resolver=get_folder_restricted_information,
),
gql.Field(name='userRoleForStorageLocation', type=gql.Ref('DatasetRole')),
gql.Field(name='environmentEndPoint', type=gql.String),
gql.Field(
@@ -40,37 +46,3 @@
gql.Field(name='hasPrevious', type=gql.Boolean),
],
)


DatasetAccessPoint = gql.ObjectType(
name='DatasetAccessPoint',
fields=[
gql.Field(name='accessPointUri', type=gql.ID),
gql.Field(name='location', type=DatasetStorageLocation),
gql.Field(name='dataset', type=gql.Ref('Dataset')),
gql.Field(name='name', type=gql.String),
gql.Field(name='description', type=gql.String),
gql.Field(name='owner', type=gql.String),
gql.Field(name='created', type=gql.String),
gql.Field(name='updated', type=gql.String),
gql.Field(name='region', type=gql.String),
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='S3BucketName', type=gql.String),
gql.Field(name='S3Prefix', type=gql.String),
gql.Field(name='S3AccessPointName', type=gql.String),
],
)


DatasetAccessPointSearchResult = gql.ObjectType(
name='DatasetAccessPointSearchResult',
fields=[
gql.Field(name='count', type=gql.Integer),
gql.Field(name='page', type=gql.Integer),
gql.Field(name='pageSize', type=gql.Integer),
gql.Field(name='pages', type=gql.Integer),
gql.Field(name='hasNext', type=gql.Integer),
gql.Field(name='hasPrevious', type=gql.Integer),
gql.Field(name='nodes', type=gql.ArrayType(DatasetAccessPoint)),
],
)
2 changes: 1 addition & 1 deletion backend/dataall/modules/s3_datasets/api/table/mutations.py
@@ -28,7 +28,7 @@
syncTables = gql.MutationField(
name='syncTables',
args=[gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String))],
type=gql.Ref('DatasetTableSearchResult'),
type=gql.Integer,
resolver=sync_tables,
)

6 changes: 6 additions & 0 deletions backend/dataall/modules/s3_datasets/api/table/resolvers.py
@@ -72,3 +72,9 @@ def list_table_data_filters(context: Context, source, tableUri: str = None, filt
if not filter:
filter = {'page': 1, 'pageSize': 5}
return DatasetTableDataFilterService.list_table_data_filters(uri=tableUri, data=filter)


def get_dataset_table_restricted_information(context: Context, source: DatasetTable, **kwargs):
if not source:
return None
return DatasetTableService.get_table_restricted_information(uri=source.tableUri, table=source)
17 changes: 13 additions & 4 deletions backend/dataall/modules/s3_datasets/api/table/types.py
@@ -4,6 +4,7 @@
resolve_dataset,
get_glue_table_properties,
resolve_glossary_terms,
get_dataset_table_restricted_information,
)

TablePermission = gql.ObjectType(
@@ -21,6 +22,15 @@
gql.Field(name='nodes', type=gql.ArrayType(TablePermission)),
],
)
DatasetTableRestrictedInformation = gql.ObjectType(
name='DatasetTableRestrictedInformation',
fields=[
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='GlueDatabaseName', type=gql.String),
gql.Field(name='GlueTableName', type=gql.String),
gql.Field(name='S3Prefix', type=gql.String),
],
)

DatasetTable = gql.ObjectType(
name='DatasetTable',
@@ -35,12 +45,11 @@
gql.Field(name='created', type=gql.String),
gql.Field(name='updated', type=gql.String),
gql.Field(name='admins', type=gql.ArrayType(gql.String)),
gql.Field(name='AwsAccountId', type=gql.String),
gql.Field(name='GlueDatabaseName', type=gql.String),
gql.Field(name='GlueTableName', type=gql.String),
gql.Field(name='LastGlueTableStatus', type=gql.String),
gql.Field(name='S3Prefix', type=gql.String),
gql.Field(name='GlueTableConfig', type=gql.String),
gql.Field(
name='restricted', type=DatasetTableRestrictedInformation, resolver=get_dataset_table_restricted_information
),
gql.Field(
name='GlueTableProperties',
type=gql.String,
backend/dataall/modules/s3_datasets/services/dataset_location_service.py
@@ -3,7 +3,7 @@
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository
from dataall.base.db.exceptions import ResourceShared, ResourceAlreadyExists
from dataall.base.db.exceptions import ResourceAlreadyExists
from dataall.modules.s3_datasets.services.dataset_service import DatasetService
from dataall.modules.s3_datasets.aws.s3_location_client import S3LocationClient
from dataall.modules.s3_datasets.db.dataset_location_repositories import DatasetLocationRepository
@@ -59,7 +59,6 @@ def list_dataset_locations(uri: str, filter: dict = None):
return DatasetLocationRepository.list_dataset_locations(session=session, uri=uri, data=filter)

@staticmethod
@ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER)
def get_storage_location(uri):
with get_context().db_engine.scoped_session() as session:
return DatasetLocationRepository.get_location_by_uri(session, uri)
@@ -135,3 +134,10 @@ def _delete_dataset_folder_read_permission(session, dataset: S3Dataset, location
}
for group in permission_group:
ResourcePolicyService.delete_resource_policy(session=session, group=group, resource_uri=location_uri)

@staticmethod
@ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER)
def get_folder_restricted_information(uri: str, folder: DatasetStorageLocation):
context = get_context()
with context.db_engine.scoped_session() as session:
return DatasetRepository.get_dataset_by_uri(session, folder.datasetUri)
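Note that get_folder_restricted_information checks GET_DATASET_FOLDER against the folder's URI but returns the parent S3Dataset, because the shared DatasetRestrictedInformation type resolves its fields from dataset-level attributes. A minimal usage sketch under that assumption (names from this diff; the calling context is hypothetical):

# Hypothetical caller: fetch a folder, then its restricted (dataset-level) info
folder = DatasetLocationService.get_storage_location(uri=location_uri)
info = DatasetLocationService.get_folder_restricted_information(
    uri=folder.locationUri, folder=folder
)
print(info.AwsAccountId, info.S3BucketName, info.KmsAlias)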
backend/dataall/modules/s3_datasets/services/dataset_service.py
@@ -38,8 +38,8 @@
DATASET_ALL,
DATASET_READ,
IMPORT_DATASET,
GET_DATASET,
DATASET_TABLE_ALL,
GET_DATASET,
)
from dataall.modules.datasets_base.services.dataset_list_permissions import LIST_ENVIRONMENT_DATASETS
from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository
@@ -344,6 +344,11 @@ def get_dataset_statistics(dataset: S3Dataset):
'upvotes': count_upvotes or 0,
}

@staticmethod
@ResourcePolicyService.has_resource_permission(GET_DATASET)
def get_dataset_restricted_information(uri: str, dataset: S3Dataset):
return dataset

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET)
@@ -397,8 +402,6 @@ def start_crawler(uri: str, data: dict = None):

return {
'Name': dataset.GlueCrawlerName,
'AwsAccountId': dataset.AwsAccountId,
'region': dataset.region,
'status': crawler.get('LastCrawl', {}).get('Status', 'N/A'),
}

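get_dataset_restricted_information can simply return the dataset it was given because the access control lives entirely in the decorator: has_resource_permission(GET_DATASET) validates the caller against uri before the function body runs. A simplified sketch of that decorator pattern, assuming a permission-check helper on ResourcePolicyService (an approximation, not the actual implementation):

import functools

from dataall.base.context import get_context
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService

def has_resource_permission(permission: str):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, uri: str, **kwargs):
            context = get_context()
            with context.db_engine.scoped_session() as session:
                # Assumed check method: raises unless one of the caller's
                # groups holds `permission` on the resource `uri`
                ResourcePolicyService.check_user_resource_permission(
                    session=session,
                    username=context.username,
                    groups=context.groups,
                    resource_uri=uri,
                    permission_name=permission,
                )
            return func(*args, uri=uri, **kwargs)
        return wrapper
    return decorator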
backend/dataall/modules/s3_datasets/services/dataset_table_service.py
@@ -1,5 +1,4 @@
import logging

from dataall.base.context import get_context
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
@@ -44,6 +43,11 @@ def get_table(uri: str):
with get_context().db_engine.scoped_session() as session:
return DatasetTableRepository.get_dataset_table_by_uri(session, uri)

@staticmethod
@ResourcePolicyService.has_resource_permission(GET_DATASET_TABLE)
def get_table_restricted_information(uri: str, table: DatasetTable):
return table

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
@ResourcePolicyService.has_resource_permission(UPDATE_DATASET_TABLE, parent_resource=_get_dataset_uri)
@@ -127,11 +131,7 @@ def sync_tables_for_dataset(cls, uri):
DatasetTableIndexer.upsert_all(session=session, dataset_uri=dataset.datasetUri)
DatasetTableIndexer.remove_all_deleted(session=session, dataset_uri=dataset.datasetUri)
DatasetIndexer.upsert(session=session, dataset_uri=dataset.datasetUri)
return DatasetRepository.paginated_dataset_tables(
session=session,
uri=uri,
data={'page': 1, 'pageSize': 10},
)
return DatasetRepository.count_dataset_tables(session, dataset.datasetUri)

@staticmethod
def sync_existing_tables(session, uri, glue_tables=None):
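The new return value matches the syncTables mutation being retyped to gql.Integer earlier in this diff: callers now get a plain table count instead of a paginated result page. The repository call presumably reduces to a count query; a hypothetical sketch (import path assumed):

from dataall.modules.s3_datasets.db.dataset_models import DatasetTable  # path assumed

def count_dataset_tables(session, dataset_uri: str) -> int:
    # Hypothetical shape of DatasetRepository.count_dataset_tables
    return (
        session.query(DatasetTable)
        .filter(DatasetTable.datasetUri == dataset_uri)
        .count()
    )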
14 changes: 8 additions & 6 deletions frontend/package-lock.json

Some generated files are not rendered by default.
