From 688bd0bf0bd23c325bc4f02d0b444e9162052bab Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:15:56 +0100 Subject: [PATCH 1/6] Avoid infinite loop in glossaries checks (#1725) ### Feature or Bugfix - Feature - Bugfix ### Detail In some edge cases where a category and term is orphan and does not have a Glossary as parent we would run into an infinite loop in the glossaries permission check. This PR adds a maximum depth level (which in reality is lower, categories can only host terms, the REAL_MAX_DEPTH=3) ### Relates - #1721 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../dataall/modules/catalog/services/glossaries_service.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/dataall/modules/catalog/services/glossaries_service.py b/backend/dataall/modules/catalog/services/glossaries_service.py index d98f85bce..3e8b8f6ea 100644 --- a/backend/dataall/modules/catalog/services/glossaries_service.py +++ b/backend/dataall/modules/catalog/services/glossaries_service.py @@ -29,8 +29,11 @@ def wrapper(*args, **kwargs): context = get_context() with context.db_engine.scoped_session() as session: node = GlossaryRepository.get_node(session=session, uri=uri) - while node.nodeType != 'G': + MAX_GLOSSARY_DEPTH = 10 + depth = 0 + while node.nodeType != 'G' and depth <= MAX_GLOSSARY_DEPTH: node = GlossaryRepository.get_node(session=session, uri=node.parentUri) + depth += 1 if node and (node.admin in context.groups): return f(*args, **kwargs) else: From 5438bdb545a313a7685421054fa1f759d11944f0 Mon Sep 17 00:00:00 2001 From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:04:54 +0100 Subject: [PATCH 2/6] Feed consistent permissions (#1722) ### Feature or Bugfix - Feature - Bugfix ### Detail The Feeds module is used in the frontend in several modules. Some restrict access to admins only and some don't. In this PR we unify the behavior. ONLY ADMINS CAN SEE THE FEED in the frontend. - Dashboards: accessible to any user -----> add isAdmin - PIpelines: accessible to any user -----> add isAdmin - Redshift_Datasets: accessible to admin users only - Redshift_Tables : accessible to admin users only - S3_Datasets: accessible to admin users only - Folders: accessible to admin users only - Tables: accessible to admin users only Alongside the frontend changes, the backend should follow the same logic and restrict the API calls with permissions checks. That is what this PR does, it introduces resource permission checks depending on the Feed targetType with GET_X permission checks. - [x] Add security-focused tests for unauthorized cases Screenshot 2024-11-26 at 14 49 56 ### Testing - [X] UI shows chat button for admins (creators or admin team) - verified in Datasets and Dashboards - [X] UI does not show chat button for non-admins - verified in Datasets and Dashboards - [x] Deploy in AWS - Call getFeed, postFeedMessage with resource admin (with GET permissions) and get feed - [X] Dataset - [x] Table - [x] Folder - [X] Redshift Dataset - [X] Redshift Table - [x] Dashboard - Call getFeed, postFeedMessage with another team not the resource admin (with UPDATE permissions) and get unauthorized response: - [X] Dataset - [x] Table - [x] Folder - [x] Redshift Dataset - [x] Redshift Table - [x] Dashboard ### Relates - ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../dataall/modules/dashboards/__init__.py | 4 +- .../dataall/modules/datapipelines/__init__.py | 2 +- backend/dataall/modules/feed/api/registry.py | 5 +++ backend/dataall/modules/feed/api/resolvers.py | 2 +- .../modules/feed/services/feed_service.py | 39 +++++++++++++++---- .../modules/redshift_datasets/__init__.py | 11 ++++-- .../dataall/modules/s3_datasets/__init__.py | 8 ++-- .../modules/Dashboards/views/DashboardView.js | 22 ++++++----- .../modules/Pipelines/views/PipelineView.js | 34 ++++++++++------ .../modules/feed/test_feed.py | 12 ++++++ 10 files changed, 99 insertions(+), 40 deletions(-) diff --git a/backend/dataall/modules/dashboards/__init__.py b/backend/dataall/modules/dashboards/__init__.py index ffbc8e92d..068a1f97f 100644 --- a/backend/dataall/modules/dashboards/__init__.py +++ b/backend/dataall/modules/dashboards/__init__.py @@ -5,7 +5,6 @@ from dataall.base.loader import ImportMode, ModuleInterface - log = logging.getLogger(__name__) @@ -33,8 +32,9 @@ def __init__(self): from dataall.modules.catalog.indexers.registry import GlossaryRegistry, GlossaryDefinition from dataall.modules.vote.services.vote_service import add_vote_type from dataall.modules.dashboards.indexers.dashboard_indexer import DashboardIndexer + from dataall.modules.dashboards.services.dashboard_permissions import GET_DASHBOARD - FeedRegistry.register(FeedDefinition('Dashboard', Dashboard)) + FeedRegistry.register(FeedDefinition('Dashboard', Dashboard, GET_DASHBOARD)) GlossaryRegistry.register( GlossaryDefinition( diff --git a/backend/dataall/modules/datapipelines/__init__.py b/backend/dataall/modules/datapipelines/__init__.py index 7b1a56334..171a6a311 100644 --- a/backend/dataall/modules/datapipelines/__init__.py +++ b/backend/dataall/modules/datapipelines/__init__.py @@ -35,7 +35,7 @@ def __init__(self): ) import dataall.modules.datapipelines.api - FeedRegistry.register(FeedDefinition('DataPipeline', DataPipeline)) + FeedRegistry.register(FeedDefinition('DataPipeline', DataPipeline, GET_PIPELINE)) TargetType('pipeline', GET_PIPELINE, UPDATE_PIPELINE, MANAGE_PIPELINES) TargetType('cdkpipeline', GET_PIPELINE, UPDATE_PIPELINE, MANAGE_PIPELINES) diff --git a/backend/dataall/modules/feed/api/registry.py b/backend/dataall/modules/feed/api/registry.py index 3fe72f245..8db914263 100644 --- a/backend/dataall/modules/feed/api/registry.py +++ b/backend/dataall/modules/feed/api/registry.py @@ -10,6 +10,7 @@ class FeedDefinition: target_type: str model: Type[Resource] + permission: str class FeedRegistry(UnionTypeRegistry): @@ -25,6 +26,10 @@ def register(cls, definition: FeedDefinition): def find_model(cls, target_type: str): return cls._DEFINITIONS[target_type].model + @classmethod + def find_permission(cls, target_type: str): + return cls._DEFINITIONS[target_type].permission + @classmethod def find_target(cls, obj: Resource): for target_type, definition in cls._DEFINITIONS.items(): diff --git a/backend/dataall/modules/feed/api/resolvers.py b/backend/dataall/modules/feed/api/resolvers.py index a3bcca622..e971d90bf 100644 --- a/backend/dataall/modules/feed/api/resolvers.py +++ b/backend/dataall/modules/feed/api/resolvers.py @@ -43,4 +43,4 @@ def resolve_feed_messages(context: Context, source: Feed, filter: dict = None): _required_uri(source.targetUri) if not filter: filter = {} - return FeedService.list_feed_messages(targetUri=source.targetUri, filter=filter) + return FeedService.list_feed_messages(targetUri=source.targetUri, targetType=source.targetType, filter=filter) diff --git a/backend/dataall/modules/feed/services/feed_service.py b/backend/dataall/modules/feed/services/feed_service.py index 364b2a575..69d271186 100644 --- a/backend/dataall/modules/feed/services/feed_service.py +++ b/backend/dataall/modules/feed/services/feed_service.py @@ -6,8 +6,10 @@ import logging from dataall.base.context import get_context +from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.modules.feed.db.feed_models import FeedMessage from dataall.modules.feed.db.feed_repository import FeedRepository +from dataall.modules.feed.api.registry import FeedRegistry logger = logging.getLogger(__name__) @@ -27,10 +29,6 @@ def targetType(self): return self._targetType -def _session(): - return get_context().db_engine.scoped_session() - - class FeedService: """ Encapsulate the logic of interactions with Feeds. @@ -41,6 +39,15 @@ def get_feed( targetUri: str = None, targetType: str = None, ) -> Feed: + context = get_context() + with context.db_engine.scoped_session() as session: + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=targetUri, + permission_name=FeedRegistry.find_permission(target_type=targetType), + ) return Feed(targetUri=targetUri, targetType=targetType) @staticmethod @@ -49,17 +56,33 @@ def post_feed_message( targetType: str = None, content: str = None, ): - with _session() as session: + context = get_context() + with context.db_engine.scoped_session() as session: + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=targetUri, + permission_name=FeedRegistry.find_permission(target_type=targetType), + ) m = FeedMessage( targetUri=targetUri, targetType=targetType, - creator=get_context().username, + creator=context.username, content=content, ) session.add(m) return m @staticmethod - def list_feed_messages(targetUri: str, filter: dict = None): - with _session() as session: + def list_feed_messages(targetUri: str, targetType: str, filter: dict = None): + context = get_context() + with context.db_engine.scoped_session() as session: + ResourcePolicyService.check_user_resource_permission( + session=session, + username=context.username, + groups=context.groups, + resource_uri=targetUri, + permission_name=FeedRegistry.find_permission(target_type=targetType), + ) return FeedRepository(session).paginated_feed_messages(uri=targetUri, filter=filter) diff --git a/backend/dataall/modules/redshift_datasets/__init__.py b/backend/dataall/modules/redshift_datasets/__init__.py index cd9e73f68..1a6f1344f 100644 --- a/backend/dataall/modules/redshift_datasets/__init__.py +++ b/backend/dataall/modules/redshift_datasets/__init__.py @@ -51,11 +51,16 @@ def __init__(self): FEED_REDSHIFT_DATASET_TABLE_NAME, VOTE_REDSHIFT_DATASET_NAME, ) - + from dataall.modules.redshift_datasets.services.redshift_dataset_permissions import ( + GET_REDSHIFT_DATASET, + GET_REDSHIFT_DATASET_TABLE, + ) import dataall.modules.redshift_datasets.api - FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_TABLE_NAME, RedshiftTable)) - FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_NAME, RedshiftDataset)) + FeedRegistry.register( + FeedDefinition(FEED_REDSHIFT_DATASET_TABLE_NAME, RedshiftTable, GET_REDSHIFT_DATASET_TABLE) + ) + FeedRegistry.register(FeedDefinition(FEED_REDSHIFT_DATASET_NAME, RedshiftDataset, GET_REDSHIFT_DATASET)) GlossaryRegistry.register( GlossaryDefinition( diff --git a/backend/dataall/modules/s3_datasets/__init__.py b/backend/dataall/modules/s3_datasets/__init__.py index dbd4f458c..42872063d 100644 --- a/backend/dataall/modules/s3_datasets/__init__.py +++ b/backend/dataall/modules/s3_datasets/__init__.py @@ -44,14 +44,16 @@ def __init__(self): from dataall.modules.s3_datasets.services.dataset_permissions import ( GET_DATASET, UPDATE_DATASET, + GET_DATASET_TABLE, + GET_DATASET_FOLDER, MANAGE_DATASETS, ) from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset - FeedRegistry.register(FeedDefinition('DatasetStorageLocation', DatasetStorageLocation)) - FeedRegistry.register(FeedDefinition('DatasetTable', DatasetTable)) - FeedRegistry.register(FeedDefinition('Dataset', S3Dataset)) + FeedRegistry.register(FeedDefinition('DatasetStorageLocation', DatasetStorageLocation, GET_DATASET_FOLDER)) + FeedRegistry.register(FeedDefinition('DatasetTable', DatasetTable, GET_DATASET_TABLE)) + FeedRegistry.register(FeedDefinition('Dataset', S3Dataset, GET_DATASET)) GlossaryRegistry.register( GlossaryDefinition( diff --git a/frontend/src/modules/Dashboards/views/DashboardView.js b/frontend/src/modules/Dashboards/views/DashboardView.js index 03abaa021..d093546e7 100644 --- a/frontend/src/modules/Dashboards/views/DashboardView.js +++ b/frontend/src/modules/Dashboards/views/DashboardView.js @@ -227,16 +227,18 @@ const DashboardView = () => { onClick={() => upVoteDashboard(dashboard.dashboardUri)} upVotes={upVotes || 0} /> - + {isAdmin && ( + + )} + {isAdmin && ( + + )}