From 5fc51c31848e1723873a96324bb816b2f1edc0ca Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sat, 6 Jan 2024 20:33:59 -0700 Subject: [PATCH 1/8] add policy generation from hydroshar --- app/api/subsetter/app/db.py | 14 +++- .../access_control/policy_generation.py | 44 ++++++----- .../app/routers/access_control/router.py | 78 ++++++++++++------- app/api/subsetter/app/users.py | 2 +- app/api/subsetter/config/__init__.py | 3 + 5 files changed, 87 insertions(+), 54 deletions(-) diff --git a/app/api/subsetter/app/db.py b/app/api/subsetter/app/db.py index e90f2ef2..2a9688fb 100644 --- a/app/api/subsetter/app/db.py +++ b/app/api/subsetter/app/db.py @@ -1,4 +1,5 @@ from enum import Enum +from functools import lru_cache from typing import List, Optional, Tuple import httpx @@ -9,10 +10,14 @@ from subsetter.config import get_settings -DATABASE_URL = get_settings().mongo_url -client = motor.motor_asyncio.AsyncIOMotorClient(DATABASE_URL, uuidRepresentation="standard") +client = motor.motor_asyncio.AsyncIOMotorClient(get_settings().mongo_url, uuidRepresentation="standard") db = client[get_settings().mongo_database] +client_hydroshare = motor.motor_asyncio.AsyncIOMotorClient( + get_settings().hydroshare_mongo_url, uuidRepresentation="standard" +) +db_hydroshare = client_hydroshare[get_settings().hydroshare_mongo_database] + class OAuthAccount(BaseOAuthAccount): pass @@ -80,3 +85,8 @@ async def update_submission(self, submission: Submission) -> None: async def get_user_db(): yield BeanieUserDatabase(User, OAuthAccount) + + +@lru_cache +def get_hydroshare_access_db(): + return db_hydroshare diff --git a/app/api/subsetter/app/routers/access_control/policy_generation.py b/app/api/subsetter/app/routers/access_control/policy_generation.py index 861388ba..9cd7b655 100644 --- a/app/api/subsetter/app/routers/access_control/policy_generation.py +++ b/app/api/subsetter/app/routers/access_control/policy_generation.py @@ -44,6 +44,7 @@ def refresh_minio_policy(user): ''' import copy +from typing import Dict def bucket_name(resource_id: str): @@ -52,7 +53,7 @@ def bucket_name(resource_id: str): return "subsetter-outputs" -def create_view_statements(owner, submissions) -> list: +def create_view_statements(user, views: Dict[str, list[str]]) -> list: view_statement_template_get = { "Effect": "Allow", "Action": ["s3:GetBucketLocation", "s3:GetObject"], @@ -64,35 +65,36 @@ def create_view_statements(owner, submissions) -> list: "Resource": [], "Condition": {"StringLike": {"s3:prefix": []}}, } - bucketname = bucket_name("blah") - get_resources = [f"arn:aws:s3:::{bucketname}/{owner.username}/*"] + + get_resources = [f"arn:aws:s3:::{user.username}/*"] view_statement = copy.deepcopy(view_statement_template_listing) - view_statement["Resource"] = [f"arn:aws:s3:::{bucketname}"] - view_statement["Condition"]["StringLike"]["s3:prefix"] = [f"{owner.username}/*"] + view_statement["Resource"] = [f"arn:aws:s3:::{user.username}"] + del view_statement["Condition"] list_statements = [view_statement] - for user, submission in submissions: - bucketname = bucket_name(submission.workflow_id) - get_resources.append( - f"arn:aws:s3:::{bucketname}/{user.username}/{submission.workflow_name}/{submission.workflow_id}/*" - ) + for bucket_owner, resource_paths in views.items(): + get_resources = get_resources + [ + f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths + ] view_statement = copy.deepcopy(view_statement_template_listing) - view_statement["Resource"] = [f"arn:aws:s3:::{bucketname}"] + view_statement["Resource"] = [f"arn:aws:s3:::{bucket_owner}"] view_statement["Condition"]["StringLike"]["s3:prefix"] = [ - f"{user.username}/{submission.workflow_name}/{submission.workflow_id}/*" + f"{resource_path}/*" for resource_path in resource_paths ] list_statements.append(view_statement) view_statement_template_get["Resource"] = get_resources return list_statements + [view_statement_template_get] -# def create_edit_owner_statements(resource_ids: list[str]) -> list: -# edit_statement_template = {"Effect": "Allow", "Action": ["s3:*"], "Resource": []} -# edit_statement_template["Resource"] = [ -# f"arn:aws:s3:::{bucket_name(resource_id)}/hydroshare/{resource_id}/*" for resource_id in resource_ids -# ] -# return [edit_statement_template] +def create_edit_statements(user, edits: Dict[str, list[str]]) -> list: + edit_statement_template = {"Effect": "Allow", "Action": ["s3:*"], "Resource": []} + resources = [] + for bucket_owner, resource_paths in edits.items(): + resources = resources + [f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths] + edit_statement_template["Resource"] = resources + return [edit_statement_template] -def minio_policy(user, view_submissions): - view_statements = create_view_statements(user, view_submissions) - return {"Version": "2012-10-17", "Statement": view_statements} +def minio_policy(user, owners: Dict[str, list[str]], edits: Dict[str, list[str]], views: Dict[str, list[str]]): + statements = create_view_statements(user, views) + statements = statements + create_edit_statements(user, edits) + return {"Version": "2012-10-17", "Statement": statements} diff --git a/app/api/subsetter/app/routers/access_control/router.py b/app/api/subsetter/app/routers/access_control/router.py index a48c56ad..19aa9089 100644 --- a/app/api/subsetter/app/routers/access_control/router.py +++ b/app/api/subsetter/app/routers/access_control/router.py @@ -2,54 +2,72 @@ import os import subprocess import tempfile +from typing import Dict -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends from pydantic import BaseModel -from subsetter.app.db import Submission, User +from subsetter.app.db import User, get_hydroshare_access_db from subsetter.app.routers.access_control.policy_generation import minio_policy from subsetter.app.users import current_active_user router = APIRouter() -class ShareWorkflowBody(BaseModel): +class UserAccess(BaseModel): + owner: list[str] + edit: list[str] + view: list[str] + + +class MinioUserResourceAccess(BaseModel): + owners: list[str] + resource_id: str + minio_resource_url: str + + +class MinioUserAccess(BaseModel): + owner: list[MinioUserResourceAccess] + edit: list[MinioUserResourceAccess] + view: list[MinioUserResourceAccess] + + +class UserPrivilege(BaseModel): username: str - workflow_id: str + all: UserAccess + minio: MinioUserAccess -@router.post('/policy/add') -async def share_workflow_with_user(share_params: ShareWorkflowBody, user: User = Depends(current_active_user)): - submission: Submission = user.get_submission(share_params.workflow_id) - if submission: - submission.add_user(share_params.username) - await user.update_submission(submission) - return user - else: - return HTTPException(status_code=400) +def check_owners_in_bucket_path(resource_access: MinioUserResourceAccess): + for owner in resource_access.owners: + if f"/browser/{owner}/" in resource_access.minio_resource_url: + return owner + return None -@router.delete('/policy/remove') -async def unshare_workflow_with_user(share_params: ShareWorkflowBody, user: User = Depends(current_active_user)): - submission: Submission = user.get_submission(share_params.workflow_id) - if submission: - submission.remove_user(share_params.username) - await user.update_submission(submission) - return user - else: - return HTTPException(status_code=400) +def sort_privileges(user_accesses: list[MinioUserResourceAccess]): + authorized_users = {} + for user_access in user_accesses: + bucket_owner = check_owners_in_bucket_path(user_access) + if bucket_owner: + resource_path = user_access.minio_resource_url.split(f"{bucket_owner}/", 1)[-1] + authorized_users.setdefault(bucket_owner, []).append(resource_path) + return authorized_users @router.get('/policy') async def generate_user_policy(user: User = Depends(current_active_user)): - users = await User.find({"submissions.view_users": user.username}).to_list() - # this should be rewritten to query all on the db, but I don't have time to figure that out now - matching_submissions = [] - for u in users: - for submission in u.submissions: - if user.username in submission.view_users: - matching_submissions.append((u, submission)) - return minio_policy(user, matching_submissions) + hydroshare_access_db = get_hydroshare_access_db() + user_privilege = await hydroshare_access_db.userprivileges.find_one({"username": user.username}) + user_privilege: UserPrivilege = UserPrivilege(**user_privilege) + + # Check Authorization + minio_user_access: MinioUserAccess = user_privilege.minio + authorized_owners: Dict[str, list[str]] = sort_privileges(minio_user_access.owner) + authorized_edits: Dict[str, list[str]] = sort_privileges(minio_user_access.edit) + authorized_views: Dict[str, list[str]] = sort_privileges(minio_user_access.view) + + return minio_policy(user, authorized_owners, authorized_edits, authorized_views) @router.get('/profile') diff --git a/app/api/subsetter/app/users.py b/app/api/subsetter/app/users.py index 7714b6c9..5ebd85a6 100644 --- a/app/api/subsetter/app/users.py +++ b/app/api/subsetter/app/users.py @@ -11,7 +11,7 @@ from httpx_oauth.oauth2 import OAuth2 from subsetter.app.db import User, get_user_db -from subsetter.config import get_settings, get_minio_client +from subsetter.config import get_minio_client, get_settings SECRET = "SECRET" diff --git a/app/api/subsetter/config/__init__.py b/app/api/subsetter/config/__init__.py index ccbccb4c..c65f1ef4 100644 --- a/app/api/subsetter/config/__init__.py +++ b/app/api/subsetter/config/__init__.py @@ -16,6 +16,9 @@ class Settings(BaseSettings): mongo_url: str mongo_database: str + hydroshare_mongo_url: str + hydroshare_mongo_database: str + oauth2_client_id: str oauth2_client_secret: str oauth2_redirect_url: str From a7838ae2ce2c6543299c65b71f5429458c4541f6 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sat, 6 Jan 2024 21:31:43 -0700 Subject: [PATCH 2/8] add minio policy saving to minio --- .../access_control/policy_generation.py | 74 ++++++++----------- .../app/routers/access_control/router.py | 19 +---- app/api/subsetter/app/routers/argo/router.py | 10 ++- 3 files changed, 39 insertions(+), 64 deletions(-) diff --git a/app/api/subsetter/app/routers/access_control/policy_generation.py b/app/api/subsetter/app/routers/access_control/policy_generation.py index 9cd7b655..5d757e08 100644 --- a/app/api/subsetter/app/routers/access_control/policy_generation.py +++ b/app/api/subsetter/app/routers/access_control/policy_generation.py @@ -1,56 +1,41 @@ -''' -def admin_policy_attach(target, name): - arguments = ['mc', '--config-dir', '/hydroshare/', 'admin', 'policy', 'attach', target, name, '--user', name] - logger.info(arguments) - try: - _output = subprocess.check_output(arguments, user='hydro-service') - logger.info(_output) - except subprocess.CalledProcessError as e: - logger.exception(e.output) - +import copy +import json +import logging as logger +import os import subprocess +import tempfile +from typing import Dict + + def admin_policy_create(target, name, file): - arguments = ['mc', '--config-dir', '/hydroshare/', '--json', 'admin', 'policy', 'create', target, name, file] + arguments = ['mc', '--json', 'admin', 'policy', 'create', target, name, file] logger.info(arguments) try: - _output = subprocess.check_output(arguments, user='hydro-service') + _output = subprocess.check_output(arguments) logger.info(_output) except subprocess.CalledProcessError as e: logger.exception(e.output) - admin_policy_attach(target, name) + def admin_policy_remove(target, name): - arguments = ['mc', '--config-dir', '/hydroshare/', '--json', 'admin', 'policy', 'remove', target, name] + arguments = ['mc', '--json', 'admin', 'policy', 'rm', target, name] logger.info(arguments) try: - _output = subprocess.check_output(arguments, user='hydro-service') + _output = subprocess.check_output(arguments) logger.info(_output) except subprocess.CalledProcessError as e: logger.exception(e.output) -def refresh_minio_policy(user): - policy = minio_policy(user) - logging.info(json.dumps(policy, indent=2)) - if policy["Statement"]: - with tempfile.TemporaryDirectory(dir='/hs_tmp') as tmpdirname: - filepath = os.path.join(tmpdirname, "metadata.json") - fp = open(filepath, "w") - fp.write(json.dumps(policy)) - fp.close() - admin_policy_remove(target='cuahsi', name=user.username) - admin_policy_create(target='cuahsi', name=user.username, file=filepath) - else: - admin_policy_remove(target='cuahsi', name=user.username) -''' - -import copy -from typing import Dict - -def bucket_name(resource_id: str): - # raccess = ResourceAccess.objects.filter(resource__short_id=resource_id).first() - # return raccess.owners.first().username - return "subsetter-outputs" +def refresh_minio_policy(user, policy): + logger.info(json.dumps(policy, indent=2)) + with tempfile.TemporaryDirectory() as tmpdirname: + filepath = os.path.join(tmpdirname, "metadata.json") + fp = open(filepath, "w") + fp.write(json.dumps(policy)) + fp.close() + admin_policy_create(target='cuahsi', name=user.username, file=filepath) + return policy def create_view_statements(user, views: Dict[str, list[str]]) -> list: @@ -67,10 +52,7 @@ def create_view_statements(user, views: Dict[str, list[str]]) -> list: } get_resources = [f"arn:aws:s3:::{user.username}/*"] - view_statement = copy.deepcopy(view_statement_template_listing) - view_statement["Resource"] = [f"arn:aws:s3:::{user.username}"] - del view_statement["Condition"] - list_statements = [view_statement] + list_statements = [] for bucket_owner, resource_paths in views.items(): get_resources = get_resources + [ f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths @@ -90,11 +72,15 @@ def create_edit_statements(user, edits: Dict[str, list[str]]) -> list: resources = [] for bucket_owner, resource_paths in edits.items(): resources = resources + [f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths] - edit_statement_template["Resource"] = resources - return [edit_statement_template] + if resources: + edit_statement_template["Resource"] = resources + return [edit_statement_template] + else: + return [] def minio_policy(user, owners: Dict[str, list[str]], edits: Dict[str, list[str]], views: Dict[str, list[str]]): - statements = create_view_statements(user, views) + statements = [{"Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": [f"arn:aws:s3:::{user.username}"]}] + statements = statements + create_view_statements(user, views) statements = statements + create_edit_statements(user, edits) return {"Version": "2012-10-17", "Statement": statements} diff --git a/app/api/subsetter/app/routers/access_control/router.py b/app/api/subsetter/app/routers/access_control/router.py index 19aa9089..d0c310fe 100644 --- a/app/api/subsetter/app/routers/access_control/router.py +++ b/app/api/subsetter/app/routers/access_control/router.py @@ -11,6 +11,8 @@ from subsetter.app.routers.access_control.policy_generation import minio_policy from subsetter.app.users import current_active_user +from .policy_generation import refresh_minio_policy + router = APIRouter() @@ -76,22 +78,7 @@ async def refresh_profile(user: User = Depends(current_active_user)): return user -def admin_policy_create(name, policy, target="cuahsi"): - with tempfile.TemporaryDirectory() as tmpdirname: - print(policy) - filepath = os.path.join(tmpdirname, "metadata.json") - fp = open(filepath, "w") - fp.write(json.dumps(policy)) - fp.close() - arguments = ['mc', '--json', 'admin', 'policy', 'create', target, name, filepath] - try: - _output = subprocess.check_output(arguments) - except subprocess.CalledProcessError as e: - raise - - @router.get('/policy/minio/cuahsi') async def generate_and_save_user_policy(user: User = Depends(current_active_user)): user_policy = await generate_user_policy(user) - admin_policy_create(user.username, user_policy) - return user_policy + return refresh_minio_policy(user, user_policy) diff --git a/app/api/subsetter/app/routers/argo/router.py b/app/api/subsetter/app/routers/argo/router.py index e215227c..2b8a8da9 100644 --- a/app/api/subsetter/app/routers/argo/router.py +++ b/app/api/subsetter/app/routers/argo/router.py @@ -18,6 +18,7 @@ ) from subsetter.app.users import current_active_user from subsetter.config import get_minio_client, get_settings + from .transformer import transform_latlon if get_settings().cloud_run: @@ -43,8 +44,9 @@ def parflow_submission_body(hucs: list, username: str, workflow_name: str): "name": workflow_name, "parameters": [ f"output-path=argo_workflows/parflow/{workflow_name}", - f"output-bucket={username}", - "hucs=" + ",".join(hucs)], + f"output-bucket={username}", + "hucs=" + ",".join(hucs), + ], }, } @@ -123,7 +125,7 @@ async def submit_parflow( async def submit_nwm1( y_south: float, x_west: float, y_north: float, x_east: float, user: User = Depends(current_active_user) ) -> SubmissionResponseModel: - #y_south, x_west, y_north, x_east = transform_latlon(y_south, x_west, y_north, x_east) + # y_south, x_west, y_north, x_east = transform_latlon(y_south, x_west, y_north, x_east) workflow_id = str(uuid.uuid4()) api_response = api_instance.submit_workflow( namespace=get_settings().argo_namespace, @@ -139,7 +141,7 @@ async def submit_nwm1( async def submit_nwm2( y_south: float, x_west: float, y_north: float, x_east: float, user: User = Depends(current_active_user) ) -> SubmissionResponseModel: - #y_south, x_west, y_north, x_east = transform_latlon(y_south, x_west, y_north, x_east) + # y_south, x_west, y_north, x_east = transform_latlon(y_south, x_west, y_north, x_east) workflow_id = str(uuid.uuid4()) api_response = api_instance.submit_workflow( namespace=get_settings().argo_namespace, From eabde4a529ac14ffe741decee57fe36a2197fa11 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sat, 6 Jan 2024 22:03:46 -0700 Subject: [PATCH 3/8] clean up view/edit statements generation --- .../access_control/policy_generation.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/app/api/subsetter/app/routers/access_control/policy_generation.py b/app/api/subsetter/app/routers/access_control/policy_generation.py index 5d757e08..30456909 100644 --- a/app/api/subsetter/app/routers/access_control/policy_generation.py +++ b/app/api/subsetter/app/routers/access_control/policy_generation.py @@ -39,9 +39,14 @@ def refresh_minio_policy(user, policy): def create_view_statements(user, views: Dict[str, list[str]]) -> list: - view_statement_template_get = { + view_statement_template_get_object = { "Effect": "Allow", - "Action": ["s3:GetBucketLocation", "s3:GetObject"], + "Action": ["s3:GetObject"], + "Resource": [], + } + view_statement_template_get_bucket = { + "Effect": "Allow", + "Action": ["s3:GetBucketLocation"], "Resource": [], } view_statement_template_listing = { @@ -51,25 +56,28 @@ def create_view_statements(user, views: Dict[str, list[str]]) -> list: "Condition": {"StringLike": {"s3:prefix": []}}, } - get_resources = [f"arn:aws:s3:::{user.username}/*"] + get_objectt_resources = [] + get_bucket_resources = [] list_statements = [] for bucket_owner, resource_paths in views.items(): - get_resources = get_resources + [ + get_objectt_resources = get_objectt_resources + [ f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths ] + get_bucket_resources.append(f"arn:aws:s3:::{bucket_owner}") view_statement = copy.deepcopy(view_statement_template_listing) view_statement["Resource"] = [f"arn:aws:s3:::{bucket_owner}"] view_statement["Condition"]["StringLike"]["s3:prefix"] = [ f"{resource_path}/*" for resource_path in resource_paths ] list_statements.append(view_statement) - view_statement_template_get["Resource"] = get_resources - return list_statements + [view_statement_template_get] + view_statement_template_get_object["Resource"] = get_objectt_resources + view_statement_template_get_bucket["Resource"] = get_bucket_resources + return list_statements + [view_statement_template_get_object] def create_edit_statements(user, edits: Dict[str, list[str]]) -> list: edit_statement_template = {"Effect": "Allow", "Action": ["s3:*"], "Resource": []} - resources = [] + resources = [f"arn:aws:s3:::{user.username}/*"] for bucket_owner, resource_paths in edits.items(): resources = resources + [f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths] if resources: @@ -80,7 +88,6 @@ def create_edit_statements(user, edits: Dict[str, list[str]]) -> list: def minio_policy(user, owners: Dict[str, list[str]], edits: Dict[str, list[str]], views: Dict[str, list[str]]): - statements = [{"Effect": "Allow", "Action": ["s3:ListBucket"], "Resource": [f"arn:aws:s3:::{user.username}"]}] - statements = statements + create_view_statements(user, views) + statements = create_view_statements(user, views) statements = statements + create_edit_statements(user, edits) return {"Version": "2012-10-17", "Statement": statements} From b326127d401ebb85c86d52e84add604fc63aad61 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sat, 6 Jan 2024 23:01:23 -0700 Subject: [PATCH 4/8] improve the edit by path statements --- .../access_control/policy_generation.py | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/app/api/subsetter/app/routers/access_control/policy_generation.py b/app/api/subsetter/app/routers/access_control/policy_generation.py index 30456909..19e6b848 100644 --- a/app/api/subsetter/app/routers/access_control/policy_generation.py +++ b/app/api/subsetter/app/routers/access_control/policy_generation.py @@ -39,6 +39,8 @@ def refresh_minio_policy(user, policy): def create_view_statements(user, views: Dict[str, list[str]]) -> list: + if not views: + return [] view_statement_template_get_object = { "Effect": "Allow", "Action": ["s3:GetObject"], @@ -76,15 +78,29 @@ def create_view_statements(user, views: Dict[str, list[str]]) -> list: def create_edit_statements(user, edits: Dict[str, list[str]]) -> list: - edit_statement_template = {"Effect": "Allow", "Action": ["s3:*"], "Resource": []} - resources = [f"arn:aws:s3:::{user.username}/*"] + edit_all_statement = { + "Effect": "Allow", + "Action": ["s3:*"], + "Resource": [f"arn:aws:s3:::{user.username}"], + } + edit_paths_resources = [] for bucket_owner, resource_paths in edits.items(): - resources = resources + [f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths] - if resources: - edit_statement_template["Resource"] = resources - return [edit_statement_template] - else: - return [] + edit_paths_resources = edit_paths_resources + [ + f"arn:aws:s3:::{bucket_owner}/{resource_path}/*" for resource_path in resource_paths + ] + + edit_paths_statement = { + "Effect": "Allow", + "Action": ["s3:*Object"], + "Resource": edit_paths_resources, + } + list_bucket_statement = { + "Effect": "Allow", + "Action": ["s3:ListBucket"], + "Resource": [f"arn:aws:s3:::{bucket_owner}" for bucket_owner, _ in edits.items()], + } + statements = [edit_all_statement, edit_paths_statement, list_bucket_statement] + return [statement for statement in statements if statement] def minio_policy(user, owners: Dict[str, list[str]], edits: Dict[str, list[str]], views: Dict[str, list[str]]): From 89436835f8f68db312b67f23f0726eb6cde5c493 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sat, 6 Jan 2024 23:41:08 -0700 Subject: [PATCH 5/8] add presigned put url creation --- .../subsetter/app/routers/storage/router.py | 34 ++++--------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/app/api/subsetter/app/routers/storage/router.py b/app/api/subsetter/app/routers/storage/router.py index c8ef717f..640ab12d 100644 --- a/app/api/subsetter/app/routers/storage/router.py +++ b/app/api/subsetter/app/routers/storage/router.py @@ -7,7 +7,7 @@ @router.get('/presigned/get/{workflow_id}', description="Create a download url") -async def presigned_put_minio(workflow_params: WorkflowDep): +async def presigned_get_minio(workflow_params: WorkflowDep): submission = workflow_params.user.get_submission(workflow_params.workflow_id) url = get_minio_client().presigned_get_object( workflow_params.user.username, f"argo_workflows/{submission.workflow_name}/{submission.workflow_id}/all.gz" @@ -15,33 +15,11 @@ async def presigned_put_minio(workflow_params: WorkflowDep): return {'url': url} -''' -@router.post('/bucket/create') -async def create_user_bucket( - user: User = Depends(current_active_user), description="Creates a bucket named with the username" -): - if not get_minio_client().bucket_exists(user.username): - get_minio_client().make_bucket(user.username) - -@router.get('/upload/{user_name}/workflow/{workflow_id}') -async def share_workflow_with_user( - user_name: str, workflow_params: WorkflowDep, user: User = Depends(current_active_user) -): - submission: Submission = workflow_params.submission - submission.add_user(user_name) - await user.update_submission(submission) - return User.get(user.document_id) - - -@router.get('/remove/{user_name}/workflow/{workflow_id}') -async def share_workflow_with_user( - user_name: str, workflow_params: WorkflowDep, user: User = Depends(current_active_user) -): - submission: Submission = workflow_params.submission - submission.remove_user(user_name) - await user.update_submission(submission) - return User.get(document_id=user.document_id) -''' +@router.get('/presigned/put/{bucket}', description="Create a PUT file presigned url") +async def presigned_put_minio(bucket: str, path: str): + url = get_minio_client().presigned_put_object(bucket, path) + return {'url': url} + # @router.post('/extract/{workflow_id}') # async def extract_workflow_artifact(workflow_params: WorkflowDep) -> SubmissionResponseModel: From 1e1d493f8de5ba7da010933318ceda093033b25c Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sun, 7 Jan 2024 12:21:06 -0700 Subject: [PATCH 6/8] refactor for use of metadata extractor template ref --- app/argo/huc-to-shp.yml | 22 ++++++++--- app/argo/metadata-extraction.yml | 17 +++++--- app/argo/nwm1-subset-minio.yml | 60 ++++++---------------------- app/argo/nwm2-subset-minio.yml | 64 ++++++------------------------ app/argo/parflow-subset-minio.yml | 66 +++++++------------------------ 5 files changed, 65 insertions(+), 164 deletions(-) diff --git a/app/argo/huc-to-shp.yml b/app/argo/huc-to-shp.yml index ea33b056..fce28074 100644 --- a/app/argo/huc-to-shp.yml +++ b/app/argo/huc-to-shp.yml @@ -1,36 +1,48 @@ metadata: - generateName: huc-to-shp- + name: huc-to-shp namespace: workflows spec: templates: - name: huc-to-shp + inputs: + parameters: + - name: output-bucket + value: '{{workflow.parameters.output-bucket}}' + - name: output-path + value: '{{workflow.parameters.output-path}}' + - name: hucs + value: '{{workflow.parameters.hucs}}' outputs: artifacts: - name: shape-results path: /output + archive: + none: {} s3: endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs + bucket: '{{inputs.parameters.output-bucket}}' accessKeySecret: name: minio-credentials key: accessKey secretKeySecret: name: minio-credentials key: secretKey - key: '{{workflow.parameters.output_path}}/shape.gz' + key: '{{inputs.parameters.output-path}}/shape' metadata: {} container: name: '' image: us-central1-docker.pkg.dev/apps-320517/subsetter/huc-to-shp:0.0.4 args: - - '{{workflow.parameters.hucs}}' + - '{{inputs.parameters.hucs}}' - /output/watershed.shp resources: {} entrypoint: huc-to-shp arguments: parameters: - - name: output_path + - name: output-path value: qwerty + - name: output-bucket + value: subsetter-outputs - name: hucs value: 102600070103,102600070104,102600090205,102600050206,102600070102 templateDefaults: diff --git a/app/argo/metadata-extraction.yml b/app/argo/metadata-extraction.yml index a26dc507..f8e1c058 100644 --- a/app/argo/metadata-extraction.yml +++ b/app/argo/metadata-extraction.yml @@ -8,8 +8,10 @@ spec: parameters: - name: bucket value: '{{workflow.parameters.bucket}}' - - name: path - value: '{{workflow.parameters.path}}' + - name: input-path + value: '{{workflow.parameters.input-path}}' + - name: output-path + value: '{{workflow.parameters.output-path}}' artifacts: - name: input-data path: /tmp/input-data @@ -22,11 +24,13 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.path}}' + key: '{{inputs.parameters.input-path}}' outputs: artifacts: - name: output-metadata path: /tmp/input-data + archive: + none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.bucket}}' @@ -36,11 +40,11 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.path}}' + key: '{{inputs.parameters.output-path}}' metadata: {} container: name: '' - image: scootna/hsextract:0.1 + image: scootna/hsextract:0.3 command: - /bin/sh - '-c' @@ -52,7 +56,8 @@ spec: arguments: parameters: - name: bucket - - name: path + - name: input-path + - name: output-path templateDefaults: inputs: {} outputs: {} diff --git a/app/argo/nwm1-subset-minio.yml b/app/argo/nwm1-subset-minio.yml index 1f33e25e..c341a201 100644 --- a/app/argo/nwm1-subset-minio.yml +++ b/app/argo/nwm1-subset-minio.yml @@ -24,14 +24,18 @@ spec: value: '{{workflow.parameters.x_east}}' - name: output-bucket value: '{{workflow.parameters.output-bucket}}' - - - name: metadata-extractor - template: metadata-extractor-template + - - name: metadata-extraction + templateRef: + name: metadata-extractor + template: metadata-extractor arguments: parameters: + - name: bucket + value: '{{workflow.parameters.output-bucket}}' + - name: input-path + value: '{{workflow.parameters.output-path}}' - name: output-path value: '{{workflow.parameters.output-path}}' - - name: output-bucket - value: '{{workflow.parameters.output-bucket}}' - name: nwm1-subset-template inputs: parameters: @@ -58,6 +62,8 @@ spec: artifacts: - name: subset-results path: /srv/output + archive: + none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' @@ -67,7 +73,7 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.output-path}}/subset.gz' + key: '{{inputs.parameters.output-path}}' metadata: {} container: name: '' @@ -84,50 +90,6 @@ spec: - /srv/domain - /srv/output resources: {} - - name: metadata-extractor-template - inputs: - parameters: - - name: output-path - - name: output-bucket - artifacts: - - name: extractor-input - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}/subset.gz' - outputs: - artifacts: - - name: extractor-output - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}/all.gz' - metadata: {} - container: - name: '' - image: scootna/hsextract:0.1 - command: - - /bin/sh - - '-c' - args: - - >- - python3 hsextract/main.py extract /tmp/subset-data && mv - /tmp/subset-data/.hs /tmp/subset-data/hs - resources: {} entrypoint: nwm1-subset-with-extraction-template arguments: parameters: diff --git a/app/argo/nwm2-subset-minio.yml b/app/argo/nwm2-subset-minio.yml index 32f08bd6..e25940e1 100644 --- a/app/argo/nwm2-subset-minio.yml +++ b/app/argo/nwm2-subset-minio.yml @@ -24,14 +24,18 @@ spec: value: '{{workflow.parameters.output-path}}' - name: output-bucket value: '{{workflow.parameters.output-bucket}}' - - - name: metadata-extractor - template: metadata-extractor-template + - - name: metadata-extraction + templateRef: + name: metadata-extractor + template: metadata-extractor arguments: parameters: + - name: bucket + value: '{{workflow.parameters.output-bucket}}' + - name: input-path + value: '{{workflow.parameters.output-path}}' - name: output-path value: '{{workflow.parameters.output-path}}' - - name: output-bucket - value: '{{workflow.parameters.output-bucket}}' - name: nwm2-subset-template inputs: parameters: @@ -58,6 +62,8 @@ spec: artifacts: - name: subset-results path: /srv/output + archive: + none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' @@ -67,7 +73,7 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.output-path}}/subset.gz' + key: '{{inputs.parameters.output-path}}' metadata: {} container: name: '' @@ -88,54 +94,6 @@ spec: cpu: '1' ephemeral-storage: 2Gi memory: 1Gi - - name: metadata-extractor-template - inputs: - parameters: - - name: output-path - - name: output-bucket - artifacts: - - name: extractor-input - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}/subset.gz' - outputs: - artifacts: - - name: extractor-output - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}/all.gz' - metadata: {} - container: - name: '' - image: scootna/hsextract:0.1 - command: - - /bin/sh - - '-c' - args: - - >- - python3 hsextract/main.py extract /tmp/subset-data && mv - /tmp/subset-data/.hs /tmp/subset-data/hs - resources: - requests: - cpu: '1' - ephemeral-storage: 2Gi - memory: 1Gi entrypoint: nwm2-subset-with-extraction-template arguments: parameters: diff --git a/app/argo/parflow-subset-minio.yml b/app/argo/parflow-subset-minio.yml index b1a0b77b..6898020e 100644 --- a/app/argo/parflow-subset-minio.yml +++ b/app/argo/parflow-subset-minio.yml @@ -28,14 +28,18 @@ spec: value: '{{workflow.parameters.output-path}}' - name: output-bucket value: '{{workflow.parameters.output-bucket}}' - - - name: metadata-extractor - template: metadata-extractor + - - name: metadata-extraction + templateRef: + name: metadata-extractor + template: metadata-extractor arguments: parameters: + - name: bucket + value: '{{workflow.parameters.output-bucket}}' + - name: input-path + value: '{{workflow.parameters.output-path}}' - name: output-path value: '{{workflow.parameters.output-path}}' - - name: output-bucket - value: '{{workflow.parameters.output-bucket}}' - name: huc-to-shp inputs: parameters: @@ -46,6 +50,8 @@ spec: artifacts: - name: shape-results path: /output + archive: + none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' @@ -55,7 +61,7 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.output-path}}/shape.gz' + key: '{{inputs.parameters.output-path}}' metadata: {} container: name: '' @@ -94,11 +100,13 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.output-path}}/shape.gz' + key: '{{inputs.parameters.output-path}}' outputs: artifacts: - name: subset-results path: /srv/shape + archive: + none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' @@ -108,7 +116,7 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.output-path}}/subset.gz' + key: '{{inputs.parameters.output-path}}' metadata: {} container: name: '' @@ -131,50 +139,6 @@ spec: cpu: '1' ephemeral-storage: 1Gi memory: 1Gi - - name: metadata-extractor - inputs: - parameters: - - name: output-path - - name: output-bucket - artifacts: - - name: extractor-input - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}/subset.gz' - outputs: - artifacts: - - name: extractor-output - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}/all.gz' - metadata: {} - container: - name: '' - image: scootna/hsextract:0.1 - command: - - /bin/sh - - '-c' - args: - - >- - python3 hsextract/main.py extract /tmp/subset-data && mv - /tmp/subset-data/.hs /tmp/subset-data/hs - resources: {} entrypoint: parflow-subset-huc arguments: parameters: From 3c7340605fb7ad89f46b191b9434e21507e84eed Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sun, 7 Jan 2024 15:38:29 -0700 Subject: [PATCH 7/8] pull in hsextract build with feature root folder bug fix --- app/argo/metadata-extraction.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/argo/metadata-extraction.yml b/app/argo/metadata-extraction.yml index f8e1c058..288ce754 100644 --- a/app/argo/metadata-extraction.yml +++ b/app/argo/metadata-extraction.yml @@ -44,7 +44,7 @@ spec: metadata: {} container: name: '' - image: scootna/hsextract:0.3 + image: scootna/hsextract:0.4.1 command: - /bin/sh - '-c' From b8db299e986002e0d1df7c08ab5e800442b9c488 Mon Sep 17 00:00:00 2001 From: Scott Black Date: Sun, 7 Jan 2024 16:43:33 -0700 Subject: [PATCH 8/8] compress subsetter outputs --- .../subsetter/app/routers/storage/router.py | 2 +- app/argo/metadata-extraction.yml | 8 ++--- app/argo/nwm1-subset-minio.yml | 2 -- app/argo/nwm2-subset-minio.yml | 2 -- app/argo/parflow-subset-minio.yml | 36 ++----------------- 5 files changed, 7 insertions(+), 43 deletions(-) diff --git a/app/api/subsetter/app/routers/storage/router.py b/app/api/subsetter/app/routers/storage/router.py index 640ab12d..316f8dbf 100644 --- a/app/api/subsetter/app/routers/storage/router.py +++ b/app/api/subsetter/app/routers/storage/router.py @@ -10,7 +10,7 @@ async def presigned_get_minio(workflow_params: WorkflowDep): submission = workflow_params.user.get_submission(workflow_params.workflow_id) url = get_minio_client().presigned_get_object( - workflow_params.user.username, f"argo_workflows/{submission.workflow_name}/{submission.workflow_id}/all.gz" + workflow_params.user.username, f"argo_workflows/{submission.workflow_name}/{submission.workflow_id}" ) return {'url': url} diff --git a/app/argo/metadata-extraction.yml b/app/argo/metadata-extraction.yml index 288ce754..8b731c2b 100644 --- a/app/argo/metadata-extraction.yml +++ b/app/argo/metadata-extraction.yml @@ -14,7 +14,7 @@ spec: value: '{{workflow.parameters.output-path}}' artifacts: - name: input-data - path: /tmp/input-data + path: /tmp/resource s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.bucket}}' @@ -28,9 +28,7 @@ spec: outputs: artifacts: - name: output-metadata - path: /tmp/input-data - archive: - none: {} + path: /tmp/resource s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.bucket}}' @@ -50,7 +48,7 @@ spec: - '-c' args: - >- - python3 hsextract/main.py extract /tmp/input-data + python3 hsextract/main.py extract /tmp/resource resources: {} entrypoint: metadata-extractor arguments: diff --git a/app/argo/nwm1-subset-minio.yml b/app/argo/nwm1-subset-minio.yml index c341a201..2bbd96b5 100644 --- a/app/argo/nwm1-subset-minio.yml +++ b/app/argo/nwm1-subset-minio.yml @@ -62,8 +62,6 @@ spec: artifacts: - name: subset-results path: /srv/output - archive: - none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' diff --git a/app/argo/nwm2-subset-minio.yml b/app/argo/nwm2-subset-minio.yml index e25940e1..701794d4 100644 --- a/app/argo/nwm2-subset-minio.yml +++ b/app/argo/nwm2-subset-minio.yml @@ -62,8 +62,6 @@ spec: artifacts: - name: subset-results path: /srv/output - archive: - none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' diff --git a/app/argo/parflow-subset-minio.yml b/app/argo/parflow-subset-minio.yml index 6898020e..0d544aa6 100644 --- a/app/argo/parflow-subset-minio.yml +++ b/app/argo/parflow-subset-minio.yml @@ -9,7 +9,9 @@ spec: metadata: {} steps: - - name: huc-to-shp - template: huc-to-shp + templateRef: + name: huc-to-shp + template: huc-to-shp arguments: parameters: - name: output-path @@ -40,36 +42,6 @@ spec: value: '{{workflow.parameters.output-path}}' - name: output-path value: '{{workflow.parameters.output-path}}' - - name: huc-to-shp - inputs: - parameters: - - name: output-path - - name: output-bucket - - name: hucs - outputs: - artifacts: - - name: shape-results - path: /output - archive: - none: {} - s3: - endpoint: api.minio.cuahsi.io - bucket: '{{inputs.parameters.output-bucket}}' - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.output-path}}' - metadata: {} - container: - name: '' - image: us-central1-docker.pkg.dev/apps-320517/subsetter/huc-to-shp:0.0.4 - args: - - '{{inputs.parameters.hucs}}' - - /output/watershed.shp - resources: {} - name: parflow-subset-v1 inputs: parameters: @@ -105,8 +77,6 @@ spec: artifacts: - name: subset-results path: /srv/shape - archive: - none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}'