diff --git a/app/api/README.md b/app/api/README.md new file mode 100644 index 00000000..667ff7c8 --- /dev/null +++ b/app/api/README.md @@ -0,0 +1,28 @@ +# Subsetter FastAPI + +A python FastAPI application that can submit the subsetter workflow templates to an argo instance updated with the subsetter workflow templates at `./argo/`. + +The Dockerfile declares a python base image and installs the dependencies declared in `requirements.txt` and `requirements-dev.txt` and starts up the FastAPI application at port 8000. + +`subsetter/main.py` is the entrypoint to the FastAPI application and configures the routers. The file also contains a startup event hook that initializes the mongodb database with [beanie ODM](https://beanie-odm.dev/). The startup event hook also sets up a minio client for the [CUAHSI MinIO instance](https://console.minio.cuahsi.io). The minio client is used for synchronizing user-specific access policies and keys/secrets. + +API documentation is rendered at https://subsetter-api-jbzfw6l52q-uc.a.run.app/redoc (This will be updated to https://api.subsetter.cuahsi.io/redocs pending certificate creation). OpenAPI spec documentation is generated from the code defining the api endpoints (FastAPI) and input/output models (Pydantic). + +User authentication is achieved by configuring the [fastapi_users](https://github.com/fastapi-users/fastapi-users) module with [CUAHSI SSO](https://auth.cuahsi.org/) using the `OpenID Connect` protocol. On registration an S3 bucket is created for the user on [CUAHSI MinIO](https://console.minio.cuahsi.io) (TODO: create a default quota of 5 GB). An admin may increase the quota on a case-by-case basis. + +The Subsetter API is divided into 4 routers defined at `subsetter/app/routers/`. + +## Routers +### Access Control Router +The `access_control` router contains prototyped synchronization of view/edit access to paths on MinIO that have a HydroShare resource that references a path on the CUAHSI MinIO instance. 
In the [mongo_discovery-access-control](https://github.com/hydroshare/hydroshare/compare/develop...mongo-discovery-access-control) HydroShare branch, event hooks are created for exporting Resource and User access to a mongo database. This mongo database is accessed to look up the resources for which a user has view/edit privileges and to generate the view/edit policies that are assigned to the user on CUAHSI MinIO storage. This means a path in a user's bucket may be registered on HydroShare and enjoy the same access control capabilities as a HydroShare Composite Resource. + +### Argo Router +Contains the api endpoints for submitting a subsetter workflow, tracking submissions, and generating a presigned download url to the resulting datasets. + +### Discovery Router +A copy of the IGUIDE discovery router that includes endpoints for searching resource metadata. The Subsetter workflows run the hydroshare metadata extraction tool to extract the same metadata that a HydroShare composite resource will extract from recognized file formats. The resulting metadata can then be written to the Discovery database on Atlas. TODO: collect the metadata extracted from subsetter outputs into a discovery database. + +### Storage Router +Contains the endpoints to generate presigned urls for PUT and GET of objects on S3. This is not currently used but could be used to create a resource landing page for resources stored on S3 equivalent to a resource on HydroShare. 
+ + diff --git a/app/api/subsetter/app/db.py b/app/api/subsetter/app/db.py index c373b07d..d0086574 100644 --- a/app/api/subsetter/app/db.py +++ b/app/api/subsetter/app/db.py @@ -40,6 +40,9 @@ class Submission(BaseModel): finishedAt: Optional[str] = None estimatedDuration: Optional[int] = None + def output_path(self, base_path): + return f"{base_path}/{self.workflow_name}/{self.workflow_id}" + class User(BeanieBaseUser, Document): oauth_accounts: List[OAuthAccount] = Field(default_factory=list) diff --git a/app/api/subsetter/app/routers/argo/__init__.py b/app/api/subsetter/app/routers/argo/__init__.py index 346b1425..23780433 100644 --- a/app/api/subsetter/app/routers/argo/__init__.py +++ b/app/api/subsetter/app/routers/argo/__init__.py @@ -1 +1 @@ -from subsetter.app.routers.argo.router import router +from .router import router diff --git a/app/api/subsetter/app/routers/argo/router.py b/app/api/subsetter/app/routers/argo/router.py index c36701a2..0469514c 100644 --- a/app/api/subsetter/app/routers/argo/router.py +++ b/app/api/subsetter/app/routers/argo/router.py @@ -1,12 +1,14 @@ import json import logging as log +import tempfile import uuid -from typing import Annotated +from typing import Annotated, Any import argo_workflows import google.cloud.logging as logging from argo_workflows.api import workflow_service_api from fastapi import APIRouter, Depends, Query +from pydantic import BaseModel from subsetter.app.db import Submission, User from subsetter.app.models import ( @@ -28,6 +30,7 @@ router = APIRouter() NAMESPACE = 'workflows' +OUTPUT_BASE_PATH = "argo_workflows" configuration = argo_workflows.Configuration(host=get_settings().argo_host) configuration.api_key['BearerToken'] = get_settings().argo_bearer_token @@ -36,14 +39,14 @@ api_instance = workflow_service_api.WorkflowServiceApi(api_client) -def parflow_submission_body(hucs: list, bucket_name: str, workflow_name: str): +def parflow_submission_body(hucs: list, bucket_name: str, workflow_name: str, 
output_path): return { "resourceKind": "WorkflowTemplate", "resourceName": "parflow-subset-v1-by-huc-minio", "submitOptions": { "name": workflow_name, "parameters": [ - f"output-path=argo_workflows/parflow/{workflow_name}", + f"output-path={output_path}", f"output-bucket={bucket_name}", "hucs=" + ",".join(hucs), ], @@ -52,7 +55,7 @@ def parflow_submission_body(hucs: list, bucket_name: str, workflow_name: str): def nwm1_submission_body( - y_south: float, x_west: float, y_north: float, x_east: float, bucket_name: str, workflow_name: str + y_south: float, x_west: float, y_north: float, x_east: float, bucket_name: str, workflow_name: str, output_path: str ): return { "resourceKind": "WorkflowTemplate", @@ -61,7 +64,7 @@ def nwm1_submission_body( "name": workflow_name, "parameters": [ f"output-bucket={bucket_name}", - f"output-path=argo_workflows/nwm1/{workflow_name}", + f"output-path={output_path}", f"y_south={y_south}", f"x_west={x_west}", f"y_north={y_north}", @@ -72,7 +75,7 @@ def nwm1_submission_body( def nwm2_submission_body( - y_south: float, x_west: float, y_north: float, x_east: float, bucket_name: str, workflow_name: str + y_south: float, x_west: float, y_north: float, x_east: float, bucket_name: str, workflow_name: str, output_path: str ): return { "resourceKind": "WorkflowTemplate", @@ -81,7 +84,7 @@ def nwm2_submission_body( "name": workflow_name, "parameters": [ f"output-bucket={bucket_name}", - f"output-path=argo_workflows/nwm2/{workflow_name}", + f"output-path={output_path}", f"y_south={y_south}", f"x_west={x_west}", f"y_north={y_north}", @@ -91,17 +94,12 @@ def nwm2_submission_body( } -def metadata_extraction_submission_body(bucket_key: str, path_key: str, workflow_name: str): +def metadata_extraction_submission_body(bucket: str, input_path: str, output_path: str): return { "resourceKind": "WorkflowTemplate", "resourceName": "metadata-extractor", "submitOptions": { - "name": workflow_name, - "parameters": [ - f"job-id={workflow_name}", - 
f"bucket={bucket_key}", - f"path={path_key}", - ], + "parameters": [f"bucket={bucket}", f"input-path={input_path}", f"output-path={output_path}"], }, } @@ -111,13 +109,13 @@ async def submit_parflow( hucs: Annotated[list[str] | None, Query()], user: User = Depends(current_active_user) ) -> SubmissionResponseModel: workflow_id = str(uuid.uuid4()) + submission = Submission(workflow_id=workflow_id, workflow_name="parflow") api_response = api_instance.submit_workflow( namespace=get_settings().argo_namespace, - body=parflow_submission_body(hucs, user.bucket_name, workflow_id), + body=parflow_submission_body(hucs, user.bucket_name, workflow_id, submission.output_path(OUTPUT_BASE_PATH)), _preload_content=False, ) log.info(api_response.json()) - submission = Submission(workflow_id=workflow_id, workflow_name="parflow") return await upsert_submission(user, submission) @@ -127,13 +125,15 @@ async def submit_nwm1( ) -> SubmissionResponseModel: # y_south, x_west, y_north, x_east = transform_latlon(y_south, x_west, y_north, x_east) workflow_id = str(uuid.uuid4()) + submission = Submission(workflow_id=workflow_id, workflow_name="nwm1") api_response = api_instance.submit_workflow( namespace=get_settings().argo_namespace, - body=nwm1_submission_body(y_south, x_west, y_north, x_east, user.bucket_name, workflow_id), + body=nwm1_submission_body( + y_south, x_west, y_north, x_east, user.bucket_name, workflow_id, submission.output_path(OUTPUT_BASE_PATH) + ), _preload_content=False, ) log.info(api_response.json()) - submission = Submission(workflow_id=workflow_id, workflow_name="nwm1") return await upsert_submission(user, submission) @@ -143,16 +143,50 @@ async def submit_nwm2( ) -> SubmissionResponseModel: # y_south, x_west, y_north, x_east = transform_latlon(y_south, x_west, y_north, x_east) workflow_id = str(uuid.uuid4()) + submission = Submission(workflow_id=workflow_id, workflow_name="nwm2") api_response = api_instance.submit_workflow( namespace=get_settings().argo_namespace, - 
body=nwm2_submission_body(y_south, x_west, y_north, x_east, user.bucket_name, workflow_id), + body=nwm2_submission_body( + y_south, x_west, y_north, x_east, user.bucket_name, workflow_id, submission.output_path(OUTPUT_BASE_PATH) + ), _preload_content=False, ) log.info(api_response.json()) - submission = Submission(workflow_id=workflow_id, workflow_name="nwm2") return await upsert_submission(user, submission) +class ExtractMetadataRequestBody(BaseModel): + workflow_id: str + metadata: Any = None + + +@router.post('/extract/metadata') +async def extract_metadata(metadata_request: ExtractMetadataRequestBody, user: User = Depends(current_active_user)): + submission = next( + submission for submission in user.submissions if submission.workflow_id == metadata_request.workflow_id + ) + if not submission: + raise Exception(f"No Submission found for id {metadata_request.workflow_id}") + if metadata_request.metadata: + with tempfile.NamedTemporaryFile(delete=False) as fp: + metadata_json_str = json.dumps(metadata_request.metadata) + fp.write(str.encode(metadata_json_str)) + fp.close() + get_minio_client().fput_object( + user.bucket_name, f"{submission.output_path(OUTPUT_BASE_PATH)}/hs_user_meta.json", fp.name + ) + + api_response = api_instance.submit_workflow( + namespace=get_settings().argo_namespace, + body=metadata_extraction_submission_body( + user.bucket_name, + submission.output_path(OUTPUT_BASE_PATH), + f"{submission.output_path(OUTPUT_BASE_PATH)}_hs_metadata.tgz", + ), + _preload_content=False, + ) + + async def upsert_submission(user: User, submission: Submission) -> Submission: api_response = api_instance.get_workflow( namespace=get_settings().argo_namespace, name=submission.workflow_id, _preload_content=False @@ -214,20 +248,9 @@ async def logs(workflow_params: WorkflowDep) -> LogsResponseModel: log_options_container="main", _preload_content=False, ) - log.info(api_response.json()) return {"logs": parse_logs(api_response)} -@router.get('/url/{workflow_id}', 
description="Create a download url") -async def signed_url_minio(workflow_params: WorkflowDep) -> UrlResponseModel: - submission = workflow_params.user.get_submission(workflow_params.workflow_id) - url = get_minio_client().presigned_get_object( - "subsetter-outputs", - f"{workflow_params.user.bucket_name}/{submission.workflow_name}/{submission.workflow_id}/all.gz", - ) - return {'url': url} - - @router.get('/argo/{workflow_id}') async def argo_metadata(workflow_params: WorkflowDep): api_response = api_instance.get_workflow( diff --git a/app/api/subsetter/app/routers/discovery/__init__.py b/app/api/subsetter/app/routers/discovery/__init__.py index e69de29b..23780433 100644 --- a/app/api/subsetter/app/routers/discovery/__init__.py +++ b/app/api/subsetter/app/routers/discovery/__init__.py @@ -0,0 +1 @@ +from .router import router diff --git a/app/api/subsetter/app/routers/hydroshare/__init__.py b/app/api/subsetter/app/routers/hydroshare/__init__.py new file mode 100644 index 00000000..23780433 --- /dev/null +++ b/app/api/subsetter/app/routers/hydroshare/__init__.py @@ -0,0 +1 @@ +from .router import router diff --git a/app/api/subsetter/app/routers/hydroshare/router.py b/app/api/subsetter/app/routers/hydroshare/router.py new file mode 100644 index 00000000..3ded1ef8 --- /dev/null +++ b/app/api/subsetter/app/routers/hydroshare/router.py @@ -0,0 +1,50 @@ +import json +import tempfile +from typing import Any, Union + +import google.cloud.logging as logging +from fastapi import APIRouter, Depends +from pydantic import BaseModel + +from subsetter.app.db import User +from subsetter.app.users import current_active_user +from subsetter.config import get_minio_client, get_settings + +if get_settings().cloud_run: + logging_client = logging.Client() + logging_client.setup_logging() + +router = APIRouter() + + +class HydroShareMetadata(BaseModel): + title: str + description: str + + +class DatasetMetadataRequestModel(BaseModel): + file_path: str + # bucket_name: str + metadata: 
Union[HydroShareMetadata, Any] + + +@router.post('/dataset/metadata') +async def create_metadata(metadata_request: DatasetMetadataRequestModel, user: User = Depends(current_active_user)): + with tempfile.NamedTemporaryFile(delete=False) as fp: + metadata_json_str = json.dumps(metadata_request.metadata) + print(metadata_json_str) + fp.write(str.encode(metadata_json_str)) + fp.close() + get_minio_client().fput_object(user.bucket_name, metadata_request.file_path, fp.name) + + +@router.put('/dataset/metadata') +async def update_metadata(metadata_request: DatasetMetadataRequestModel, user: User = Depends(current_active_user)): + get_minio_client().remove_object(user.bucket_name, metadata_request.file_path) + return await create_metadata(metadata_request, user) + + +class DatasetExtractRequestModel(BaseModel): + file_path: str = None + # bucket_name: str + metadata: Union[HydroShareMetadata, Any] = None diff --git a/app/api/subsetter/app/routers/storage/__init__.py b/app/api/subsetter/app/routers/storage/__init__.py index c05399a8..23780433 100644 --- a/app/api/subsetter/app/routers/storage/__init__.py +++ b/app/api/subsetter/app/routers/storage/__init__.py @@ -1 +1 @@ -from subsetter.app.routers.storage.router import router +from .router import router diff --git a/app/api/subsetter/app/users.py b/app/api/subsetter/app/users.py index 4ad939a9..c421318b 100644 --- a/app/api/subsetter/app/users.py +++ b/app/api/subsetter/app/users.py @@ -66,6 +66,7 @@ async def on_after_request_verify(self, user: User, token: str, request: Optiona async def get_user_manager(user_db: BeanieUserDatabase = Depends(get_user_db)): yield UserManager(user_db) + bearer_transport = BearerTransport(tokenUrl="auth/jwt/login") diff --git a/app/api/subsetter/main.py b/app/api/subsetter/main.py index a25d7e34..33c8588e 100644 --- a/app/api/subsetter/main.py +++ b/app/api/subsetter/main.py @@ -1,4 +1,3 @@ -import os import subprocess from beanie import init_beanie @@ -8,6 +7,7 @@ from subsetter.app.db 
import User, db from subsetter.app.routers.access_control import router as access_control_router from subsetter.app.routers.argo import router as argo_router +from subsetter.app.routers.hydroshare import router as hydroshare_router from subsetter.app.routers.storage import router as storage_router from subsetter.app.schemas import UserRead, UserUpdate from subsetter.app.users import SECRET, auth_backend, cuahsi_oauth_client, fastapi_users @@ -55,6 +55,12 @@ tags=["minio"], ) +app.include_router( + hydroshare_router, + # prefix="/auth/cuahsi", + tags=["hydroshare"], +) + app.include_router( fastapi_users.get_oauth_router( cuahsi_oauth_client, diff --git a/app/argo/.images/minio-view-subsetter-ouput.png b/app/argo/.images/minio-view-subsetter-ouput.png new file mode 100644 index 00000000..351b1a2b Binary files /dev/null and b/app/argo/.images/minio-view-subsetter-ouput.png differ diff --git a/app/argo/.images/workflow-graph.png b/app/argo/.images/workflow-graph.png new file mode 100644 index 00000000..fc822f9f Binary files /dev/null and b/app/argo/.images/workflow-graph.png differ diff --git a/app/argo/README.md b/app/argo/README.md index d78a0fbe..e4748ffa 100644 --- a/app/argo/README.md +++ b/app/argo/README.md @@ -1,4 +1,42 @@ -Minio Artifact setup; +# Argo Workflows for Subsetting + + +Argo Workflows are containerized DAG workflows declared in yaml that run on Kubernetes. Each node in the workflow is a docker container with S3 storage. + +*An example graph of the parflow subsetter workflow* +![Alt text](.images/workflow-graph.png) + +The `templates/` directory contains workflows which declare composable [templates](https://argo-workflows.readthedocs.io/en/latest/workflow-templates/). They are referenced by the workflows in the `workflows/` directory with a [templateRef](https://argo-workflows.readthedocs.io/en/latest/workflow-templates/#referencing-other-workflowtemplates) + +Artifact storage is S3 w/MinIO. 
We use artifacts to store input and output files for our workflows. Each user is given a bucket (TODO: configurable Version Control and Quotas) for storing output data of their workflows. The output of one workflow may be used as input to subsequent workflow runs. + +The 3 supported subsetter workflows (nwm1, nwm2, parflow) write the result to S3 storage in the user's bucket at `/argo_workflows/{workflow_template}/{GUID}`. A GUID is generated for each run of a subsetter workflow and is used as the workflow run name. An example parflow subsetter output viewed in the MinIO viewer is shown below. +![Example user bucket with parflow subsetter output](.images/minio-view-subsetter-ouput.png) + +The subsetter input datasets are stored on the [CUAHSI MinIO instance](https://console.minio.cuahsi.io). This bucket has public read access. The workflows use these datasets as input artifacts within a subsetter workflow. A workflow conveniently maps an artifact to a path within a container that can be used as input or output locations for a program running in the container. + +*Example output declaration with configurable output locations. [ArtifactRepositoryRef](https://argo-workflows.readthedocs.io/en/latest/artifact-repository-ref/) could be used to simplify artifact use.* + +```yaml +outputs: + artifacts: + - name: subsetter-result + path: /output + s3: + endpoint: api.minio.cuahsi.io + bucket: '{{inputs.parameters.output-bucket}}' + accessKeySecret: + name: minio-credentials + key: accessKey + secretKeySecret: + name: minio-credentials + key: secretKey + key: '{{inputs.parameters.output-path}}' +``` + +# `minio-credentials` access key/secret setup; 1. Create an access key/secret in the minio UI at -2. Save the key/secret as a secret in kubernetes in the workflows namespace -`kubectl create secret generic minio-credentials --namespace workflows --from-literal=accessKey='' --from-literal=secretKey='` \ No newline at end of file +2. 
Save the key/secret as a secret in kubernetes in the `workflows` namespace +`kubectl create secret generic minio-credentials --namespace workflows --from-literal=accessKey='' --from-literal=secretKey='` + +These workflows should eventually be setup to automatically sync to https://workflows.argo.cuahsi.io \ No newline at end of file diff --git a/app/argo/aorc-huc-metadata.yml b/app/argo/aorc-huc-metadata.yml deleted file mode 100644 index e2301c63..00000000 --- a/app/argo/aorc-huc-metadata.yml +++ /dev/null @@ -1,69 +0,0 @@ -metadata: - name: collect-aorc-forcing-full-v1.1 - namespace: workflows -spec: - templates: - - name: aorc-subset-huc - inputs: {} - outputs: {} - metadata: {} - steps: - - - name: huc-to-shp - templateRef: - name: huc-to-shp - template: huc-to-shp - arguments: - parameters: - - name: artifact-path - value: '{{workflow.parameters.output_path}}' - - name: hucs - value: '{{workflow.parameters.hucs}}' - - - name: collect-aorc-forcing - templateRef: - name: collect-aorc-forcing-v1.1 - template: collect-aorc - arguments: - parameters: - - name: start-date - value: '{{workflow.parameters.start-date}}' - - name: end-date - value: '{{workflow.parameters.end-date}}' - - name: output-path - value: '{{workflow.parameters.output_path}}' - - name: shape-file-path - value: '{{workflow.parameters.output_path}}/shape.gz' - - name: aorc-secret - value: '{{workflow.parameters.aorc-secret}}' - - - name: metadata-extraction - templateRef: - name: metadata-extractor - template: metadata-extractor - arguments: - parameters: - - name: bucket - value: subsetter-outputs - - name: path - value: '{{workflow.parameters.output_path}}/aorc_output.gz' - - name: output-path - value: '{{workflow.parameters.output_path}}/metadata.gz' - entrypoint: aorc-subset-huc - arguments: - parameters: - - name: start-date - value: '2020-01-01' - - name: end-date - value: '2020-01-2' - - name: output_path - value: sblack/templatetest - - name: hucs - value: 
102600070103,102600070104,102600090205,102600050206,102600070102 - - name: aorc-secret - value: changethis - templateDefaults: - inputs: {} - outputs: {} - metadata: {} - retryStrategy: - limit: '0' - timeout: 600s - diff --git a/app/argo/aorc.yml b/app/argo/aorc.yml deleted file mode 100644 index 7db5072b..00000000 --- a/app/argo/aorc.yml +++ /dev/null @@ -1,197 +0,0 @@ -metadata: - generateName: collect-aorc-forcing-v1- - namespace: workflows -spec: - templates: - - name: aorc-subset-huc - inputs: {} - outputs: {} - metadata: {} - steps: - - - name: huc-to-shp - template: huc-to-shp - arguments: - parameters: - - name: artifact-path - value: '{{workflow.parameters.output_path}}' - - name: hucs - value: '{{workflow.parameters.hucs}}' - - - name: collect-aorc - template: collect-aorc - arguments: - parameters: - - name: start-date - value: '{{workflow.parameters.start-date}}' - - name: end-date - value: '{{workflow.parameters.end-date}}' - - name: artifact-path - value: '{{workflow.parameters.output_path}}' - - name: aorc-secret - value: '{{workflow.parameters.aorc-secret}}' - - - name: metadata-extractor - template: metadata-extractor - arguments: - parameters: - - name: artifact-path - value: '{{workflow.parameters.output_path}}' - - name: huc-to-shp - inputs: - parameters: - - name: artifact-path - - name: hucs - outputs: - artifacts: - - name: shape-results - path: /output - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.artifact-path}}/shape.gz' - metadata: {} - container: - name: '' - image: us-central1-docker.pkg.dev/apps-320517/subsetter/huc-to-shp:0.0.4 - args: - - '{{inputs.parameters.hucs}}' - - /output/watershed.shp - resources: {} - - - name: collect-aorc - inputs: - parameters: - - name: start-date - - name: end-date - - name: artifact-path - - name: aorc-secret - artifacts: - - name: 
shapefile-input - path: /srv/shp-data - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.artifact-path}}/shape.gz' - outputs: - artifacts: - - name: aorc-output-artifact - path: /output - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.artifact-path}}/aorc_output.gz' - metadata: {} - container: - image: us-central1-docker.pkg.dev/apps-320517/subsetter/aorc:0.0.2 - env: - - name: KEY - value: AKIATL6IACWUADAFYDMC - - name: SECRET - value: '{{inputs.parameters.aorc-secret}}' - - name: BUCKET_URL - value: s3://aorc-v1.1-zarr-1-year/ - - name: SHAPE_FILE - value: /srv/shp-data/watershed.shp - - name: START_DATE - value: '{{inputs.parameters.start-date}}' - - name: END_DATE - value: '{{inputs.parameters.end-date}}' - - name: OUTPUT_FILE - value: /output/results.nc - - name: N_WORKERS - value: '2' - - name: MEMORY_LIMIT - value: 4GB - command: - - /bin/sh - - '-c' - args: - - >- - python /app/collect-aorc-forcing-v1.1.py - ports: - - containerPort: 8787 - resources: - requests: - cpu: '2' - ephemeral-storage: 10Gi - memory: 16Gi - - name: metadata-extractor - inputs: - parameters: - - name: artifact-path - artifacts: - - name: extractor-input - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: '{{inputs.parameters.artifact-path}}/aorc_output.gz' - outputs: - artifacts: - - name: extractor-output - path: /tmp/subset-data - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: 
minio-credentials - key: secretKey - key: '{{inputs.parameters.artifact-path}}/all.gz' - metadata: {} - container: - name: '' - image: scootna/hsextract:0.1 - command: - - /bin/sh - - '-c' - args: - - >- - python3 hsextract/main.py extract /tmp/subset-data && mv - /tmp/subset-data/.hs /tmp/subset-data/hs - resources: {} - entrypoint: aorc-subset-huc - arguments: - parameters: - - name: start-date - value: '2020-01-01' - - name: end-date - value: '2020-01-2' - - name: output_path - value: qwerty - - name: hucs - value: 102600070103,102600070104,102600090205,102600050206,102600070102 - - name: aorc-secret - value: changethis - templateDefaults: - inputs: {} - outputs: {} - metadata: {} - retryStrategy: - limit: '0' - timeout: 600s - diff --git a/app/argo/aorc-only.yml b/app/argo/templates/aorc-only.yml similarity index 88% rename from app/argo/aorc-only.yml rename to app/argo/templates/aorc-only.yml index e58a4e00..0481d6ef 100644 --- a/app/argo/aorc-only.yml +++ b/app/argo/templates/aorc-only.yml @@ -1,5 +1,5 @@ metadata: - name: collect-aorc-forcing-v1.1 + name: collect-aorc-forcing-v1.2 namespace: workflows spec: templates: @@ -16,12 +16,14 @@ spec: value: '{{workflow.parameters.end-date}}' - name: aorc-secret value: '{{workflow.parameters.aorc-secret}}' + - name: output-bucket + value: '{{workflow.parameters.output-bucket}}' artifacts: - name: shapefile-input path: /srv/shp-data s3: endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs + bucket: '{{inputs.parameters.output-bucket}}' accessKeySecret: name: minio-credentials key: accessKey @@ -33,9 +35,11 @@ spec: artifacts: - name: aorc-output-artifact path: /output + archive: + none: {} s3: endpoint: api.minio.cuahsi.io - bucket: subsetter-outputs + bucket: '{{inputs.parameters.output-bucket}}' accessKeySecret: name: minio-credentials key: accessKey @@ -85,10 +89,12 @@ spec: value: '2020-01-01' - name: end-date value: '2020-01-2' + - name: output-bucket + value: subsetter_outputs - name: output_path value: 
sblack/qwerty - name: shape-file-path - value: sblack/qwerty/shape.gz + value: qwerty/shape.gz - name: aorc-secret value: changethis/zuFe6XApPIZrbnKWfliP templateDefaults: diff --git a/app/argo/huc-to-shp.yml b/app/argo/templates/huc-to-shp.yml similarity index 93% rename from app/argo/huc-to-shp.yml rename to app/argo/templates/huc-to-shp.yml index fce28074..b9ea6c20 100644 --- a/app/argo/huc-to-shp.yml +++ b/app/argo/templates/huc-to-shp.yml @@ -16,8 +16,6 @@ spec: artifacts: - name: shape-results path: /output - archive: - none: {} s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.output-bucket}}' @@ -27,7 +25,7 @@ spec: secretKeySecret: name: minio-credentials key: secretKey - key: '{{inputs.parameters.output-path}}/shape' + key: '{{inputs.parameters.output-path}}' metadata: {} container: name: '' diff --git a/app/argo/metadata-extraction.yml b/app/argo/templates/metadata-extraction.yml similarity index 81% rename from app/argo/metadata-extraction.yml rename to app/argo/templates/metadata-extraction.yml index 8b731c2b..63d234f6 100644 --- a/app/argo/metadata-extraction.yml +++ b/app/argo/templates/metadata-extraction.yml @@ -1,5 +1,5 @@ metadata: - name: metadata-extractor + name: metadata-extractor-path namespace: workflows spec: templates: @@ -12,6 +12,8 @@ spec: value: '{{workflow.parameters.input-path}}' - name: output-path value: '{{workflow.parameters.output-path}}' + - name: base-url + value: '{{workflow.parameters.base-url}}' artifacts: - name: input-data path: /tmp/resource @@ -28,7 +30,7 @@ spec: outputs: artifacts: - name: output-metadata - path: /tmp/resource + path: /tmp/resource/.hs s3: endpoint: api.minio.cuahsi.io bucket: '{{inputs.parameters.bucket}}' @@ -39,16 +41,18 @@ spec: name: minio-credentials key: secretKey key: '{{inputs.parameters.output-path}}' + archive: + none: {} metadata: {} container: name: '' - image: scootna/hsextract:0.4.1 + image: scootna/hsextract:0.4.7 command: - /bin/sh - '-c' args: - >- - python3 
hsextract/main.py extract /tmp/resource + python3 hsextract/main.py extract /tmp/resource {{inputs.parameters.base-url}} resources: {} entrypoint: metadata-extractor arguments: @@ -56,10 +60,11 @@ spec: - name: bucket - name: input-path - name: output-path + - name: base-url templateDefaults: inputs: {} outputs: {} metadata: {} retryStrategy: limit: '0' - timeout: 600s + timeout: 3600s diff --git a/app/argo/transfer-with-artifacts.yml b/app/argo/transfer-with-artifacts.yml deleted file mode 100644 index 64cda68e..00000000 --- a/app/argo/transfer-with-artifacts.yml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: wget-files- - namespace: workflows -spec: - entrypoint: wget-files - templates: - - name: wget-files - inputs: - parameters: - - name: fileserver-url - value: https://www.nco.ncep.noaa.gov/pmb/codes/nwprod/nwm.v3.0.6/parm/domain_hawaii/ - script: - image: mwendler/wget - command: ["sh", "-c"] - args: ["mkdir /mnt/input && cd /mnt/input && wget --no-check-certificate -r -np -nH --cut-dirs=4 -R index.html {{inputs.parameters.fileserver-url}}"] - outputs: - artifacts: - - archive: - none: {} - name: nwm3-input-data - path: /mnt/input - gcs: - bucket: subsetter-static-input - key: "nwm.v3.0.6" - serviceAccountKeySecret: - name: my-gcs-creds - key: serviceAccountKey \ No newline at end of file diff --git a/app/argo/transfer-with-compression.yml b/app/argo/transfer-with-compression.yml deleted file mode 100644 index 8c6ea280..00000000 --- a/app/argo/transfer-with-compression.yml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: compress-dataset- - namespace: workflows -spec: - entrypoint: compress-dataset - templates: - - name: compress-dataset - inputs: - artifacts: - - name: pfconus-data-uncompressed - path: /mnt/input - gcs: - bucket: subsetter-static-input - key: "nwm.v2.0.0" - serviceAccountKeySecret: - name: my-gcs-creds - key: 
serviceAccountKey - script: - image: scootna/hsextract:0.1 - command: ["sh", "-c"] - args: ["ls -la"] - outputs: - artifacts: - - name: pfconus-data-compressed - path: /mnt/input - s3: - bucket: subsetter-static-input - key: "nwm.v2.0.0.gz" - endpoint: api.minio.cuahsi.io - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey \ No newline at end of file diff --git a/app/argo/workflows/aorc-huc-metadata.yml b/app/argo/workflows/aorc-huc-metadata.yml new file mode 100644 index 00000000..09720440 --- /dev/null +++ b/app/argo/workflows/aorc-huc-metadata.yml @@ -0,0 +1,90 @@ +metadata: + name: collect-aorc-forcing-full-v1.2 + namespace: workflows +spec: + templates: + - name: aorc-subset-huc + inputs: + parameters: + - name: start-date + value: '{{workflow.parameters.start-date}}' + - name: end-date + value: '{{workflow.parameters.end-date}}' + - name: output-path + value: '{{workflow.parameters.output-path}}' + - name: output-bucket + value: '{{workflow.parameters.output-bucket}}' + - name: hucs + value: '{{workflow.parameters.hucs}}' + - name: aorc-secret + value: '{{workflow.parameters.aorc-secret}}' + - name: shape-file-path + value: '{{workflow.parameters.shape-file-path}}' + outputs: {} + metadata: {} + steps: + - - name: huc-to-shp + templateRef: + name: huc-to-shp + template: huc-to-shp + arguments: + parameters: + - name: output-path + value: '{{inputs.parameters.output-path}}' + - name: output-bucket + value: '{{inputs.parameters.output-bucket}}' + - name: hucs + value: '{{inputs.parameters.hucs}}' + - - name: collect-aorc-forcing + templateRef: + name: collect-aorc-forcing-v1.1 + template: collect-aorc + arguments: + parameters: + - name: start-date + value: '{{inputs.parameters.start-date}}' + - name: end-date + value: '{{inputs.parameters.end-date}}' + - name: output-path + value: '{{inputs.parameters.output-path}}' + - name: shape-file-path + value: 
'{{inputs.parameters.output-path}}/{{inputs.parameters.shape-file-path}}' + - name: aorc-secret + value: '{{inputs.parameters.aorc-secret}}' + - - name: metadata-extraction + templateRef: + name: metadata-extractor + template: metadata-extractor + arguments: + parameters: + - name: bucket + value: '{{inputs.parameters.output-bucket}}' + - name: input-path + value: '{{inputs.parameters.output-path}}/aorc_output.gz' + - name: output-path + value: '{{inputs.parameters.output-path}}/metadata.gz' + entrypoint: aorc-subset-huc + arguments: + parameters: + - name: start-date + value: '2020-01-01' + - name: end-date + value: '2020-01-02' + - name: output-path + value: templatetest + - name: output-bucket + value: sblack + - name: hucs + value: 102600070103,102600070104,102600090205,102600050206,102600070102 + - name: aorc-secret + value: changethis + - name: shape-file-path + value: shape.gz + templateDefaults: + inputs: {} + outputs: {} + metadata: {} + retryStrategy: + limit: '0' + timeout: 600s + diff --git a/app/argo/nwm1-subset-minio.yml b/app/argo/workflows/nwm1-subset-minio.yml similarity index 51% rename from app/argo/nwm1-subset-minio.yml rename to app/argo/workflows/nwm1-subset-minio.yml index 2bbd96b5..f28fd7ea 100644 --- a/app/argo/nwm1-subset-minio.yml +++ b/app/argo/workflows/nwm1-subset-minio.yml @@ -8,34 +8,34 @@ spec: outputs: {} metadata: {} steps: - - - name: nwm1-subset-step-name - template: nwm1-subset-template - arguments: - parameters: - - name: output-path - value: '{{workflow.parameters.output-path}}' - - name: y_south - value: '{{workflow.parameters.y_south}}' - - name: x_west - value: '{{workflow.parameters.x_west}}' - - name: y_north - value: '{{workflow.parameters.y_north}}' - - name: x_east - value: '{{workflow.parameters.x_east}}' - - name: output-bucket - value: '{{workflow.parameters.output-bucket}}' - - - name: metadata-extraction - templateRef: - name: metadata-extractor - template: metadata-extractor - arguments: - parameters: - - name: bucket
- value: '{{workflow.parameters.output-bucket}}' - - name: input-path - value: '{{workflow.parameters.output-path}}' - - name: output-path - value: '{{workflow.parameters.output-path}}' + - - name: nwm1-subset-step-name + template: nwm1-subset-template + arguments: + parameters: + - name: output-path + value: '{{workflow.parameters.output-path}}' + - name: y_south + value: '{{workflow.parameters.y_south}}' + - name: x_west + value: '{{workflow.parameters.x_west}}' + - name: y_north + value: '{{workflow.parameters.y_north}}' + - name: x_east + value: '{{workflow.parameters.x_east}}' + - name: output-bucket + value: '{{workflow.parameters.output-bucket}}' + - - name: metadata-extraction + templateRef: + name: metadata-extractor + template: metadata-extractor + arguments: + parameters: + - name: bucket + value: '{{workflow.parameters.output-bucket}}' + - name: input-path + value: '{{workflow.parameters.output-path}}' + - name: output-path + value: '{{workflow.parameters.output-path}}_metadata.tgz' - name: nwm1-subset-template inputs: parameters: @@ -45,19 +45,6 @@ spec: - name: x_east - name: output-path - name: output-bucket - artifacts: - - name: nwm1-input-data-new - path: /srv/domain - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-static-input - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: nwm.v1.2.4.gz outputs: artifacts: - name: subset-results @@ -72,6 +59,8 @@ spec: name: minio-credentials key: secretKey key: '{{inputs.parameters.output-path}}' + archive: + none: {} metadata: {} container: name: '' @@ -85,9 +74,12 @@ spec: - '{{inputs.parameters.x_west}}' - '{{inputs.parameters.y_north}}' - '{{inputs.parameters.x_east}}' - - /srv/domain + - /srv/domain/nwm.v1.2.4/input - /srv/output resources: {} + volumeMounts: + - name: subsetter-data-volume + mountPath: /srv/domain entrypoint: nwm1-subset-with-extraction-template arguments: parameters: @@ -103,3 +95,7 @@ spec: 
value: asdf - name: output-bucket value: 'subsetter-outputs' + volumes: + - name: subsetter-data-volume + persistentVolumeClaim: + claimName: subset-accelerator \ No newline at end of file diff --git a/app/argo/nwm2-subset-minio.yml b/app/argo/workflows/nwm2-subset-minio.yml similarity index 85% rename from app/argo/nwm2-subset-minio.yml rename to app/argo/workflows/nwm2-subset-minio.yml index 701794d4..30d76bb3 100644 --- a/app/argo/nwm2-subset-minio.yml +++ b/app/argo/workflows/nwm2-subset-minio.yml @@ -35,7 +35,7 @@ spec: - name: input-path value: '{{workflow.parameters.output-path}}' - name: output-path - value: '{{workflow.parameters.output-path}}' + value: '{{workflow.parameters.output-path}}_metadata.tgz' - name: nwm2-subset-template inputs: parameters: @@ -45,19 +45,6 @@ spec: - name: x_east - name: output-path - name: output-bucket - artifacts: - - name: nwm2-input-data - path: /srv/domain - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-static-input - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: nwm.v2.0.0.gz outputs: artifacts: - name: subset-results @@ -85,13 +72,16 @@ spec: - '{{inputs.parameters.x_west}}' - '{{inputs.parameters.y_north}}' - '{{inputs.parameters.x_east}}' - - /srv/domain + - /srv/domain/nwm.v2.0.0/input - /srv/output resources: requests: cpu: '1' ephemeral-storage: 2Gi memory: 1Gi + volumeMounts: + - name: subsetter-data-volume + mountPath: /srv/domain entrypoint: nwm2-subset-with-extraction-template arguments: parameters: @@ -107,6 +97,10 @@ spec: value: 'asdf' - name: output-bucket value: 'subsetter-outputs' + volumes: + - name: subsetter-data-volume + persistentVolumeClaim: + claimName: subset-accelerator templateDefaults: inputs: {} outputs: {} diff --git a/app/argo/parflow-subset-minio.yml b/app/argo/workflows/parflow-subset-minio.yml similarity index 87% rename from app/argo/parflow-subset-minio.yml rename to 
app/argo/workflows/parflow-subset-minio.yml index 0d544aa6..2e42cebe 100644 --- a/app/argo/parflow-subset-minio.yml +++ b/app/argo/workflows/parflow-subset-minio.yml @@ -41,7 +41,7 @@ spec: - name: input-path value: '{{workflow.parameters.output-path}}' - name: output-path - value: '{{workflow.parameters.output-path}}' + value: '{{workflow.parameters.output-path}}/hs_metadata.tgz' - name: parflow-subset-v1 inputs: parameters: @@ -49,18 +49,6 @@ spec: - name: output-path - name: output-bucket artifacts: - - name: pfinput-data - path: /srv/input - s3: - endpoint: api.minio.cuahsi.io - bucket: subsetter-static-input - accessKeySecret: - name: minio-credentials - key: accessKey - secretKeySecret: - name: minio-credentials - key: secretKey - key: pfconus.v1.0.gz - name: shape-input-data path: /srv/shape s3: @@ -98,7 +86,7 @@ spec: args: - '{{inputs.parameters.label}}' - /srv/shape/watershed.shp - - /srv/input + - /srv/input/pfconus.v1.0/input - /srv/shape resources: limits: @@ -109,6 +97,9 @@ spec: cpu: '1' ephemeral-storage: 1Gi memory: 1Gi + volumeMounts: + - name: subsetter-data-volume + mountPath: /srv/input entrypoint: parflow-subset-huc arguments: parameters: @@ -120,6 +111,10 @@ spec: value: subsetter-outputs - name: hucs value: 102600070103,102600070104,102600090205,102600050206,102600070102 + volumes: + - name: subsetter-data-volume + persistentVolumeClaim: + claimName: subset-accelerator templateDefaults: inputs: {} outputs: {}