diff --git a/.github/workflows/build_test_docker_x86.yaml b/.github/workflows/build_test_docker_x86.yaml index cca6147a..c516925e 100644 --- a/.github/workflows/build_test_docker_x86.yaml +++ b/.github/workflows/build_test_docker_x86.yaml @@ -39,7 +39,7 @@ jobs: - name: Install packages for datastream run: | - curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb && curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_amd64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_amd64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo apt install ./mount-s3.deb && sudo apt-get install git pip pigz awscli python3.9 -y + curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_amd64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_amd64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo apt-get install git pip pigz awscli python3.9 -y - name: Build docker containers run : | diff --git a/.github/workflows/build_test_push_docker_x86.yaml b/.github/workflows/build_test_push_docker_x86.yaml index d7efc7c5..b4c846a3 100644 --- a/.github/workflows/build_test_push_docker_x86.yaml +++ b/.github/workflows/build_test_push_docker_x86.yaml @@ -33,7 +33,7 @@ jobs: - name: Install packages for datastream run: | - curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb && curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_amd64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_amd64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo apt install ./mount-s3.deb && sudo apt-get install git pip pigz awscli python3.9 -y + curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_amd64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_amd64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo apt-get install git pip pigz awscli python3.9 -y - name: Build 
docker containers run : | diff --git a/.github/workflows/test_datastream_options.yml b/.github/workflows/test_datastream_options.yml new file mode 100644 index 00000000..21d397b6 --- /dev/null +++ b/.github/workflows/test_datastream_options.yml @@ -0,0 +1,118 @@ +name: Test Datastream Options + +on: + push: + branches: + - main + + pull_request: + branches: + - main + +jobs: + test-datastream-options: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Configure AWS + run: | + aws configure set aws_access_key_id ${{ secrets.aws_access_key_id }} + aws configure set aws_secret_access_key ${{ secrets.aws_secret_access_key }} + aws configure set region us-east-1 + + - name: Build docker containers + run: | + ./scripts/docker_builds.sh -b + + - name: Install packages for datastream + run: | + curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_amd64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_amd64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo apt-get install git pip pigz awscli python3.9 -y + + - name: Get geopackage from hfsubset + run: | + hfsubset -w medium_range -s nextgen -v 2.1.1 -l divides,flowlines,network,nexus,forcing-weights,flowpath-attributes,model-attributes -o palisade.gpkg -t hl "Gages-09106150" + + - name: Base test and NWM_RETRO_V3 + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json + + - name: Cache resource directory + run: | + mkdir -p ./data/cache + cp -r ./data/datastream_test/datastream-resources ./data/cache + cp -r ./data/cache/datastream-resources ./data/cache/datastream-resources-no-forcings + cp -r ./data/cache/datastream-resources 
./data/cache/datastream-resources-missing + sudo rm -rf ./data/cache/datastream-resources-no-forcings/ngen-forcings + + - name: Resource directory test missing all + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + sudo rm -rf ./data/cache/datastream-resources-missing/ngen-forcings + sudo rm -rf ./data/cache/datastream-resources-missing/config/* + ./scripts/stream.sh -r ./data/cache/datastream-resources-missing -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg + + - name: Resource directory test + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -r ./data/cache/datastream-resources -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test + + - name: Forcings sources option test NWM_RETRO_V2 + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -r ./data/cache/datastream-resources-no-forcings -s 201906200100 -e 201906200200 -C NWM_RETRO_V2 -d $(pwd)/data/datastream_test + + + - name: Forcings sources option test NWM_OPERATIONAL_V3 + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + TODAY=$(env TZ=US/Eastern date +'%Y%m%d') + ./scripts/stream.sh -r ./data/cache/datastream-resources-no-forcings -s $TODAY"0100" -e $TODAY"0200" -C NWM_OPERATIONAL_V3 -d $(pwd)/data/datastream_test + + - name: Forcings sources option test NOMADS_OPERATIONAL + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + TODAY=$(env TZ=US/Eastern date +'%Y%m%d') + ./scripts/stream.sh -r ./data/cache/datastream-resources-no-forcings -s $TODAY"0100" -e $TODAY"0200" -C NOMADS_OPERATIONAL -d $(pwd)/data/datastream_test + + - name: Test hfsubset options + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -I "Gages-09106150" -i hl -v 2.1.1 -R 
$(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json + + - name: S3 write out test + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -S ngen-datastream -o git_actions_test/ + aws s3api wait object-exists --bucket ngen-datastream --key git_actions_test/ngen-run.tar.gz + aws s3api delete-object --bucket ngen-datastream --key git_actions_test/ngen-run.tar.gz + + - name: DAILY today test + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -s DAILY -C NWM_OPERATIONAL_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json + + - name: DAILY pick day test + if: always() + run: | + sudo rm -rf $(pwd)/data/datastream_test + ./scripts/stream.sh -s DAILY -e 202006200000 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json + + + \ No newline at end of file diff --git a/INSTALL.md b/INSTALL.md index 3c7949bb..8d012e7c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,55 +1,11 @@ -# Install Instructions for ngen-datastream -These steps are provided [scripted](#scripts) or [step-by-step](#step-by-step). Note some steps are specific to either `x86` or `aarch64` - -## Prerequisites -* Linux OS -* Docker -* git - -These instructions assume launching an instance from a blank Amazon 2023 Linux image. Steps may vary depending on your specific linux distribution. - -## Scripted -1) Clone this repository -``` -git clone https://github.com/CIROH-UA/ngen-datastream.git -``` -2) Execute the startup script -``` -cd ngen-datastream && ./scripts/install.sh -``` -`aws_configure` if you intend to mount an s3 bucket or reference a bucket in the configuration. - -You're ready to run ngen-datastream! 
- -## Step-by-step -`$USER=($whomami)` - -For `x86`, `PKG_MNGR="dnf"` - -For `aarch64`, `PKG_MNGR="yum"` - -1) Update package manager and install packages -``` -sudo $PKG_MNGR update -y -sudo $PKG_MNGR install git pip python pigz awscli -y -``` -`x86` : `sudo $PKG_MNGR update -y -sudo $PKG_MNGR install dnf-plugins-core -y` - -2) install packages from internet - -`x86` : `curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.rpm` - -`aarch64` : `curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/arm64/mount-s3.rpm` -``` -sudo dnf update -y -sudo dnf ./mount-s3.rpm -``` -3) clone this repository -``` -git clone https://github.com/CIROH-UA/ngen-datastream.git -``` -`aws_configure` if you intend to mount an s3 bucket or reference a bucket in the configuration. - -You're ready to run ngen-datastream! +To run ngen-datastream, clone this repository onto a linux machine and make sure the packages below are installed. + +## Required Packages +* docker +* git +* pip +* python>=3.9 +* pigz +* awscli +* [hfsubset](https://github.com/lynker-spatial/hfsubsetCLI) diff --git a/README.md b/README.md index 876abe9f..1e41b882 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ or run with cli args -f, --NWM_FORCINGS_DIR -F, --NGEN_FORCINGS -N, --NGEN_BMI_CONFS - -S, --S3_MOUNT + -S, --S3_BUCKET -o, --S3_PREFIX -n, --NPROCS -y, --DRYRUN @@ -63,8 +63,8 @@ To see what's happening in `ngen-datastream` step-by-step, see the [breakdown do | NWM_FORCINGS_DIR | Path to local directory containing nwm files. Alternatively, these file could be stored in RESOURCE_DIR as nwm-forcings. | | | NGEN_BMI_CONFS | Path to local directory containing NextGen BMI configuration files. Alternatively, these files could be stored in RESOURCE_DIR under `config/`. See here for [directory structure](#configuration-directory-ngen-runconfig). | | | NGEN_FORCINGS | Path to local ngen forcings directory holding ngen forcing csv's or parquet's. 
Also accepts tarball or netcdf. Alternatively, this file(s) could be stored in RESOURCE_DIR at `ngen-forcings/`. | | -| S3_MOUNT | Path to mount S3 bucket to. `ngen-datastream` will copy outputs here. | | -| S3_PREFIX | Prefix to prepend to all files when copying to s3 | +| S3_BUCKET | AWS S3 Bucket to write output to | | +| S3_PREFIX | Path within S3 bucket to write to | | DRYRUN | Set to "True" to skip all compute steps. | | NPROCS | Maximum number of processes to use in any step of `ngen-datastream`. Defaults to `nprocs - 2` | | diff --git a/docs/images/statemachine.jpg b/docs/images/statemachine.jpg new file mode 100644 index 00000000..3c1a9941 Binary files /dev/null and b/docs/images/statemachine.jpg differ diff --git a/python_tools/src/python_tools/configure_datastream.py b/python_tools/src/python_tools/configure_datastream.py index 7b031c63..c3d94b13 100644 --- a/python_tools/src/python_tools/configure_datastream.py +++ b/python_tools/src/python_tools/configure_datastream.py @@ -151,6 +151,19 @@ def create_confs(args): conf = config_class2dict(args) realization = args.realization_file geo_base = args.gpkg.split('/')[-1] + + if "OPERATIONAL" in args.forcing_source: + retro_or_op = "operational" + if "V3" in args.forcing_source: + urlbaseinput = 7 + if "NOMADS" in args.forcing_source: + urlbaseinput = 1 + elif "RETRO" in args.forcing_source: + retro_or_op = "retrospective" + if "V2" in args.forcing_source: + urlbaseinput = 1 + if "V3" in args.forcing_source: + urlbaseinput = 4 if conf['globals']['start_date'] == "DAILY": if conf['globals']['end_date'] != "": @@ -158,6 +171,8 @@ def create_confs(args): start_date = datetime.strptime(conf['globals']['end_date'],'%Y%m%d%H%M') else: start_date = datetime.now(tz.timezone('US/Eastern')) + retro_or_op="operational" + urlbaseinput=7 today = start_date.replace(hour=1, minute=0, second=0, microsecond=0) tomorrow = today + timedelta(hours=23) @@ -168,8 +183,8 @@ def create_confs(args): end = 
tomorrow.strftime('%Y%m%d%H%M') start_realization = today.strftime('%Y-%m-%d %H:%M:%S') end_realization = tomorrow.strftime('%Y-%m-%d %H:%M:%S') - nwm_conf = create_conf_nwm(start, end, "operational",7) - fp_conf = create_conf_fp(start, end, conf['globals']['nprocs'],args.docker_mount,args.forcing_split_vpu,"operational",geo_base) + nwm_conf = create_conf_nwm(start, end, retro_or_op, urlbaseinput) + fp_conf = create_conf_fp(start, end, conf['globals']['nprocs'],args.docker_mount,args.forcing_split_vpu,retro_or_op,geo_base) else: start = conf['globals']['start_date'] end = conf['globals']['end_date'] @@ -185,19 +200,6 @@ def create_confs(args): nwm_conf = {} fp_conf = create_conf_fp(start, end, conf['globals']['nprocs'], args.docker_mount, args.forcing_split_vpu,retro_or_op,geo_base) else: - if "OPERATIONAL" in args.forcing_source: - retro_or_op = "operational" - if "V3" in args.forcing_source: - urlbaseinput = 7 - if "NOMADS" in args.forcing_source: - urlbaseinput = 1 - elif "RETRO" in args.forcing_source: - retro_or_op = "retrospective" - if "V2" in args.forcing_source: - urlbaseinput = 1 - if "V3" in args.forcing_source: - urlbaseinput = 4 - nwm_conf = create_conf_nwm(start,end, retro_or_op,urlbaseinput) fp_conf = create_conf_fp(start, end, conf['globals']['nprocs'], args.docker_mount, args.forcing_split_vpu,retro_or_op,geo_base) diff --git a/python_tools/tests/test_configurer.py b/python_tools/tests/test_configurer.py index 93e7524a..fd0e55ff 100644 --- a/python_tools/tests/test_configurer.py +++ b/python_tools/tests/test_configurer.py @@ -102,5 +102,33 @@ def test_conf_daily(): start = datetime.strptime(data['time']['start_time'],"%Y-%m-%d %H:%M:%S") assert start.day == datetime.today().day + with open(CONF_NWM,'r') as fp: + data = json.load(fp) + + assert data['urlbaseinput'] == 7 + + +def test_conf_daily_pick(): + inputs.start_date = "DAILY" + inputs.end_date = "202006200000" + create_confs(inputs) + assert os.path.exists(CONF_NWM) + assert 
os.path.exists(CONF_FP) + assert os.path.exists(CONF_DATASTREAM) + assert os.path.exists(REALIZATION_META_USER) + assert os.path.exists(REALIZATION_META_DS) + assert os.path.exists(REALIZATION_RUN) + + with open(REALIZATION_RUN,'r') as fp: + data = json.load(fp) + + start = datetime.strptime(data['time']['start_time'],"%Y-%m-%d %H:%M:%S") + assert start.day == datetime.strptime(inputs.end_date,"%Y%m%d%H%M").day + + with open(CONF_NWM,'r') as fp: + data = json.load(fp) + + assert data['urlbaseinput'] == 4 + + diff --git a/research_datastream/terraform/ARCHITECTURE.md b/research_datastream/terraform/ARCHITECTURE.md index c03cd36d..1730810e 100644 --- a/research_datastream/terraform/ARCHITECTURE.md +++ b/research_datastream/terraform/ARCHITECTURE.md @@ -3,7 +3,7 @@ This document provides a technical look at the AWS infrastructure that is create The AWS State Machine diagram -![statemachine](../docs/images/statemachine.jpg) +![statemachine](../../docs/images/statemachine.jpg) Each green box represents a distinct AWS Lambda Function that is responsible for a task such as spawning an instance from an Amazon Machine Image or issuing commands to an instance. Together these lambdas coordinate the task of creating a cloud based host to which a user can issue commands. diff --git a/research_datastream/terraform/GETTING_STARTED.md b/research_datastream/terraform/GETTING_STARTED.md index 0b90c925..94a49a00 100644 --- a/research_datastream/terraform/GETTING_STARTED.md +++ b/research_datastream/terraform/GETTING_STARTED.md @@ -33,7 +33,7 @@ This will set your user to `root`. Type `exit` to return back to `ec2-user`. 
The Second, install required packages ``` -curl -L -O https://s3.amazonaws.com/mountpoint-s3-release/latest/arm64/mount-s3.rpm && curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_arm64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_arm64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo dnf install ./mount-s3.rpm git pip pigz awscli python -y +curl -L -O https://github.com/lynker-spatial/hfsubsetCLI/releases/download/v1.1.0/hfsubset-v1.1.0-linux_arm64.tar.gz && tar -xzvf hfsubset-v1.1.0-linux_arm64.tar.gz && sudo mv ./hfsubset /usr/bin/hfsubset && sudo dnf install git pip pigz awscli python -y ``` Next, clone this repository diff --git a/research_datastream/terraform/README.md b/research_datastream/terraform/README.md index 16c5cf32..4aca7373 100644 --- a/research_datastream/terraform/README.md +++ b/research_datastream/terraform/README.md @@ -9,9 +9,10 @@ See [here](https://github.com/CIROH-UA/ngen-datastream/tree/main/terraform/ARCHI * Terraform * Linux -## Building AWS State Machine +## Build AWS Infrastructure +Construct AWS State Machine, Lambdas, Policies, and Roles. See [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/terraform/ARCHITECTURE.md) for a more in-depth explanation of the infrastructure. 1) Open a terminal, log into AWS account -2) Customize resource names by editing `variables.tfvars`. +2) Customize resource names by editing `variables.tfvars`. Names must be unique and must not match any existing resources. 3) Build the state machine with Terraform ``` cd terraform @@ -19,7 +20,8 @@ terraform init terraform apply -var-file=./variables.tfvars ``` -## Execute +## Execute AWS State Machine +This command will start the AWS state machine, which will start and manage an EC2 instance to run the datastream command. 
``` aws stepfunctions start-execution \ --state-machine-arn arn:aws:states:us-east-1:###:stateMachine: \ @@ -27,3 +29,14 @@ aws stepfunctions start-execution \ --input "file://" \ --region us-east-1 ``` + +## Tear Down AWS Infrastructure +``` +terraform destroy -var-file=./variables.tfvars +``` + +## Partial Success (`terraform apply` failure) +`terraform apply` will fail if some of the resources already exist with the names defined in `variables.tfvars`. These resources must be either manually destroyed or imported. A script exists [here](https://github.com/CIROH-UA/ngen-datastream/blob/main/research_datastream/terraform/test/import_resources.sh) to automate importing any existing resources. Remove all spaces from the variable file if using this script. +``` +./test/import_resources.sh +``` diff --git a/research_datastream/terraform/lambda_functions/streamcommander/lambda_function.py b/research_datastream/terraform/lambda_functions/streamcommander/lambda_function.py index 6a0098ed..28a7696a 100644 --- a/research_datastream/terraform/lambda_functions/streamcommander/lambda_function.py +++ b/research_datastream/terraform/lambda_functions/streamcommander/lambda_function.py @@ -39,8 +39,6 @@ def lambda_handler(event, context): if "s3_bucket" in ds_options: bucket = ds_options["s3_bucket"] prefix = ds_options["object_prefix"] - event['commands'].append("runuser -l ec2-user -c 'mkdir -p /home/ec2-user/ngen-datastream/data/mount'") - event['commands'].append(f"runuser -l ec2-user -c 'mount-s3 {bucket} /home/ec2-user/ngen-datastream/data/mount'") nprocs = ds_options["nprocs"] start = ds_options["start_time"] end = ds_options["end_time"] @@ -51,7 +49,7 @@ def lambda_handler(event, context): subset_type = ds_options["subset_id_type"] command_str = f"runuser -l ec2-user -c 'cd /home/ec2-user/ngen-datastream && ./scripts/stream.sh -s {start} -e {end} -C {forcing_source} -I {subset_id} -i {subset_type} -v {hf_version} -d $(pwd)/data/datastream -R {realization} -n 
{nprocs}" if "s3_bucket" in ds_options: - command_str += f" -S $(pwd)/data/mount -o {prefix}" + command_str += f" -S {bucket} -o {prefix}" command_str += '\'' event['commands'].append(command_str) diff --git a/scripts/stream.sh b/scripts/stream.sh index e9c886d1..187cd229 100755 --- a/scripts/stream.sh +++ b/scripts/stream.sh @@ -95,8 +95,8 @@ usage() { echo " -f, --NWM_FORCINGS_DIR " echo " -F, --NGEN_FORCINGS " echo " -N, --NGEN_BMI_CONFS " - echo " -S, --S3_MOUNT " - echo " -o, --S3_PREFIX " + echo " -S, --S3_BUCKET " + echo " -o, --S3_PREFIX " echo " -n, --NPROCS " echo " -y, --DRYRUN " exit 1 @@ -118,7 +118,7 @@ RESOURCE_DIR="" NWM_FORCINGS_DIR="" NGEN_FORCINGS="" NGEN_BMI_CONFS="" -S3_MOUNT="" +S3_BUCKET="" S3_PREFIX="" NPROCS=4 DRYRUN="False" @@ -175,7 +175,7 @@ while [ "$#" -gt 0 ]; do -f|--NWM_FORCINGS_DIR) NWM_FORCINGS_DIR="$2"; shift 2;; -F|--NGEN_FORCINGS) NGEN_FORCINGS="$2"; shift 2;; -N|--NGEN_BMI_CONFS) NGEN_BMI_CONFS="$2"; shift 2;; - -S|--S3_MOUNT) S3_MOUNT="$2"; shift 2;; + -S|--S3_BUCKET) S3_BUCKET="$2"; shift 2;; -o|--S3_PREFIX) S3_PREFIX="$2"; shift 2;; -n|--NPROCS) NPROCS="$2"; shift 2;; -y|--DRYRUN) DRYRUN="$2"; shift 2;; @@ -202,46 +202,6 @@ else echo "No configuration file detected, using cli args" fi -# set paths for daily run -DATE=$(env TZ=US/Eastern date +'%Y%m%d') -if [ $START_DATE == "DAILY" ]; then - if [[ -z "$END_DATE" ]]; then - if [[ -z "$DATA_DIR" ]]; then - DATA_DIR="${PACAKGE_DIR%/}/data/$DATE" - fi - if [[ -n "${S3_MOUNT}" ]]; then - if [[ -z "${S3_PREFIX}" ]]; then - S3_PREFIX="daily/$DATE" - fi - S3_OUT="$S3_MOUNT/$S3_PREFIX" - echo "S3_OUT: " $S3_OUT - mkdir -p $S3_OUT - fi - else - if [[ -z "$DATA_DIR" ]]; then - DATA_DIR="${PACAKGE_DIR%/}/data/${END_DATE::-4}" - fi - if [[ -n "${S3_MOUNT}" ]]; then - if [[ -z "${S3_PREFIX}" ]]; then - S3_PREFIX="daily/${END_DATE::-4}" - fi - S3_OUT="$S3_MOUNT/$S3_PREFIX" - echo "S3_OUT: " $S3_OUT - mkdir -p $S3_OUT - fi - fi -else - if [[ -z "${DATA_DIR}" ]]; then - 
DATA_DIR="${PACAKGE_DIR%/}/data/$START_DATE-$END_DATE" - fi - if [[ -n "${S3_MOUNT}" ]]; then - S3_OUT="$S3_MOUNT/$S3_PREFIX" - echo "S3_OUT: " $S3_OUT - mkdir -p $S3_OUT - fi -fi - -# create directories if [ -e "$DATA_DIR" ]; then echo "The path $DATA_DIR exists. Please delete it or set a different path." exit 1 @@ -562,8 +522,6 @@ else fi log_time "NGEN_END" $DATASTREAM_PROFILING - - log_time "MERKLE_START" $DATASTREAM_PROFILING if [ "$DRYRUN" == "True" ]; then echo "DRYRUN - MERKDIR EXECUTION SKIPPED" @@ -581,12 +539,23 @@ log_time "TAR_END" $DATASTREAM_PROFILING log_time "DATASTREAM_END" $DATASTREAM_PROFILING -if [ -e "$S3_OUT" ]; then +if [ -n "$S3_BUCKET" ]; then log_time "S3_MOVE_START" $DATASTREAM_PROFILING - cp $NGENRUN_TAR $S3_OUT - cp $DATA_DIR/merkdir.file $S3_OUT - cp -r $DATASTREAM_META $S3_OUT - cp -r $DATASTREAM_RESOURCES $S3_OUT + S3_BASE="s3://$S3_BUCKET/${S3_PREFIX:+${S3_PREFIX%/}/}" + echo "Writing data to S3: $S3_BASE" + + S3_OUT="${S3_BASE}$TAR_NAME" + aws s3 cp "$NGENRUN_TAR" "$S3_OUT" + + S3_OUT="${S3_BASE}merkdir.file" + aws s3 cp "$DATA_DIR/merkdir.file" "$S3_OUT" + + S3_OUT="${S3_BASE}datastream-metadata" + aws s3 sync "$DATASTREAM_META" "$S3_OUT" + + S3_OUT="${S3_BASE}datastream-resources" + aws s3 sync "$DATASTREAM_RESOURCES" "$S3_OUT" + echo "Data exists here: $S3_OUT" log_time "S3_MOVE_END" $DATASTREAM_PROFILING fi