Skip to content

4 - Automated CKAN Jobs #62707

4 - Automated CKAN Jobs

4 - Automated CKAN Jobs #62707

Workflow file for this run

---
name: 4 - Automated CKAN Jobs
on:
  # GitHub Actions evaluates `schedule` cron expressions in UTC; the EST
  # equivalents below assume UTC-5 (add one hour during daylight saving time).
  # Every expression here must stay byte-identical to its SCHEDULE_* twin under
  # `env`: the jobs compare `github.event.schedule` against those strings to
  # decide which CKAN command a given trigger should run.
  schedule:
    - cron: '30 7 * * *'  # Tracking Update -- daily at 07:30 UTC (2:30am EST)
    - cron: '0 2 * * *'  # S3 Sitemap Update -- daily at 02:00 UTC (9pm EST)
    - cron: '4/15 * * * *'  # Harvester Check -- every 15 mins, from minute 4
    - cron: '0 3 * * *'  # DB-Solr-Sync -- daily at 03:00 UTC (10pm EST)
    - cron: '30 6 * * *'  # Check Stuck Jobs -- daily at 06:30 UTC (1:30am EST)
    - cron: '0 13 * * *'  # Generate Reports -- daily at 13:00 UTC (8am EST)
env:
  # Overrun flag. The `check runtime` step rewrites it to 'true' through
  # $GITHUB_ENV when a task exceeds its time budget. Quoted because environment
  # values are always strings at runtime.
  ERROR: 'false'
  # Make sure the `on.schedule` cron entries match these variables.
  SCHEDULE_TRACKING: '30 7 * * *'
  SCHEDULE_SITEMAP: '0 2 * * *'
  SCHEDULE_HARVESTING: '4/15 * * * *'
  SCHEDULE_DBSOLR_SYNC: '0 3 * * *'
  SCHEDULE_STUCK_JOBS: '30 6 * * *'
  SCHEDULE_GEN_REPORT: '0 13 * * *'
jobs:
  # Builds the JSON strategy matrix (schedule x environment, plus per-task
  # overrides) and publishes it as the `matrix` job output for the
  # `ckan-auto-command` job to expand.
  setup-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.generate-matrix.outputs.matrix }}
    steps:
      - name: Generate Matrix
        id: generate-matrix
        run: |
          # Matrix Configuration Settings
          #  - Default App to run tasks on: catalog-admin
          #  - Default timeout for task to raise issue: ~6 hours (22000 seconds)
          #  - Default Create an Information Issue for jobs we want to inspect: false
          #  - Default monitor state: true
          # schedule: tracking update
          #  - Only run on [development, staging, prod]
          #  - Create Error issue: if runtime longer than 210 mins
          #  - Create Informational issue: tracking-update-info.md
          # schedule: sitemap update
          #  - Only run on [prod]
          #  - Run on [catalog-gather] app
          # schedule: harvesting update
          #  - Only run on [development, staging, prod]
          # schedule: stuck jobs check
          #  - Only run on [staging, prod]
          #  - Create Error issue: if >0 stuck jobs, automated_ckan_error.md
          # schedule: db-solr-sync
          #  - Only run on [staging, prod]
          #  - Create Error issue: if runtime longer than 30 mins
          #  - Create Informational issue: db-solr-sync-info.md
          # schedule: report generation
          #  - Only run on [development, staging, prod]
          #  - Create Error issue: if ckan command errors
          #
          # Order matters inside "include": the un-keyed entries at the top act
          # as defaults for every combination, and the later schedule/environ
          # keyed entries override those defaults for the matching cells.
          # The document must be strict JSON (no trailing commas).
          MATRIX=$(cat << MAT
          {
            "schedule": ["${{env.SCHEDULE_TRACKING}}", "${{env.SCHEDULE_SITEMAP}}",
                         "${{env.SCHEDULE_HARVESTING}}", "${{env.SCHEDULE_STUCK_JOBS}}",
                         "${{env.SCHEDULE_DBSOLR_SYNC}}", "${{ env.SCHEDULE_GEN_REPORT}}"],
            "environ": ["development", "staging", "prod"],
            "include": [ {"app": "catalog-admin"},
                         {"error_seconds": 22000},
                         {"info_issue": false},
                         {"issue_template": ".github/automated_ckan_error.md"},
                         {"monitor": true},
                         {"schedule": "${{env.SCHEDULE_HARVESTING}}", "command": "ckan harvester run"},
                         {"schedule": "${{env.SCHEDULE_TRACKING}}", "command": "ckan geodatagov tracking-update"},
                         {"schedule": "${{env.SCHEDULE_TRACKING}}", "error_seconds": 12600},
                         {"schedule": "${{env.SCHEDULE_TRACKING}}", "info_issue": true},
                         {"schedule": "${{env.SCHEDULE_TRACKING}}", "issue_template": ".github/tracking-update-info.md"},
                         {"schedule": "${{env.SCHEDULE_SITEMAP}}", "command": "ckan geodatagov sitemap-to-s3"},
                         {"schedule": "${{env.SCHEDULE_SITEMAP}}", "app": "catalog-gather"},
                         {"schedule": "${{env.SCHEDULE_DBSOLR_SYNC}}", "command": "ckan geodatagov db-solr-sync"},
                         {"schedule": "${{env.SCHEDULE_DBSOLR_SYNC}}", "error_seconds": 1800},
                         {"schedule": "${{env.SCHEDULE_DBSOLR_SYNC}}", "info_issue": true},
                         {"schedule": "${{env.SCHEDULE_DBSOLR_SYNC}}", "issue_template": ".github/db-solr-sync-info.md"},
                         {"schedule": "${{env.SCHEDULE_STUCK_JOBS}}", "command": "ckan geodatagov check-stuck-jobs"},
                         {"schedule": "${{env.SCHEDULE_GEN_REPORT}}", "command": "ckan report generate"},
                         {"schedule": "${{env.SCHEDULE_GEN_REPORT}}", "issue_template": ".github/report-generation-failure.md"},
                         {"environ": "development", "ram": "1G"},
                         {"environ": "staging", "ram": "2500M"},
                         {"environ": "prod", "ram": "3G"}
                       ],
            "exclude": [ {"schedule": "${{env.SCHEDULE_SITEMAP}}", "environ": "development"},
                         {"schedule": "${{env.SCHEDULE_SITEMAP}}", "environ": "staging"},
                         {"schedule": "${{env.SCHEDULE_DBSOLR_SYNC}}", "environ": "development"},
                         {"schedule": "${{env.SCHEDULE_STUCK_JOBS}}", "environ": "development"}
                       ]
          }
          MAT
          )
          # Multi-line step output: name<<DELIMITER, value, DELIMITER.
          {
            echo "matrix<<EOF"
            echo "$MATRIX"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

  # Expands the generated matrix into one job per (schedule, environment) cell.
  # Every cell is created on every cron trigger; the task steps are gated on
  # `github.event.schedule == matrix.schedule`, so only the cells belonging to
  # the cron entry that fired actually launch a Cloud Foundry task.
  ckan-auto-command:
    needs:
      - setup-matrix
    strategy:
      max-parallel: 1
      fail-fast: false
      matrix: ${{ fromJSON(needs.setup-matrix.outputs.matrix) }}
    name: ${{ matrix.command }} - ${{matrix.environ}}
    runs-on: ubuntu-latest
    environment: ${{matrix.environ}}
    steps:
      - name: checkout
        uses: actions/checkout@v4
      - name: Set Unique Command Name
        # Exports INSTANCE_NAME (command with spaces stripped, plus run id and
        # attempt) and START (epoch seconds) for the later steps.
        run: |
          INSTANCE_NAME="$(echo "${{ matrix.command }}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" | tr -d ' ')"
          echo "INSTANCE_NAME=${INSTANCE_NAME}" | tee -a "$GITHUB_ENV"
          echo "START=$(date +%s)" | tee -a "$GITHUB_ENV"
      - name: ${{ matrix.command }}
        if: ${{ github.event.schedule == matrix.schedule }}
        # NOTE(review): `@main` tracks a mutable branch; consider pinning to a
        # release tag or commit SHA.
        uses: cloud-gov/cg-cli-tools@main
        with:
          command: >
            cf run-task ${{ matrix.app }}
            --command "${{ matrix.command }}"
            --name "$INSTANCE_NAME"
            -m ${{ matrix.ram }} -k 2G
          cf_org: gsa-datagov
          cf_space: ${{ matrix.environ }}
          cf_username: ${{secrets.CF_SERVICE_USER}}
          cf_password: ${{secrets.CF_SERVICE_AUTH}}
      - name: monitor task output
        if: ${{ matrix.monitor && github.event.schedule == matrix.schedule }}
        uses: cloud-gov/cg-cli-tools@main
        with:
          command: >
            tools/monitor_cf_logs.sh ${{ matrix.app }} $INSTANCE_NAME
          cf_org: gsa-datagov
          cf_space: ${{ matrix.environ }}
          cf_username: ${{secrets.CF_SERVICE_USER}}
          cf_password: ${{secrets.CF_SERVICE_AUTH}}
      - name: check runtime
        run: |
          END=$(date +%s)
          ELAPSED=$(( END - START ))
          echo "$START, $END, $ELAPSED"
          # Always export the runtime so EXEC_TIME is populated on every issue,
          # including informational ones.
          echo "ELAPSED=${ELAPSED}" | tee -a "$GITHUB_ENV"
          # -gt compares numerically; `>` inside [[ ]] sorts as strings, which
          # would rank "5" above "22000".
          if [[ "$ELAPSED" -gt ${{ matrix.error_seconds }} ]]; then
            echo "ERROR=true" | tee -a "$GITHUB_ENV"
          fi
      - name: Create Issue for auditing 📑 or failure 😢
        # env.ERROR is a string, so it is compared with 'true'; comparing it
        # with the boolean `true` never matches.
        if: ${{ failure() || (env.ERROR == 'true') || (matrix.info_issue && github.event.schedule == matrix.schedule) }}
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.ADD_TO_PROJECT_PAT }}
          # Read the job id straight from the context; toJson() yields a
          # string, which cannot be indexed by key.
          GITHUB_JOB: ${{ github.job }}
          GITHUB_ATTEMPTS: ${{ github.run_attempt }}
          LAST_COMMIT: ${{ github.sha }}
          LAST_RUN_BY: ${{ github.actor }}
          RUN_ID: ${{ github.run_id }}
          COMMAND: ${{ matrix.command }}
          SCHEDULE: ${{ matrix.schedule }}
          ENVIRONMENT: ${{ matrix.environ }}
          EXEC_TIME: ${{ env.ELAPSED }}
        with:
          filename: ${{ matrix.issue_template }}
          update_existing: true