Skip to content

Commit

Permalink
feat : adding GH action to automatically fetch feeds sent by providers (
Browse files Browse the repository at this point in the history
#304)

* First commit

* Create add_new_or_updated_feeds.yml
  • Loading branch information
fredericsimard authored Oct 3, 2023
1 parent 07b58e2 commit 83dbf1e
Show file tree
Hide file tree
Showing 2 changed files with 315 additions and 0 deletions.
125 changes: 125 additions & 0 deletions .github/workflows/add_new_or_updated_feeds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
name: Add new or updated feeds from Google Sheets/Form

on:
schedule:
- cron: '0 4 * * *' # Runs every day at 04:00 UTC (00:00 EDT / 23:00 EST the previous day)

env:
DATE_FORMAT: "[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}|[0-9]{4}-[0-9]{2}-[0-9]{2}" # Regex that extracts the date part (M/D/YYYY or YYYY-MM-DD) of a CSV timestamp so it can be compared with the local system date.
DATE_FORMAT_DESIRED: "MM/dd/yyyy"

USERNAME: "github-actions[bot]" # GitHub username that will create the PR
USERNAME_EMAIL: "41898282+github-actions[bot]@users.noreply.github.com"

ORGANIZATION: MobilityData # organization name
REPO_NAME: mobity-database-catalogs # repository name
BASE: "main"

REVIEWERS_JSON: "[\"emmambd\"]" # List of GitHub usernames of the reviewers, in a JSON array : ["username1", "username2"]

GTFS_SCHEDULE_CATALOG_PATH_FROM_ROOT: "catalogs/sources/gtfs/schedule/"
GTFS_REALTIME_CATALOG_PATH_FROM_ROOT: "catalogs/sources/gtfs/realtime/"

jobs:
add-new-updated-feeds:
runs-on: ubuntu-latest
steps:
- name: Setup global variables
id: global_vars
run: |
echo "TODAYS_DATE=$(date +%m/%d/%Y)" >> $GITHUB_ENV # Ex.: 07/27/2023
echo "TODAYS_DAY=$(date '+%d')" >> $GITHUB_ENV # Ex.: 27
echo "TODAYS_MONTH=$(date '+%m')" >> $GITHUB_ENV # Ex.: 07
echo "TODAYS_YEAR=$(date '+%Y')" >> $GITHUB_ENV # Ex.: 2023
- name: Create branch name
id: create_branch_name
run: |
echo "BRANCH=${{ env.TODAYS_YEAR }}-${{ env.TODAYS_MONTH }}-${{ env.TODAYS_DAY }}" >> $GITHUB_OUTPUT # Branch name
- name: Load secrets from 1Password
id: onepw_secrets
uses: 1password/[email protected]
with:
export-env: true # Export loaded secrets as environment variables
env:
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
CREDENTIALS: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/ifkeehu5gzi7wy5ub5qvwkaire/credential"

# - name: Get swift version # just for verification, can leave commented out
# run: swift --version
# Swift is installed by default on github runners, tested with v 5.8.1

- name: Checkout repo
id: checkout_repo
uses: actions/checkout@v4
with:
ref: ${{ env.BASE }}
fetch-depth: 0
token: ${{ env.CREDENTIALS }}

- name: Create new branch
shell: bash
run: |
git checkout -b ${{ steps.create_branch_name.outputs.BRANCH }}
git reset --hard ${{ env.BASE }}
- name: Download CSV and process each lines
id: process-csv
run: |
cd ${{ github.workspace }}/scripts
OUTPUT=$(swift process_csv_in_github_action.swift "${{ secrets.CSV_URL }}" "${{ env.DATE_FORMAT }}" "${{ env.DATE_FORMAT_DESIRED }}")
echo "PYTHON_SCRIPT_ARGS=${OUTPUT}" >> $GITHUB_OUTPUT
- name: Setup Python
if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != ''
uses: actions/[email protected]
with:
python-version: '3.11' # install the python version needed

- name: Create + activate a Python virtual env & run script
if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != ''
env:
PYTHONPATH: ${{ github.workspace }}/tools
PYTHONIOENCODING: "utf8" #ascii
shell: bash
run: |
python -m venv env
source env/bin/activate
pip install virtualenv --quiet
pip install gtfs_kit --quiet
pip install unidecode --quiet
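# We use § as the separator because the single-line `name=value` syntax used to write to $GITHUB_OUTPUT cannot carry newlines (multiline values would need the `name<<DELIMITER` form).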
sections=$(echo '${{ steps.process-csv.outputs.PYTHON_SCRIPT_ARGS }}' | awk -F'§' '{for (i=1; i<=NF; i++) print $i}')
for section in "${sections[@]}"; do
eval "python -c 'from tools.operations import *; ${section}'"
done
- name: Commit & push
if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != ''
uses: EndBug/[email protected]
with:
github_token: ${{ env.CREDENTIALS }}
new_branch: ${{ steps.create_branch_name.outputs.BRANCH }}
author_name: ${{ env.USERNAME }}
author_email: ${{ env.USERNAME_EMAIL }}
committer_name: ${{ env.USERNAME }}
committer_email: ${{ env.USERNAME_EMAIL }}
message: "Automated commit — New/Updated feed"

# - name: Create Pull Request
# if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != ''
# uses: peter-evans/[email protected]
# with:
# token: ${{ env.CREDENTIALS }}
# title: "Automated commit — New/Updated feed"
# commit-message: "Automated commit — New/Updated feed"
# body: "New feed(s) were found, and added as a PR for you to review."
# author: "${{ env.USERNAME }} <${{ env.USERNAME_EMAIL }}>"
# reviewers: ${{ env.REVIEWERS_JSON }}
# branch: ${{ steps.create_branch_name.outputs.BRANCH }}
# base: ${{ env.BASE }}
# add-paths: |
# ${{ env.GTFS_SCHEDULE_CATALOG_PATH_FROM_ROOT }}
# ${{ env.GTFS_REALTIME_CATALOG_PATH_FROM_ROOT }}
190 changes: 190 additions & 0 deletions scripts/process_csv_in_github_action.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif

/// Zero-based positions of the fields in one row of the downloaded CSV.
///
/// Raw values are assigned implicitly: the first case is 0 and every following
/// case is one more than the previous one, so the declaration order below IS
/// the column order. Insert or reorder cases only when the sheet changes.
enum column: Int, CaseIterable {
    case timestamp               // 0
    case provider                // 1
    case regioncity              // 2
    case currenturl              // 3
    case updatednewsourceurl     // 4
    case datatype1               // 5
    case request                 // 6
    case downloadurl             // 7
    case country                 // 8
    case subdivision_name        // 9
    case municipality            // 10
    case name                    // 11
    case yournameorg             // 12
    case license_url             // 13
    case tripupdatesurl          // 14
    case servicealertsurl        // 15
    case genunknownrturl         // 16
    case authentication_type     // 17
    case authentication_info_url // 18
    case api_key_parameter_name  // 19
    case note                    // 20
    case gtfsschedulefeatures    // 21
    case gtfsschedulestatus      // 22
    case gtfsrealtimestatus      // 23
    case youremail               // 24
    case dataproduceremail       // 25
    case realtimefeatures        // 26
    case isocountrycode          // 27
    case feedupdatestatus        // 28
}

/// Fallback values used by the script.
enum defaults: String {
    /// Date string returned when no usable date can be extracted from a timestamp.
    case date = "01/01/1970"
}

/// Substrings searched for in the `request` column to decide which operation
/// a row asks for.
enum requestType: String {
    /// The row asks for a new feed to be added.
    case isAddNewFeed = "New source"

    /// The row asks for an existing feed to be updated.
    case isUpdateExistingFeed = "Source update"

    /// The row asks for a feed to be removed.
    case isToRemoveFeed = "removed"
}

/// Substrings searched for in the `datatype1` column to tell schedule feeds
/// apart from realtime feeds.
enum dataType: String {
    /// Marker for a schedule feed.
    case schedule = "Schedule"

    /// Marker for a realtime feed.
    case realtime = "Realtime"
}

/// Splits CSV text into rows of fields, honouring RFC 4180 quoting.
///
/// A field that starts with a double quote may contain the column separator,
/// line breaks and doubled quotes (`""` stands for one literal quote); the
/// surrounding quotes are removed. LF, CRLF and CR are all accepted as row
/// terminators outside of quotes.
///
/// - Parameters:
///   - text: The whole CSV document.
///   - columnSeparator: The character separating two fields (`,` by default).
/// - Returns: One array of field strings per row, in document order.
func parseCSVRows(_ text: String, columnSeparator: Character = ",") -> [[String]] {
    var rows: [[String]] = []
    var currentRow: [String] = []
    var currentField = ""
    var insideQuotes = false

    var iterator = text.makeIterator()
    var lookahead: Character? = iterator.next()

    while let character = lookahead {
        lookahead = iterator.next()

        if insideQuotes {
            if character == "\"" {
                if lookahead == "\"" {
                    // Doubled quote inside a quoted field: one literal quote.
                    currentField.append("\"")
                    lookahead = iterator.next()
                } else {
                    insideQuotes = false
                }
            } else {
                currentField.append(character)
            }
        } else if character == "\"" && currentField.isEmpty {
            // A quote only opens a quoted field when it is the field's first character.
            insideQuotes = true
        } else if character == columnSeparator {
            currentRow.append(currentField)
            currentField = ""
        } else if character == "\n" || character == "\r\n" || character == "\r" {
            // Swift treats CR+LF as a single Character, hence the explicit "\r\n" case.
            currentRow.append(currentField)
            rows.append(currentRow)
            currentRow = []
            currentField = ""
        } else {
            currentField.append(character)
        }
    }

    // Flush the last row when the document does not end with a line break.
    if !currentField.isEmpty || !currentRow.isEmpty {
        currentRow.append(currentField)
        rows.append(currentRow)
    }

    return rows
}

/// Writes a diagnostic to standard error.
///
/// Standard output is reserved for the script's result (the caller captures
/// it), so diagnostics must not be printed there.
func reportError(_ message: String) {
    FileHandle.standardError.write(Data((message + "\n").utf8))
}

// Script entry point.
//
// Expects exactly three arguments: the CSV URL, a regular expression matching
// the date inside a timestamp, and the date format used to compare dates.
// Prints to standard output the operations to run for the rows dated today,
// joined by "§" (an empty line when there is nothing to do).
let arguments: [String] = CommandLine.arguments

if arguments.count == 4 {

    let csvColumnSeparator: Character = ","
    let outputSeparator = "§"

    // arguments[0] is the name of the script and is ignored here.
    let csvURLStringArg = arguments[1]
    let dateFormatGREPArg = arguments[2]
    let dateFormatDesiredArg = arguments[3]

    guard let csvURLasURL = URL(string: csvURLStringArg) else {
        reportError("\n   ERROR: The specified URL does not appear to exist :\n   \(csvURLStringArg)\n")
        exit(1)
    }

    let dateFormatter = DateFormatter()
    dateFormatter.dateFormat = dateFormatDesiredArg
    let todayDate = dateFormatter.string(from: Date()) // Ex.: 07/27/2023

    let csvData = try String(contentsOf: csvURLasURL, encoding: .utf8)
    let csvRows = parseCSVRows(csvData, columnSeparator: csvColumnSeparator)

    let dateFormatAsRegex: Regex<AnyRegexOutput> = try Regex(dateFormatGREPArg)

    var pythonCalls: [String] = []

    for line in csvRows {

        // Skip blank or truncated rows: every known column must be present.
        guard line.count >= column.allCases.count else { continue }

        let timestamp = line[column.timestamp.rawValue].trimmingCharacters(in: .whitespacesAndNewlines)
        let dateFromCurrentLine = extractDate(from: timestamp, usingGREP: dateFormatAsRegex, desiredDateFormat: dateFormatDesiredArg)

        // Only rows added today are processed.
        guard dateFromCurrentLine == todayDate else { continue }

        let provider = line[column.provider.rawValue]
        let datatype1 = line[column.datatype1.rawValue]
        let request = line[column.request.rawValue]
        let country = line[column.country.rawValue]
        let subdivision_name = line[column.subdivision_name.rawValue]
        let municipality = line[column.municipality.rawValue]
        let name = line[column.name.rawValue]
        let license_url = line[column.license_url.rawValue]
        let downloadURL = line[column.downloadurl.rawValue]
        let authentication_type = line[column.authentication_type.rawValue]
        let authentication_info_url = line[column.authentication_info_url.rawValue]
        let api_key_parameter_name = line[column.api_key_parameter_name.rawValue]
        let note = line[column.note.rawValue]
        let gtfsschedulefeatures = line[column.gtfsschedulefeatures.rawValue]
        let gtfsschedulestatus = line[column.gtfsschedulestatus.rawValue]
        let gtfsrealtimestatus = line[column.gtfsrealtimestatus.rawValue]
        let realtimefeatures = line[column.realtimefeatures.rawValue]

        // NOTE(review): field values are interpolated verbatim (no quoting or
        // escaping) and `mdb_source_id=` is left empty in the update calls.
        // The values come from an external sheet and the output is executed
        // downstream, so confirm the consumer validates/escapes them.

        /// Builds the `add_gtfs_schedule_source(...)` call for the current row.
        func addScheduleCall() -> String {
            "add_gtfs_schedule_source(provider=\(provider), country_code=\(country), direct_download_url=\(downloadURL), authentication_type=\(authentication_type), authentication_info_url=\(authentication_info_url), api_key_parameter_name=\(api_key_parameter_name), subdivision_name=\(subdivision_name), municipality=\(municipality), license_url=\(license_url), name=\(name), status=\(gtfsschedulestatus), features=\(gtfsschedulefeatures))"
        }

        /// Builds the `add_gtfs_realtime_source(...)` call for the current row.
        func addRealtimeCall() -> String {
            // Emma: entity_type matches the realtime Data type options of Vehicle Positions, Trip Updates, or Service Alerts. If one of those three are selected, add it. If not, omit it.
            "add_gtfs_realtime_source(entity_type=\(datatype1), provider=\(provider), direct_download_url=\(downloadURL), authentication_type=\(authentication_type), authentication_info_url=\(authentication_info_url), api_key_parameter_name=\(api_key_parameter_name), license_url=\(license_url), name=\(name), static_reference=\"TO_BE_PROVIDED\", note=\(note), status=\(gtfsrealtimestatus), features=\(realtimefeatures))"
        }

        /// Builds the `update_gtfs_schedule_source(...)` call using `displayName` as the `name` argument.
        func updateScheduleCall(named displayName: String) -> String {
            "update_gtfs_schedule_source(mdb_source_id=, provider=\(provider), name=\(displayName), country_code=\(country), subdivision_name=\(subdivision_name), municipality=\(municipality), direct_download_url=\(downloadURL), authentication_type=\(authentication_type), authentication_info_url=\(authentication_info_url), api_key_parameter_name=\(api_key_parameter_name), license_url=\(license_url), status=\(gtfsschedulestatus), features=\(gtfsschedulefeatures))"
        }

        /// Builds the `update_gtfs_realtime_source(...)` call using `displayName` as the `name` argument.
        func updateRealtimeCall(named displayName: String) -> String {
            "update_gtfs_realtime_source(mdb_source_id=, entity_type=\(datatype1), provider=\(provider), direct_download_url=\(downloadURL), authentication_type=\(authentication_type), authentication_info_url=\(authentication_info_url), api_key_parameter_name=\(api_key_parameter_name), license_url=\(license_url), name=\(displayName), static_reference=\"TO_BE_PROVIDED\", note=\(note), status=\(gtfsrealtimestatus), features=\(realtimefeatures))"
        }

        // A removal is expressed as an update whose name is this marker.
        let removalMarker = "\"**** Requested for removal ****\""

        // "Schedule" is tested before "Realtime", so it wins when both appear.
        let isSchedule = datatype1.contains(dataType.schedule.rawValue)
        let isRealtime = datatype1.contains(dataType.realtime.rawValue)

        let pythonCall: String?
        if request.contains(requestType.isAddNewFeed.rawValue) { // add new feed
            pythonCall = isSchedule ? addScheduleCall() : (isRealtime ? addRealtimeCall() : nil)
        } else if request.contains(requestType.isUpdateExistingFeed.rawValue) { // update existing feed
            pythonCall = isSchedule ? updateScheduleCall(named: name) : (isRealtime ? updateRealtimeCall(named: name) : nil)
        } else if request.contains(requestType.isToRemoveFeed.rawValue) { // remove feed
            pythonCall = isSchedule ? updateScheduleCall(named: removalMarker) : (isRealtime ? updateRealtimeCall(named: removalMarker) : nil)
        } else { // ... assume this is a new schedule feed by default
            pythonCall = addScheduleCall()
        }

        if let pythonCall {
            pythonCalls.append(pythonCall)
        }

    } // END FOR LOOP

    // Return the final output so the action can grab it and pass it on to the Python script.
    print(pythonCalls.joined(separator: outputSeparator))

} else {
    reportError("Incorrect number of arguments provided to the script. Expected 3: a string with the URL, a date format and the date format desired.")
    exit(1)
}

// MARK: - FUNCTIONS

/// Extracts the first date found in a timestamp and returns it in the desired format.
///
/// The first match of `dateFormatAsGREP` in `theDateToConvert` is parsed with
/// `desiredFormat`; when that fails it is parsed as an ISO-style `yyyy-MM-dd`
/// date, so both timestamp shapes are handled. The parsed date is then
/// formatted with `desiredFormat`.
///
/// - Parameters:
///   - theDateToConvert: Text containing a date, e.g. a CSV timestamp.
///   - dateFormatAsGREP: Regular expression locating the date inside the text.
///   - desiredFormat: `DateFormatter` pattern of the returned string (e.g. `MM/dd/yyyy`).
/// - Returns: The date formatted with `desiredFormat`, or `defaults.date`'s raw
///   value when no date is found or the match cannot be parsed.
func extractDate(from theDateToConvert: String, usingGREP dateFormatAsGREP: Regex<AnyRegexOutput>, desiredDateFormat desiredFormat: String) -> String {
    guard let match = theDateToConvert.firstMatch(of: dateFormatAsGREP) else {
        return defaults.date.rawValue
    }
    let matchedText = String(theDateToConvert[match.range])

    let dateFormatter = DateFormatter()
    // Fixed-format parsing should not depend on the user's or runner's locale.
    dateFormatter.locale = Locale(identifier: "en_US_POSIX")

    // Try the desired format first (original behaviour), then the ISO shape.
    let isoDateFormat = "yyyy-MM-dd"
    for candidateFormat in [desiredFormat, isoDateFormat] {
        dateFormatter.dateFormat = candidateFormat
        if let parsedDate = dateFormatter.date(from: matchedText) {
            dateFormatter.dateFormat = desiredFormat
            return dateFormatter.string(from: parsedDate)
        }
    }

    // The match could not be parsed with any supported format.
    return defaults.date.rawValue
}

0 comments on commit 83dbf1e

Please sign in to comment.