-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat : adding GH action to automatically fetch feeds sent by providers (
#304) * First commit * Create add_new_or_updated_feeds.yml
- Loading branch information
1 parent
07b58e2
commit 83dbf1e
Showing
2 changed files
with
315 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
name: Add new or updated feeds from Google Sheets/Form | ||
|
||
on: | ||
schedule: | ||
- cron: '0 4 * * *' # Runs every day at 04:00 UTC (00:00 EDT / 23:00 EST) | ||
|
||
env: | ||
DATE_FORMAT: "[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}|[0-9]{4}-[0-9]{2}-[0-9]{2}" # this is the format we need to compare dates between the CSV and the local system. | ||
DATE_FORMAT_DESIRED: "MM/dd/yyyy" | ||
|
||
USERNAME: "github-actions[bot]" # GitHub username that will create the PR | ||
USERNAME_EMAIL: "41898282+github-actions[bot]@users.noreply.github.com" | ||
|
||
ORGANIZATION: MobilityData # organization name | ||
REPO_NAME: mobity-database-catalogs # repository name | ||
BASE: "main" | ||
|
||
REVIEWERS_JSON: "[\"emmambd\"]" # List of GitHub usernames of the reviewers, in a JSON array : ["username1", "username2"] | ||
|
||
GTFS_SCHEDULE_CATALOG_PATH_FROM_ROOT: "catalogs/sources/gtfs/schedule/" | ||
GTFS_REALTIME_CATALOG_PATH_FROM_ROOT: "catalogs/sources/gtfs/realtime/" | ||
|
||
jobs: | ||
add-new-updated-feeds: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Setup global variables | ||
id: global_vars | ||
run: | | ||
echo "TODAYS_DATE=$(date +%m/%d/%Y)" >> $GITHUB_ENV # Ex.: 07/27/2023 | ||
echo "TODAYS_DAY=$(date '+%d')" >> $GITHUB_ENV # Ex.: 27 | ||
echo "TODAYS_MONTH=$(date '+%m')" >> $GITHUB_ENV # Ex.: 07 | ||
echo "TODAYS_YEAR=$(date '+%Y')" >> $GITHUB_ENV # Ex.: 2023 | ||
- name: Create branch name | ||
id: create_branch_name | ||
run: | | ||
echo "BRANCH=${{ env.TODAYS_YEAR }}-${{ env.TODAYS_MONTH }}-${{ env.TODAYS_DAY }}" >> $GITHUB_OUTPUT # Branch name | ||
- name: Load secrets from 1Password | ||
id: onepw_secrets | ||
uses: 1password/[email protected] | ||
with: | ||
export-env: true # Export loaded secrets as environment variables | ||
env: | ||
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} | ||
CREDENTIALS: "op://rbiv7rvkkrsdlpcrz3bmv7nmcu/ifkeehu5gzi7wy5ub5qvwkaire/credential" | ||
|
||
# - name: Get swift version # just for verification, can leave commented out | ||
# run: swift --version | ||
# Swift is installed by default on github runners, tested with v 5.8.1 | ||
|
||
- name: Checkout repo | ||
id: checkout_repo | ||
uses: actions/checkout@v4 | ||
with: | ||
ref: ${{ env.BASE }} | ||
fetch-depth: 0 | ||
token: ${{ env.CREDENTIALS }} | ||
|
||
- name: Create new branch | ||
shell: bash | ||
run: | | ||
git checkout -b ${{ steps.create_branch_name.outputs.BRANCH }} | ||
git reset --hard ${{ env.BASE }} | ||
- name: Download CSV and process each lines | ||
id: process-csv | ||
run: | | ||
cd ${{ github.workspace }}/scripts | ||
OUTPUT=$(swift process_csv_in_github_action.swift "${{ secrets.CSV_URL }}" "${{ env.DATE_FORMAT }}" "${{ env.DATE_FORMAT_DESIRED }}") | ||
echo "PYTHON_SCRIPT_ARGS=${OUTPUT}" >> $GITHUB_OUTPUT | ||
- name: Setup Python | ||
if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != '' | ||
uses: actions/[email protected] | ||
with: | ||
python-version: '3.11' # install the python version needed | ||
|
||
- name: Create + activate a Python virtual env & run script | ||
if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != '' | ||
env: | ||
PYTHONPATH: ${{ github.workspace }}/tools | ||
PYTHONIOENCODING: "utf8" #ascii | ||
shell: bash | ||
run: | | ||
python -m venv env | ||
source env/bin/activate | ||
pip install virtualenv --quiet | ||
pip install gtfs_kit --quiet | ||
pip install unidecode --quiet | ||
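# We use § as the separator because a single-line "name=value" entry written to $GITHUB_OUTPUT cannot contain newlines (multiline values would need the heredoc-style delimiter syntax). | ||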
sections=$(echo '${{ steps.process-csv.outputs.PYTHON_SCRIPT_ARGS }}' | awk -F'§' '{for (i=1; i<=NF; i++) print $i}') | ||
for section in "${sections[@]}"; do | ||
eval "python -c 'from tools.operations import *; ${section}'" | ||
done | ||
- name: Commit & push | ||
if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != '' | ||
uses: EndBug/[email protected] | ||
with: | ||
github_token: ${{ env.CREDENTIALS }} | ||
new_branch: ${{ steps.create_branch_name.outputs.BRANCH }} | ||
author_name: ${{ env.USERNAME }} | ||
author_email: ${{ env.USERNAME_EMAIL }} | ||
committer_name: ${{ env.USERNAME }} | ||
committer_email: ${{ env.USERNAME_EMAIL }} | ||
message: "Automated commit — New/Updated feed" | ||
|
||
# - name: Create Pull Request | ||
# if: steps.process-csv.outputs.PYTHON_SCRIPT_ARGS != '' | ||
# uses: peter-evans/[email protected] | ||
# with: | ||
# token: ${{ env.CREDENTIALS }} | ||
# title: "Automated commit — New/Updated feed" | ||
# commit-message: "Automated commit — New/Updated feed" | ||
# body: "New feed(s) were found, and added as a PR for you to review." | ||
# author: "${{ env.USERNAME }} <${{ env.USERNAME_EMAIL }}>" | ||
# reviewers: ${{ env.REVIEWERS_JSON }} | ||
# branch: ${{ steps.create_branch_name.outputs.BRANCH }} | ||
# base: ${{ env.BASE }} | ||
# add-paths: | | ||
# ${{ env.GTFS_SCHEDULE_CATALOG_PATH_FROM_ROOT }} | ||
# ${{ env.GTFS_REALTIME_CATALOG_PATH_FROM_ROOT }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
import Foundation | ||
#if canImport(FoundationNetworking) | ||
import FoundationNetworking | ||
#endif | ||
|
||
/// Zero-based positions of the fields inside one comma-separated row of the CSV.
///
/// The raw value of each case is used directly as the index into the array
/// produced by splitting a row on the column separator, and `allCases.count`
/// is the minimum number of fields a row must have before it is processed.
/// Raw values are assigned implicitly, so the declaration order below IS the
/// column order: do not reorder cases without also reordering the sheet.
enum column : Int, CaseIterable {
    // 0 – 4
    case timestamp = 0
    case provider
    case regioncity
    case currenturl
    case updatednewsourceurl
    // 5 – 9
    case datatype1
    case request
    case downloadurl
    case country
    case subdivision_name
    // 10 – 14
    case municipality
    case name
    case yournameorg
    case license_url
    case tripupdatesurl
    // 15 – 19
    case servicealertsurl
    case genunknownrturl
    case authentication_type
    case authentication_info_url
    case api_key_parameter_name
    // 20 – 24
    case note
    case gtfsschedulefeatures
    case gtfsschedulestatus
    case gtfsrealtimestatus
    case youremail
    // 25 – 28
    case dataproduceremail
    case realtimefeatures
    case isocountrycode
    case feedupdatestatus
}
|
||
/// Fallback values used when a real value cannot be derived from the input.
enum defaults : String {
    /// Date string returned by `extractDate` when no date can be found in, or
    /// parsed from, the text it is given.
    case date = "01/01/1970"
}
|
||
/// Marker substrings looked for (via `contains`) in the `request` column of a
/// row to decide which kind of catalog operation the row asks for.
enum requestType : String {
    /// The row asks for a brand-new source to be added.
    case isAddNewFeed         = "New source"
    /// The row asks for an existing source to be updated.
    case isUpdateExistingFeed = "Source update"
    /// The row asks for an existing source to be removed.
    case isToRemoveFeed       = "removed"
}
|
||
/// Marker substrings looked for (via `contains`) in the `datatype1` column of
/// a row to tell schedule feeds from realtime feeds.
enum dataType : String {
    case schedule = "Schedule", realtime = "Realtime"
}
|
||
// MARK: - SCRIPT BODY
//
// Expects exactly three arguments after the script name:
//   1. the URL of the CSV to download,
//   2. a regular expression matching the date inside each row's timestamp,
//   3. the date format used to compare that date with today's date.
//
// For every row whose date is today, one Python call expression is built.
// All expressions are printed on a single line, joined by "§".

let arguments : [String] = CommandLine.arguments

if CommandLine.argc != 4 {
    print("Incorrect number of arguments provided to the script. Expected 3: a string with the URL, a date format and the date format desired.")
    exit(1)
} else {

    let rowDelimiter : String = "\n"
    let fieldDelimiter : String = ","

    // arguments[0] is the script name and is not needed here.
    let sheetURLText : String = arguments[1]
    let datePattern : String = arguments[2]
    let outputDateFormat : String = arguments[3]

    guard let sheetURL : URL = URL(string: sheetURLText) else {
        print("\n ERROR: The specified URL does not appear to exist :\n \(sheetURLText)\n")
        exit(1)
    }

    // Today's date rendered in the comparison format. Ex.: 07/27/2023
    let todayFormatter : DateFormatter = DateFormatter()
    todayFormatter.dateFormat = outputDateFormat
    let todayText : String = todayFormatter.string(from: Date())

    // Download the sheet and split it into rows of fields.
    // NOTE(review): this is a plain split on "," and "\n"; quoted CSV fields
    // that contain commas or newlines would be broken apart — confirm the
    // sheet never produces such fields.
    let sheetText : String = try String(contentsOf: sheetURL, encoding: .utf8)
    let rows : [[String]] = sheetText
        .components(separatedBy: rowDelimiter)
        .map { $0.components(separatedBy: fieldDelimiter) }

    let datePatternRegex : Regex<AnyRegexOutput> = try Regex(datePattern)

    var commands : [String] = []

    // Rows with fewer fields than there are known columns are ignored.
    for fields : [String] in rows where fields.count >= column.allCases.count {

        func value(_ key: column) -> String { fields[key.rawValue] }

        // Only rows whose timestamp carries today's date are processed.
        let stamp : String = value(.timestamp).trimmingCharacters(in: .whitespacesAndNewlines)
        let rowDate : String = extractDate(from: stamp, usingGREP: datePatternRegex, desiredDateFormat: outputDateFormat)
        guard rowDate == todayText else { continue }

        let provider : String = value(.provider)
        let feedKind : String = value(.datatype1)
        let request : String = value(.request)
        let country : String = value(.country)
        let subdivision : String = value(.subdivision_name)
        let municipality : String = value(.municipality)
        let feedName : String = value(.name)
        let licenseURL : String = value(.license_url)
        let downloadURL : String = value(.downloadurl)
        let authType : String = value(.authentication_type)
        let authInfoURL : String = value(.authentication_info_url)
        let apiKeyParameter : String = value(.api_key_parameter_name)
        let note : String = value(.note)
        let scheduleFeatures : String = value(.gtfsschedulefeatures)
        let scheduleStatus : String = value(.gtfsschedulestatus)
        let realtimeStatus : String = value(.gtfsrealtimestatus)
        let realtimeFeatures : String = value(.realtimefeatures)

        // NOTE(review): field values are interpolated as-is (no quoting or
        // escaping) and `mdb_source_id=` is emitted without a value — confirm
        // the consumer of this output expects exactly this shape.

        // Argument run shared, in this order, by every emitted call.
        let accessArgs : String = "direct_download_url=\(downloadURL), authentication_type=\(authType), authentication_info_url=\(authInfoURL), api_key_parameter_name=\(apiKeyParameter)"

        func scheduleAddCall() -> String {
            "add_gtfs_schedule_source(provider=\(provider), country_code=\(country), \(accessArgs), subdivision_name=\(subdivision), municipality=\(municipality), license_url=\(licenseURL), name=\(feedName), status=\(scheduleStatus), features=\(scheduleFeatures))"
        }

        func scheduleUpdateCall(named shownName: String) -> String {
            "update_gtfs_schedule_source(mdb_source_id=, provider=\(provider), name=\(shownName), country_code=\(country), subdivision_name=\(subdivision), municipality=\(municipality), \(accessArgs), license_url=\(licenseURL), status=\(scheduleStatus), features=\(scheduleFeatures))"
        }

        // Emma: entity_type matches the realtime Data type options of Vehicle Positions, Trip Updates, or Service Alerts. If one of those three are selected, add it. If not, omit it.
        func realtimeArgs(named shownName: String) -> String {
            "entity_type=\(feedKind), provider=\(provider), \(accessArgs), license_url=\(licenseURL), name=\(shownName), static_reference=\"TO_BE_PROVIDED\", note=\(note), status=\(realtimeStatus), features=\(realtimeFeatures)"
        }

        // "Schedule" takes precedence when both markers are present.
        let isSchedule : Bool = feedKind.contains(dataType.schedule.rawValue)
        let isRealtime : Bool = feedKind.contains(dataType.realtime.rawValue)

        // Removal requests are emitted as updates whose name is this marker.
        let removalName : String = "\"**** Requested for removal ****\""

        var command : String = ""

        if request.contains(requestType.isAddNewFeed.rawValue) {            // add new feed
            if isSchedule {
                command = scheduleAddCall()
            } else if isRealtime {
                command = "add_gtfs_realtime_source(\(realtimeArgs(named: feedName)))"
            }
        } else if request.contains(requestType.isUpdateExistingFeed.rawValue) {   // update existing feed
            if isSchedule {
                command = scheduleUpdateCall(named: feedName)
            } else if isRealtime {
                command = "update_gtfs_realtime_source(mdb_source_id=, \(realtimeArgs(named: feedName)))"
            }
        } else if request.contains(requestType.isToRemoveFeed.rawValue) {         // remove feed
            if isSchedule {
                command = scheduleUpdateCall(named: removalName)
            } else if isRealtime {
                command = "update_gtfs_realtime_source(mdb_source_id=, \(realtimeArgs(named: removalName)))"
            }
        } else {
            // Unrecognised request: assume a new schedule feed by default.
            command = scheduleAddCall()
        }

        if !command.isEmpty { commands.append(command) }

    } // END FOR LOOP

    // Final output: the action captures this line and passes it on to the Python script.
    print(commands.joined(separator: "§"))

}
|
||
// MARK: - FUNCTIONS | ||
|
||
/// Finds the first date in `theDateToConvert` and returns it rendered in `desiredFormat`.
///
/// The first match of `dateFormatAsGREP` in the input is taken as the date
/// text. That text is parsed with `desiredFormat` first; if that fails it is
/// parsed as an ISO calendar date (`yyyy-MM-dd`), because the pattern supplied
/// by the workflow also accepts that form and such matches could otherwise
/// never be parsed.
///
/// - Parameters:
///   - theDateToConvert: Text that may contain a date (for example a timestamp).
///   - dateFormatAsGREP: Regular expression whose first match is the date text.
///   - desiredFormat: `DateFormatter` pattern used for the returned string.
/// - Returns: The date formatted with `desiredFormat`, or `defaults.date`'s raw
///   value when nothing matches or the matched text cannot be parsed.
func extractDate(from theDateToConvert: String, usingGREP dateFormatAsGREP: Regex<AnyRegexOutput>, desiredDateFormat desiredFormat: String) -> String {
    let fallback : String = defaults.date.rawValue

    guard let match = theDateToConvert.firstMatch(of: dateFormatAsGREP) else {
        return fallback
    }

    // The range of the whole match always exists, so no force-unwrap is needed.
    let matchedText : String = String(theDateToConvert[match.range])

    let formatter : DateFormatter = DateFormatter()

    // The desired format is tried first so every input that parsed before
    // still yields the same result; the ISO form is only a fallback.
    for inputFormat : String in [desiredFormat, "yyyy-MM-dd"] {
        formatter.dateFormat = inputFormat
        if let parsed : Date = formatter.date(from: matchedText) {
            formatter.dateFormat = desiredFormat
            return formatter.string(from: parsed)
        }
    }

    return fallback
}