diff --git a/hub_utils/main.py b/hub_utils/main.py index 340390a..f1ad88e 100644 --- a/hub_utils/main.py +++ b/hub_utils/main.py @@ -332,6 +332,7 @@ def download_metadata( local_path: str, variant_path_list: str = None, all_sdk: bool = True, + ignore_list_str: str = "", ): """ NOTE: USED FOR @@ -340,6 +341,7 @@ def download_metadata( """ util = Utilities() s3 = S3() + ignore_list = ignore_list_str.split(",") if not variant_path_list: variant_path_list = ",".join(SDK_SUFFIX_LIST) if all_sdk: @@ -347,9 +349,12 @@ def download_metadata( [ i["plugin-name"].split(".yml")[0] for i in util.get_variant_names(None, "sdk") + if i["plugin-name"].split(".yml")[0] not in ignore_list ] ) for yaml_file in variant_path_list.split(","): + if not yaml_file: + continue suffix = util.get_suffix(yaml_file) local_file_path = f"{local_path}/{suffix}.json" s3.download_latest(os.environ.get("AWS_S3_BUCKET"), suffix, local_file_path) diff --git a/tests/_data/meltano/extractors/tap-github/meltanolabs.yml b/tests/_data/meltano/extractors/tap-github/meltanolabs.yml new file mode 100644 index 0000000..3503563 --- /dev/null +++ b/tests/_data/meltano/extractors/tap-github/meltanolabs.yml @@ -0,0 +1,127 @@ +capabilities: +- about +- batch +- catalog +- discover +- schema-flattening +- state +- stream-maps +description: Code hosting platform +domain_url: https://docs.github.com/en/rest +keywords: +- api +- free service +- meltano_sdk +label: GitHub +logo_url: /assets/logos/extractors/github.png +maintenance_status: active +name: tap-github +namespace: tap_github +pip_url: git+https://github.com/MeltanoLabs/tap-github.git +quality: gold +repo: https://github.com/MeltanoLabs/tap-github +select: +- '*.*' +- '!traffic_*.*' +settings: +- description: List of GitHub tokens to authenticate with. Streams will loop through + them when hitting rate limits. + kind: array + label: Additional Auth Tokens + name: additional_auth_tokens +- description: GitHub token to authenticate with. 
+ kind: password + label: Auth Token + name: auth_token +- description: Compression format to use for batch files. + kind: options + label: Batch Config Encoding Compression + name: batch_config.encoding.compression + options: + - label: Gzip + value: gzip + - label: None + value: none +- description: Format to use for batch files. + kind: options + label: Batch Config Encoding Format + name: batch_config.encoding.format + options: + - label: Jsonl + value: jsonl +- description: Prefix to use when writing batch files. + kind: string + label: Batch Config Storage Prefix + name: batch_config.storage.prefix +- description: Root path to use when writing batch files. + kind: string + label: Batch Config Storage Root + name: batch_config.storage.root +- description: "'True' to enable schema flattening and automatically expand nested + properties." + kind: boolean + label: Flattening Enabled + name: flattening_enabled +- description: The max depth to flatten schemas. + kind: integer + label: Flattening Max Depth + name: flattening_max_depth +- description: The log level of the API response metrics. + kind: string + label: Metrics Log Level + name: metrics_log_level +- description: An array of strings containing the github organizations to be included + kind: array + label: Organizations + name: organizations +- description: Add a buffer to avoid consuming all query points for the token at hand. + Defaults to 1000. + kind: integer + label: Rate Limit Buffer + name: rate_limit_buffer +- description: An array of strings containing the github repos to be included + kind: array + label: Repositories + name: repositories +- description: An array of search descriptor objects with the following properties. + "name" - a human readable name for the search query. 
"query" - a github search + string (generally the same as would come after ?q= in the URL) + kind: array + label: Searches + name: searches +- description: Set to true to skip API calls for the parent streams (such as repositories) + if it is not selected but children are + kind: boolean + label: Skip Parent Streams + name: skip_parent_streams +- description: '' + kind: date_iso8601 + label: Start Date + name: start_date +- description: '' + kind: object + label: Stream Map Config + name: stream_map_config +- description: '' + kind: object + label: Stream Maps + name: stream_maps +- description: '' + kind: string + label: User Agent + name: user_agent +- description: A list of GitHub user ids. + kind: array + label: User IDs + name: user_ids +- description: A list of GithHub usernames. + kind: array + label: User Usernames + name: user_usernames +settings_group_validation: +- - repositories +- - organizations +- - searches +- - user_usernames +- - user_ids +variant: meltanolabs diff --git a/tests/_data/meltano/extractors/tap-hubspot/hotgluexyz.yml b/tests/_data/meltano/extractors/tap-hubspot/hotgluexyz.yml new file mode 100644 index 0000000..faaeb95 --- /dev/null +++ b/tests/_data/meltano/extractors/tap-hubspot/hotgluexyz.yml @@ -0,0 +1,57 @@ +capabilities: +- about +- discover +- catalog +- state +- stream-maps +- schema-flattening +description: Inbound Marketing software +domain_url: https://developers.hubspot.com/docs/api/overview +executable: tap-hubspot-beta +keywords: +- api +- meltano_sdk +label: Hubspot +logo_url: /assets/logos/extractors/hubspot.png +maintenance_status: active +name: tap-hubspot +namespace: tap_hubspot +pip_url: git+https://gitlab.com/hotglue/tap-hubspot-beta.git +quality: gold +repo: https://gitlab.com/hotglue/tap-hubspot-beta +settings: +- description: HubSpot Access token. See the Hubspot + docs if you need help finding this token. 
+ kind: password + label: Access Token + name: access_token +- description: The client ID used for authentication. + documentation: https://developers.hubspot.com/docs/api/working-with-oauth + label: Client ID + name: client_id +- description: The client secret used for authentication. + kind: password + label: Client Secret + name: client_secret +- description: This is the URL that the user will be redirected to after they authorize + your app for the requested scopes + documentation: https://developers.hubspot.com/docs/api/working-with-oauth + label: Redirect URI + name: redirect_uri +- description: This is the refresh token provided by HubSpot. + kind: password + label: Refresh Token + name: refresh_token +- description: The seconds until the token expires. + kind: integer + label: Expires In + name: expires_in +- description: The time to start syncing data from if no existing state is found. + label: Start Date + name: start_date +settings_group_validation: +- - client_id + - client_secret + - redirect_uri + - refresh_token +variant: hotgluexyz diff --git a/tests/_data/meltano/extractors/tap-hubspot/meltanolabs.yml b/tests/_data/meltano/extractors/tap-hubspot/meltanolabs.yml new file mode 100644 index 0000000..1572a43 --- /dev/null +++ b/tests/_data/meltano/extractors/tap-hubspot/meltanolabs.yml @@ -0,0 +1,82 @@ +capabilities: +- about +- batch +- catalog +- discover +- schema-flattening +- state +- stream-maps +description: Inbound Marketing software +domain_url: https://developers.hubspot.com/docs/api/overview +executable: tap-hubspot +keywords: +- meltano_sdk +label: Hubspot +logo_url: /assets/logos/extractors/hubspot.png +maintenance_status: active +name: tap-hubspot +namespace: tap_hubspot +next_steps: '' +pip_url: git+https://github.com/MeltanoLabs/tap-hubspot.git +quality: gold +repo: https://github.com/MeltanoLabs/tap-hubspot +settings: +- description: Token to authenticate against the API service + kind: password + label: Access Token + name: 
access_token +- description: Compression format to use for batch files. + kind: options + label: Batch Config Encoding Compression + name: batch_config.encoding.compression + options: + - label: Gzip + value: gzip + - label: None + value: none +- description: Format to use for batch files. + kind: options + label: Batch Config Encoding Format + name: batch_config.encoding.format + options: + - label: Jsonl + value: jsonl +- description: Prefix to use when writing batch files. + kind: string + label: Batch Config Storage Prefix + name: batch_config.storage.prefix +- description: Root path to use when writing batch files. + kind: string + label: Batch Config Storage Root + name: batch_config.storage.root +- description: Latest record date to sync + kind: date_iso8601 + label: End Date + name: end_date +- description: "'True' to enable schema flattening and automatically expand nested + properties." + kind: boolean + label: Flattening Enabled + name: flattening_enabled +- description: The max depth to flatten schemas. + kind: integer + label: Flattening Max Depth + name: flattening_max_depth +- description: Earliest record date to sync + kind: date_iso8601 + label: Start Date + name: start_date +- description: User-defined config values to be used within map expressions. + kind: object + label: Stream Map Config + name: stream_map_config +- description: Config object for stream maps capability. For more information check + out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html). 
+  kind: object
+  label: Stream Maps
+  name: stream_maps
+settings_group_validation:
+- - access_token
+settings_preamble: ''
+usage: ''
+variant: meltanolabs
diff --git a/tests/test_main.py b/tests/test_main.py
index e70ecf6..f915630 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -25,6 +25,39 @@ def test_download_metadata(patch):
         f"{local_path}/extractors/tap-csv/meltanolabs.json"
     )
 
+@patch.object(S3, "download_latest")
+def test_download_metadata_ignore(mock_download):
+    """Check that ``ignore_list_str`` drops a variant from the all-SDK download."""
+    expected_bucket = "TEST_BUCKET"
+    local_path = f"{PATH}/data/output_path"
+    # Scope the environment overrides to this test so they cannot leak into
+    # tests that run afterwards.
+    test_env = {"AWS_S3_BUCKET": expected_bucket, "HUB_ROOT_PATH": "./tests/"}
+    with patch.dict(os.environ, test_env):
+        download_metadata(
+            local_path,
+            all_sdk=True,
+            ignore_list_str="extractors/tap-hubspot/hotgluexyz",
+        )
+    assert mock_download.call_count == 2
+    # The download order follows the variant listing, which this test does not
+    # control, so only the set of calls is checked.
+    mock_download.assert_has_calls(
+        [
+            call(
+                expected_bucket,
+                "extractors/tap-hubspot/meltanolabs",
+                f"{local_path}/extractors/tap-hubspot/meltanolabs.json",
+            ),
+            call(
+                expected_bucket,
+                "extractors/tap-github/meltanolabs",
+                f"{local_path}/extractors/tap-github/meltanolabs.json",
+            ),
+        ],
+        any_order=True,
+    )
+
 @patch.object(S3, "download_latest")
 def test_download_metadata_list(patch):
 