Skip to content

Commit

Permalink
Replace underscore with dash in source_name (#130)
Browse files Browse the repository at this point in the history
* Replace underscore with dash in source_name

In both the new and old version of `Kildomaten` we sanitize source names by replacing underscores with dashes, see:
- https://github.com/statisticsnorway/terraform-dapla-source-data-processor/blob/c38815d77401470ca26c576316f02a511a11fe8d/main.tf#L30
- https://github.com/statisticsnorway/terraform-dapla-source-data-automation/blob/f268e4ec03dbd4c6d250348e12a1f7c8d8d7daed/main.tf#L31

Users should not have to concern themselves with this implementation detail, so this PR abstracts it away by doing the same sanitization here.

* Fix import sorting error introduced by commit hooks

* Add function that extracts the environment name from project id

* Explicitly type the dictionary

And update `Kuben` test case with valid project id
  • Loading branch information
Andilun authored Mar 12, 2024
1 parent 4ec50a5 commit 7a7fb71
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dapla-toolbelt"
version = "2.0.9"
version = "2.0.10"
description = "Dapla Toolbelt"
authors = ["Dapla Developers <[email protected]>"]
license = "MIT"
Expand Down
28 changes: 27 additions & 1 deletion src/dapla/pubsub.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,28 @@ def _extract_project_name(project_id: str) -> str:
)


def _extract_env(project_id: str) -> t.Literal["test", "prod"]:
"""Extracts the environment name from a GCP `Kuben` project ID.
This function assumes the project ID follows the `Kuben` project naming convention,
if it does the character before the last hyphen will represent the environment.
Args:
project_id (str): The project ID of a GCP `Kuben` project.
Returns:
t.Literal['test', 'prod']: the environment name.
Raises:
ValueError: If the project ID does not follow the `Kuben` format
"""
char_to_env_map: dict[str, t.Literal["test", "prod"]] = {"t": "test", "p": "prod"}
env_char = project_id.split("-")[-2]
if env_char not in char_to_env_map.keys():
raise ValueError("Invalid project id")
return char_to_env_map[env_char]


def trigger_source_data_processing(
project_id: str, source_name: str, folder_prefix: str, kuben: bool = False
) -> None:
Expand All @@ -172,10 +194,14 @@ def trigger_source_data_processing(
project_name = _extract_project_name(project_id)

if kuben:
bucket_id = f"ssb-{project_name.rsplit('-', 1)[0]}-data-kilde-test"
env = _extract_env(project_id)
bucket_id = f"ssb-{project_name.rsplit('-', 1)[0]}-data-kilde-{env}"
else:
bucket_id = f"ssb-{project_name}-data-kilde"

# GCP resources for `Kildomaten` are created with dash as separator instead of underscore
source_name = source_name.replace("_", "-")

_publish_gcs_objects_to_pubsub(
project_id, bucket_id, folder_prefix, topic_id=f"update-{source_name}"
)
19 changes: 17 additions & 2 deletions tests/test_pubsub.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from google.cloud import pubsub_v1

import dapla.pubsub
from dapla.pubsub import _extract_env
from dapla.pubsub import _extract_project_name
from dapla.pubsub import _generate_pubsub_data
from dapla.pubsub import _get_callback
Expand Down Expand Up @@ -83,7 +84,7 @@ def test_trigger_source_data_processing(
def test_trigger_source_data_processing_kuben(
self, mock_publish_gcs_objects_to_pubsub: Mock
) -> None:
kuben_project_id = "my-team-t-jhdfb"
kuben_project_id = "dapla-kildomaten-p-zz"

dapla.trigger_source_data_processing(
kuben_project_id, self.source_folder_name, self.folder_prefix, True
Expand All @@ -94,7 +95,7 @@ def test_trigger_source_data_processing_kuben(
# Check that _publish_gcs_objects_to_pubsub has been called with expected parameters
mock_publish_gcs_objects_to_pubsub.assert_called_with(
kuben_project_id,
"ssb-my-team-data-kilde-test",
"ssb-dapla-kildomaten-data-kilde-prod",
self.folder_prefix,
topic_id=self.topic_id,
)
Expand Down Expand Up @@ -123,3 +124,17 @@ def test_extract_project_name(project_id: str, expected_project_name: str) -> No
def test_invalid_project_id(invalid_project_id: str) -> None:
with pytest.raises(ValueError):
_extract_project_name(invalid_project_id)


@pytest.mark.parametrize(
    "project_id, expected_env",
    [
        ("dapla-kildomaten-p-zz", "prod"),
        ("dapla-t-zz", "test"),
    ],
)
def test_extract_env(project_id: str, expected_env: str) -> None:
    """Check that `_extract_env` returns the expected environment for each project ID."""
    # The expected value is an environment name ("test"/"prod"), not a project ID.
    assert _extract_env(project_id) == expected_env


def test_extract_env_invalid_project() -> None:
    """Check that `_extract_env` raises `ValueError` for a malformed project ID."""
    # The second-to-last segment here is "kildomaten", which is neither "t" nor "p".
    with pytest.raises(ValueError):
        _extract_env("dapla-kildomaten-p")

0 comments on commit 7a7fb71

Please sign in to comment.