Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/artifact from dbt cloud #67

Merged
merged 6 commits into from
Jan 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dbterd/adapters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dbterd import default
from dbterd.adapters import adapter
from dbterd.adapters.dbt_cloud import DbtCloudArtifact
from dbterd.adapters.dbt_invocation import DbtInvocation
from dbterd.adapters.filter import has_unsupported_rule
from dbterd.helpers import cli_messaging
Expand Down Expand Up @@ -57,6 +58,9 @@
if kwargs.get("dbt_auto_artifacts"):
self.dbt.get_artifacts_for_erd()
artifacts_dir = f"{dbt_project_dir}/target"
elif kwargs.get("dbt_cloud"):
artifacts_dir = f"{dbt_project_dir}/target"
DbtCloudArtifact(**kwargs).get(artifacts_dir=artifacts_dir)

Check warning on line 63 in dbterd/adapters/base.py

View check run for this annotation

Codecov / codecov/patch

dbterd/adapters/base.py#L62-L63

Added lines #L62 - L63 were not covered by tests
else:
unsupported, rule = has_unsupported_rule(
rules=select.extend(exclude) if exclude else select
Expand Down
101 changes: 101 additions & 0 deletions dbterd/adapters/dbt_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
import json

import click
import requests

from dbterd.helpers import file
from dbterd.helpers.log import logger


class DbtCloudArtifact:
    """Download dbt artifact files from a dbt Cloud job run.

    Uses the dbt Cloud Administrative API
    https://docs.getdbt.com/docs/dbt-cloud-apis/admin-cloud-api.

    And use Retrieve Run Artifact endpoint, for example, with v2 spec
    https://docs.getdbt.com/dbt-cloud/api-v2#/operations/Retrieve%20Run%20Artifact
    """

    # Seconds to wait on the API before giving up. `requests` applies no
    # timeout by default, so without this a stalled connection hangs the CLI.
    REQUEST_TIMEOUT = 30

    def __init__(self, **kwargs) -> None:
        """Initialize the base attributes to interact with API service

        Args:
            **kwargs: CLI options; the `dbt_cloud_host_url`,
                `dbt_cloud_service_token`, `dbt_cloud_account_id`,
                `dbt_cloud_run_id` and `dbt_cloud_api_version` keys are read.
                A missing key results in a `None` attribute.
        """
        self.host_url = kwargs.get("dbt_cloud_host_url")
        self.service_token = kwargs.get("dbt_cloud_service_token")
        self.account_id = kwargs.get("dbt_cloud_account_id")
        self.run_id = kwargs.get("dbt_cloud_run_id")
        self.api_version = kwargs.get("dbt_cloud_api_version")

    @property
    def request_headers(self) -> dict:
        """API Header carrying the service token"""
        return {"Authorization": f"Token {self.service_token}"}

    @property
    def api_endpoint(self) -> str:
        """Base API endpoint to a specific artifact object

        The returned string still contains a literal `{path}` placeholder
        (escaped as `{{path}}` below) which the artifact-specific properties
        fill in with a second `str.format` call.
        """
        return (
            "https://{host_url}/api/{api_version}/"
            "accounts/{account_id}/"
            "runs/{run_id}/"
            "artifacts/{{path}}"
        ).format(
            host_url=self.host_url,
            api_version=self.api_version,
            account_id=self.account_id,
            run_id=self.run_id,
        )

    @property
    def manifest_api_endpoint(self) -> str:
        """Full API endpoint to the `manifest.json` file"""
        return self.api_endpoint.format(path="manifest.json")

    @property
    def catalog_api_endpoint(self) -> str:
        """Full API endpoint to the `catalog.json` file"""
        return self.api_endpoint.format(path="catalog.json")

    def download_artifact(self, artifact: str, artifacts_dir: str) -> bool:
        """Request API to download the artifact file

        Args:
            artifact (str): The artifact name e.g. manifest or catalog
            artifacts_dir (str): Local dir where `<artifact>.json` is written;
                it is created if it does not exist yet

        Returns:
            bool: True if success, False if any errors
        """
        # Resolves to `manifest_api_endpoint` / `catalog_api_endpoint`.
        artifact_api_endpoint = getattr(self, f"{artifact}_api_endpoint")
        logger.info(f"Dowloading...[URL: {artifact_api_endpoint}]")
        try:
            r = requests.get(
                url=artifact_api_endpoint,
                headers=self.request_headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            logger.info(f"Completed [status: {r.status_code}]")

            if r.status_code != 200:
                logger.error(f"Failed to retrieve artifacts [error: {vars(r)}]")
                return False

            # A dbt Cloud user may never have built locally, so the target
            # directory is not guaranteed to exist.
            os.makedirs(artifacts_dir, exist_ok=True)
            file.write_json(
                data=json.dumps(r.json(), indent=2),
                path=f"{artifacts_dir}/{artifact}.json",
            )
        except (
            click.BadParameter,
            requests.exceptions.RequestException,  # connection, timeout, ...
            ValueError,  # response body is not valid JSON
            OSError,  # directory/file cannot be created or written
        ) as e:
            logger.error(f"Error occurred while downloading [error: {str(e)}]")
            return False

        return True

    def get(self, artifacts_dir: str = None) -> bool:
        """Download `manifest.json` and `catalog.json` to the local dir

        The catalog is only requested when the manifest download succeeded.

        Args:
            artifacts_dir (str, optional): Local dir where the artifacts get downloaded to. Default to CWD/target.

        Returns:
            bool: True if success, False if any errors
        """
        _artifacts_dir = artifacts_dir or f"{os.getcwd()}/target"
        r = self.download_artifact(artifact="manifest", artifacts_dir=_artifacts_dir)
        if r:
            r = self.download_artifact(artifact="catalog", artifacts_dir=_artifacts_dir)

        return r

Check warning on line 101 in dbterd/adapters/dbt_cloud.py

View check run for this annotation

Codecov / codecov/patch

dbterd/adapters/dbt_cloud.py#L101

Added line #L101 was not covered by tests
59 changes: 59 additions & 0 deletions dbterd/cli/params.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import functools

import click
Expand Down Expand Up @@ -112,6 +113,64 @@ def common_params(func):
show_default=True,
type=click.STRING,
)
@click.option(
"--dbt-cloud",
help=(
"Flag to download dbt artifact files using dbt Cloud API. "
"This requires the additional parameters to be able to connection to dbt Cloud API"
),
is_flag=True,
default=False,
show_default=True,
)
@click.option(
"--dbt-cloud-host-url",
help=(
"Configure dbt Cloud's Host URL. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_HOST_URL) if not specified. "
"Sample dbt Cloud Run URL: "
"https://<HOST_URL>/deploy/<ACCOUNT_ID>/projects/irrelevant/runs/<RUN_ID>"
),
default=os.environ.get("DBTERD_DBT_CLOUD_HOST_URL", "cloud.getdbt.com"),
show_default=True,
)
@click.option(
"--dbt-cloud-account-id",
help=(
"Configure dbt Cloud's Account ID. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_ACCOUNT_ID) if not specified"
),
default=os.environ.get("DBTERD_DBT_CLOUD_ACCOUNT_ID"),
show_default=True,
)
@click.option(
"--dbt-cloud-run-id",
help=(
"Configure dbt Cloud's completed Run ID. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_RUN_ID) if not specified"
),
default=os.environ.get("DBTERD_DBT_CLOUD_RUN_ID"),
show_default=True,
)
@click.option(
"--dbt-cloud-service-token",
help=(
"Configure dbt Service Token (Permissions: Job Admin). "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_SERVICE_TOKEN) if not specified. "
"Visit https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens to see how to generate it. "
),
default=os.environ.get("DBTERD_DBT_CLOUD_SERVICE_TOKEN"),
show_default=True,
)
@click.option(
"--dbt-cloud-api-version",
help=(
"Configure dbt Cloud Administrative API version. "
"Try to get OS environment variable (DBTERD_DBT_CLOUD_API_VERSION) if not specified."
),
default=os.environ.get("DBTERD_DBT_CLOUD_API_VERSION", "v2"),
show_default=True,
)
@functools.wraps(func)
def wrapper(*args, **kwargs):
return func(*args, **kwargs) # pragma: no cover
Expand Down
11 changes: 11 additions & 0 deletions dbterd/helpers/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,14 @@ def read_catalog(path: str, version: int = None):
parser_version = default_parser
parse_func = getattr(parser, parser_version)
return parse_func(catalog=_dict)


def write_json(data, path: str):
    """Persist json data to file

    Args:
        data (str | dict | list): Json data. A string is assumed to be
            already serialized and is written as-is; any other value is
            serialized with `json.dumps` before writing.
        path (str): File path
    """
    if not isinstance(data, str):
        # Local import: only needed when the caller hands over raw objects.
        import json

        data = json.dumps(data, indent=2)

    with open(path, "w") as file:
        file.write(data)
14 changes: 14 additions & 0 deletions docs/nav/guide/cli-references.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,20 @@ Currently, it supports the following keys in the format:
dbterd run --entity-name-format table # with table name only
```

### dbterd run --dbt-cloud

Decide to download artifact files from dbt Cloud Job Run instead of compiling locally.

Check [Download artifacts from a Job Run](./dbt-cloud/download-artifact-from-a-job-run.md) for more details.

**Examples:**
=== "CLI"

```bash
dbterd run --dbt-cloud
dbterd run --dbt-cloud --select wildcard:*transaction*
```

## dbterd debug

Shows hidden configured values, which will help us to see what configs are passed into and how they are evaluated to be used.
Expand Down
76 changes: 76 additions & 0 deletions docs/nav/guide/dbt-cloud/download-artifact-from-a-job-run.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Download artifacts from a Job Run

This is a guideline on how to download `manifest.json` and `catalog.json` from a Job Run by using the [dbt Cloud Administrative API](https://docs.getdbt.com/docs/dbt-cloud-apis/admin-cloud-api), under the [Retrieve Run Artifact](https://docs.getdbt.com/dbt-cloud/api-v2#/operations/Retrieve%20Run%20Artifact) endpoint. Therefore, we don't need to run `dbt docs generate` locally anymore.

In order to support dbt Cloud users, `dbterd` now has multiple CLI options starting with `--dbt-cloud` to let us configure the connection to a completed dbt Cloud Job Run.

!!! note "Prerequisites"
- You have a dbt Cloud account on a [Team or Enterprise plan](https://www.getdbt.com/pricing/) 💰
- You have a job or go create a new job with a single step 🏃

```bash
dbt docs generate
```
- Make sure that you have at least 1 successful run ✅

## 1. Prepare the environment variables

Behind the scenes, the API Endpoint will look like: `https://{host_url}/api/{api_version}/accounts/{account_id}/runs/{run_id}/artifacts/{path}`.

And the dbt Cloud's Job Run will have the URL constructed as `https://<host_url>/deploy/<account_id>/projects/irrelevant/runs/<run_id>`.

In the above:

| URL Part | Environment Variable | CLI Option | Description |
|-------------------|---------------------------------|---------------------------|---------------------------------------------------------------------------|
| `host_url` | `DBTERD_DBT_CLOUD_HOST_URL` | `--dbt-cloud-host-url` | Host URL, also known as [Access URL](https://docs.getdbt.com/docs/cloud/about-cloud/regions-ip-addresses) (Default to `cloud.getdbt.com`) |
| `account_id` | `DBTERD_DBT_CLOUD_ACCOUNT_ID` | `--dbt-cloud-account-id` | dbt Cloud Account ID |
| `run_id` | `DBTERD_DBT_CLOUD_RUN_ID` | `--dbt-cloud-run-id` | dbt Cloud successful job run ID |
| `api_version` | `DBTERD_DBT_CLOUD_API_VERSION` | `--dbt-cloud-api-version` | dbt Cloud API version (Default to `v2`) |
| `path` | `N/A` | `N/A` | Artifact relative file path. You don't need to care about this part as `dbterd` manages it already |

Besides, we need another one which is very important, the service token:

- Go to **Account settings** / **Service tokens**. Click _+ New token_
- Enter _Service token name_ e.g. "ST_dbterd"
- Click _Add_ and select `Job Admin` permission. Optionally, select the right project or all by default
- Click _Save_
- Copy token & Pass it to the Environment Variable (`DBTERD_DBT_CLOUD_SERVICE_TOKEN`) or the CLI Option (`--dbt-cloud-service-token`)

Finally, fill in `your_value` and execute the (Linux or macOS) command below:

```bash
export DBTERD_DBT_CLOUD_SERVICE_TOKEN=your_value
export DBTERD_DBT_CLOUD_HOST_URL=your_value
export DBTERD_DBT_CLOUD_ACCOUNT_ID=your_value
export DBTERD_DBT_CLOUD_RUN_ID=your_value
export DBTERD_DBT_CLOUD_API_VERSION=your_value
```

## 2. Generate ERD file

We're going to use `--dbt-cloud` option to tell `dbterd` to use dbt Cloud API with all above variables.

The command will look like:

```bash
dbterd run -s <dbterd selection> --dbt-cloud
```

> NOTE: You cannot use the `--dbt` option together with `--dbt-cloud`

and then, here is the sample console log:

```log
dbterd - INFO - Run with dbterd==1.0.0 (main.py:54)
dbterd - INFO - Using dbt project dir at: C:\Sources\dbterd (base.py:46)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/manifest.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Dowloading...[URL: https://hidden/api/v2/accounts/hidden/runs/2442752/artifacts/catalog.json] (dbt_cloud.py:68)
dbterd - INFO - Completed [status: 200] (dbt_cloud.py:71)
dbterd - INFO - Using dbt artifact dir at: hidden (base.py:73)
dbterd - INFO - Collected 4 table(s) and 3 relationship(s) (test_relationship.py:59)
dbterd - INFO - C:\Sources\dbterd\target/output.dbml (base.py:170)
```

Voila! Happy ERD 🎉!
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ nav:
- Metadata:
- Ignore Tests: nav/metadata/ignore_in_erd.md
- Relationship Types: nav/metadata/relationship_type.md
- dbt Cloud:
- Download artifacts from a Job Run: nav/guide/dbt-cloud/download-artifact-from-a-job-run.md
- Development:
- Contribution Guideline: nav/development/contributing-guide.md
- Change Log: nav/development/CHANGELOG.md
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/adapters/test_dbt_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# from unittest import mock

# import click
# import pytest

# from dbterd.adapters.dbt_cloud import DbtCloudArtifact


class TestDbtCloudArtifact:
    """Placeholder suite for `DbtCloudArtifact`; nothing is asserted yet."""

    def test_download_artifact(self):
        """Placeholder for the `download_artifact` case; intentionally empty."""
        return None

    def test_get(self):
        """Placeholder for the `get` case; intentionally empty."""
        return None
5 changes: 5 additions & 0 deletions tests/unit/helpers/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,8 @@ def test_read_catalog_error(self, mock_open_json, version):
with pytest.raises(ValueError):
file.read_catalog(path="path/to/catalog", version=version)
mock_open_json.assert_called_with("path/to/catalog/catalog.json")

@mock.patch("builtins.open")
def test_write_json(self, mock_open):
    """`write_json` opens the target path for writing and writes the payload."""
    # Use a serialized JSON string: a real file handle only accepts `str`.
    data = '{"key": "value"}'
    file.write_json(data=data, path="path/to/catalog/catalog.json")
    mock_open.assert_called_with("path/to/catalog/catalog.json", "w")
    # The handle yielded by the `with open(...)` block must receive the payload.
    handle = mock_open.return_value.__enter__.return_value
    handle.write.assert_called_once_with(data)
Loading