Skip to content

Commit

Permalink
[components] Add --rebuild-component-registry option, --skip-venv o…
Browse files Browse the repository at this point in the history
…ption, build cache on code loc construction (#26591)

## Summary & Motivation

Add a top-level `--rebuild-component-registry` option that clears the
cache and refetches the component registry for the current environment.
The reason this is called `--rebuild-component-registry` instead of
`--rebuild-cache` is:

- It is scoped to just the current environment, whereas the cache
encompasses multiple environments
- We may cache other info later

Also add `--skip-venv` option to `dg generate code-location` to avoid
automatic venv setup and cache population on construction. Mostly useful
for testing.

We also invoke this immediately after `uv sync` when constructing a new
code location, so that the cache is loaded.

## How I Tested These Changes

New unit tests + manual test of code location generation.
  • Loading branch information
smackesey authored Dec 20, 2024
1 parent a14cb87 commit baefba6
Show file tree
Hide file tree
Showing 8 changed files with 219 additions and 50 deletions.
10 changes: 8 additions & 2 deletions python_modules/libraries/dagster-dg/dagster_dg/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,15 @@ def __init__(self, root_path: Path, logging_enabled: bool):
self._root_path.mkdir(parents=True, exist_ok=True)
self._logging_enabled = logging_enabled

def clear(self) -> None:
def clear_key(self, key: Tuple[str, ...]) -> None:
path = self._get_path(key)
if path.exists():
path.unlink()
self.log(f"CACHE [clear-key]: {path}")

def clear_all(self) -> None:
shutil.rmtree(self._root_path)
self.log(f"CACHE [clear]: {self._root_path}")
self.log(f"CACHE [clear-all]: {self._root_path}")

def get(self, key: Tuple[str, ...]) -> Optional[str]:
path = self._get_path(key)
Expand Down
65 changes: 61 additions & 4 deletions python_modules/libraries/dagster-dg/dagster_dg/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
from pathlib import Path

import click
Expand All @@ -7,6 +8,14 @@
from dagster_dg.cli.info import info_cli
from dagster_dg.cli.list import list_cli
from dagster_dg.config import DgConfig, set_config_on_cli_context
from dagster_dg.context import (
DgContext,
ensure_uv_lock,
fetch_component_registry,
is_inside_code_location_directory,
make_cache_key,
resolve_code_location_root_directory,
)
from dagster_dg.utils import DgClickGroup
from dagster_dg.version import __version__

Expand Down Expand Up @@ -49,6 +58,15 @@ def create_dg_cli():
help="Clear the cache before running the command.",
default=False,
)
@click.option(
"--rebuild-component-registry",
is_flag=True,
help=(
"Recompute and cache the set of available component types for the current environment."
" Note that this also happens automatically whenever the cache is detected to be stale."
),
default=False,
)
@click.option(
"--cache-dir",
type=Path,
Expand All @@ -64,6 +82,7 @@ def group(
disable_cache: bool,
cache_dir: Path,
clear_cache: bool,
rebuild_component_registry: bool,
):
"""CLI tools for working with Dagster components."""
context.ensure_object(dict)
Expand All @@ -73,19 +92,57 @@ def group(
disable_cache=disable_cache,
cache_dir=cache_dir,
)
if clear_cache:
DgCache.from_config(config).clear()
set_config_on_cli_context(context, config)

if clear_cache and rebuild_component_registry:
click.echo(
click.style(
"Cannot specify both --clear-cache and --rebuild-component-registry.", fg="red"
)
)
sys.exit(1)
elif clear_cache:
DgCache.from_config(config).clear_all()
if context.invoked_subcommand is None:
context.exit(0)
elif rebuild_component_registry:
if context.invoked_subcommand is not None:
click.echo(
click.style(
"Cannot specify --rebuild-component-registry with a subcommand.", fg="red"
)
)
sys.exit(1)
_rebuild_component_registry(context)
elif context.invoked_subcommand is None:
click.echo(context.get_help())
context.exit(0)

set_config_on_cli_context(context, config)

return group


def _rebuild_component_registry(cli_context: click.Context):
dg_context = DgContext.from_cli_context(cli_context)
if not is_inside_code_location_directory(Path.cwd()):
click.echo(
click.style(
"This command must be run inside a Dagster code location directory.", fg="red"
)
)
sys.exit(1)
if not dg_context.cache:
click.echo(
click.style("Cache is disabled. This command cannot be run without a cache.", fg="red")
)
sys.exit(1)
root_path = resolve_code_location_root_directory(Path.cwd())
ensure_uv_lock(root_path)
key = make_cache_key(root_path, "component_registry_data")
dg_context.cache.clear_key(key)
# This will trigger a rebuild of the component registry
fetch_component_registry(Path.cwd(), dg_context)


ENV_PREFIX = "DAGSTER_DG"
cli = create_dg_cli()

Expand Down
10 changes: 8 additions & 2 deletions python_modules/libraries/dagster-dg/dagster_dg/cli/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,15 @@ def generate_deployment_command(path: Path) -> None:
" the location of the local Dagster clone will be read from the `DAGSTER_GIT_REPO_DIR` environment variable."
),
)
@click.option(
"--skip-venv",
is_flag=True,
default=False,
help="Do not create a virtual environment for the code location.",
)
@click.pass_context
def generate_code_location_command(
cli_context: click.Context, name: str, use_editable_dagster: Optional[str]
cli_context: click.Context, name: str, use_editable_dagster: Optional[str], skip_venv: bool
) -> None:
"""Generate a Dagster code location file structure and a uv-managed virtual environment scoped
to the code location.
Expand Down Expand Up @@ -117,7 +123,7 @@ def generate_code_location_command(
else:
editable_dagster_root = None

generate_code_location(code_location_path, editable_dagster_root)
generate_code_location(code_location_path, dg_context, editable_dagster_root, skip_venv)


@generate_cli.command(name="component-type", cls=DgClickCommand)
Expand Down
52 changes: 33 additions & 19 deletions python_modules/libraries/dagster-dg/dagster_dg/context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import hashlib
import json
import os
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Final, Iterable, Optional, Tuple
Expand All @@ -15,8 +16,10 @@
from dagster_dg.error import DgError
from dagster_dg.utils import (
execute_code_location_command,
get_uv_command_env,
hash_directory_metadata,
hash_file_metadata,
pushd,
)


Expand All @@ -43,13 +46,13 @@ def _is_deployment_root_directory(path: Path) -> bool:

def is_inside_code_location_directory(path: Path) -> bool:
try:
_resolve_code_location_root_directory(path)
resolve_code_location_root_directory(path)
return True
except DgError:
return False


def _resolve_code_location_root_directory(path: Path) -> Path:
def resolve_code_location_root_directory(path: Path) -> Path:
current_path = path.absolute()
while not _is_code_location_root_directory(current_path):
current_path = current_path.parent
Expand Down Expand Up @@ -134,6 +137,31 @@ def make_cache_key(code_location_path: Path, data_type: CachableDataType) -> Tup
return ("_".join(path_parts), env_hash, data_type)


def ensure_uv_lock(root_path: Path) -> None:
with pushd(root_path):
if not (root_path / "uv.lock").exists():
subprocess.run(["uv", "sync"], check=True, env=get_uv_command_env())


def fetch_component_registry(path: Path, dg_context: DgContext) -> RemoteComponentRegistry:
root_path = resolve_code_location_root_directory(path)

cache = dg_context.cache
if cache:
cache_key = make_cache_key(root_path, "component_registry_data")

raw_registry_data = cache.get(cache_key) if cache else None
if not raw_registry_data:
raw_registry_data = execute_code_location_command(
root_path, ["list", "component-types"], dg_context
)
if cache:
cache.set(cache_key, raw_registry_data)

registry_data = json.loads(raw_registry_data)
return RemoteComponentRegistry.from_dict(registry_data)


@dataclass
class CodeLocationDirectoryContext:
"""Class encapsulating contextual information about a components code location directory.
Expand All @@ -155,23 +183,9 @@ class CodeLocationDirectoryContext:

@classmethod
def from_path(cls, path: Path, dg_context: DgContext) -> Self:
root_path = _resolve_code_location_root_directory(path)

cache = dg_context.cache
if cache:
cache_key = make_cache_key(root_path, "component_registry_data")

raw_registry_data = cache.get(cache_key) if cache else None
if not raw_registry_data:
raw_registry_data = execute_code_location_command(
root_path, ["list", "component-types"], dg_context
)
if cache:
cache.set(cache_key, raw_registry_data)

registry_data = json.loads(raw_registry_data)
component_registry = RemoteComponentRegistry.from_dict(registry_data)

root_path = resolve_code_location_root_directory(path)
ensure_uv_lock(root_path)
component_registry = fetch_component_registry(path, dg_context)
return cls(
root_path=root_path,
name=path.name,
Expand Down
26 changes: 15 additions & 11 deletions python_modules/libraries/dagster-dg/dagster_dg/generate.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
import json
import os
import subprocess
import textwrap
from pathlib import Path
from typing import Any, Mapping, Optional

import click

from dagster_dg.context import CodeLocationDirectoryContext, DgContext
from dagster_dg.utils import (
camelcase,
execute_code_location_command,
generate_subtree,
get_uv_command_env,
pushd,
from dagster_dg.context import (
CodeLocationDirectoryContext,
DgContext,
ensure_uv_lock,
fetch_component_registry,
)
from dagster_dg.utils import camelcase, execute_code_location_command, generate_subtree

# ########################
# ##### DEPLOYMENT
Expand Down Expand Up @@ -89,7 +87,12 @@ def get_pyproject_toml_uv_sources(editable_dagster_root: str) -> str:
""")


def generate_code_location(path: Path, editable_dagster_root: Optional[str] = None) -> None:
def generate_code_location(
path: Path,
dg_context: DgContext,
editable_dagster_root: Optional[str] = None,
skip_venv: bool = False,
) -> None:
click.echo(f"Creating a Dagster code location at {path}.")

dependencies = get_pyproject_toml_dependencies(use_editable_dagster=bool(editable_dagster_root))
Expand All @@ -112,8 +115,9 @@ def generate_code_location(path: Path, editable_dagster_root: Optional[str] = No
)

# Build the venv
with pushd(path):
subprocess.run(["uv", "sync"], check=True, env=get_uv_command_env())
if not skip_venv:
ensure_uv_lock(path)
fetch_component_registry(path, dg_context) # Populate the cache


# ########################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest
import tomli
from dagster_dg.context import CodeLocationDirectoryContext, DgContext
from dagster_dg.utils import discover_git_root, ensure_dagster_dg_tests_import
from dagster_dg.utils import discover_git_root, ensure_dagster_dg_tests_import, pushd

ensure_dagster_dg_tests_import()

Expand Down Expand Up @@ -40,7 +40,12 @@ def test_generate_deployment_command_already_exists_fails() -> None:


def test_generate_code_location_inside_deployment_success() -> None:
with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner):
# Don't use the test component lib because it is not present in published dagster-components,
# which this test is currently accessing since we are not doing an editable install.
with (
ProxyRunner.test(use_test_component_lib=False) as runner,
isolated_example_deployment_foo(runner),
):
result = runner.invoke("generate", "code-location", "bar")
assert_runner_result(result)
assert Path("code_locations/bar").exists()
Expand All @@ -60,9 +65,16 @@ def test_generate_code_location_inside_deployment_success() -> None:
# No tool.uv.sources added without --use-editable-dagster
assert "uv" not in toml["tool"]

# Check cache was populated
with pushd("code_locations/bar"):
result = runner.invoke("--verbose", "list", "component-types")
assert "CACHE [hit]" in result.output


def test_generate_code_location_outside_deployment_success() -> None:
with ProxyRunner.test() as runner, runner.isolated_filesystem():
# Don't use the test component lib because it is not present in published dagster-components,
# which this test is currently accessing since we are not doing an editable install.
with ProxyRunner.test(use_test_component_lib=False) as runner, runner.isolated_filesystem():
result = runner.invoke("generate", "code-location", "bar")
assert_runner_result(result)
assert Path("bar").exists()
Expand Down Expand Up @@ -110,6 +122,24 @@ def test_generate_code_location_editable_dagster_success(mode: str, monkeypatch)
}


def test_generate_code_location_skip_venv_success() -> None:
# Don't use the test component lib because it is not present in published dagster-components,
# which this test is currently accessing since we are not doing an editable install.
with ProxyRunner.test() as runner, runner.isolated_filesystem():
result = runner.invoke("generate", "code-location", "--skip-venv", "bar")
assert_runner_result(result)
assert Path("bar").exists()
assert Path("bar/bar").exists()
assert Path("bar/bar/lib").exists()
assert Path("bar/bar/components").exists()
assert Path("bar/bar_tests").exists()
assert Path("bar/pyproject.toml").exists()

# Check venv created
assert not Path("bar/.venv").exists()
assert not Path("bar/uv.lock").exists()


def test_generate_code_location_editable_dagster_no_env_var_no_value_fails(monkeypatch) -> None:
monkeypatch.setenv("DAGSTER_GIT_REPO_DIR", "")
with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner):
Expand All @@ -120,9 +150,9 @@ def test_generate_code_location_editable_dagster_no_env_var_no_value_fails(monke

def test_generate_code_location_already_exists_fails() -> None:
with ProxyRunner.test() as runner, isolated_example_deployment_foo(runner):
result = runner.invoke("generate", "code-location", "bar")
result = runner.invoke("generate", "code-location", "bar", "--skip-venv")
assert_runner_result(result)
result = runner.invoke("generate", "code-location", "bar")
result = runner.invoke("generate", "code-location", "bar", "--skip-venv")
assert_runner_result(result, exit_0=False)
assert "already exists" in result.output

Expand Down
Loading

0 comments on commit baefba6

Please sign in to comment.