diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b00d8238a..c2131a5a1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,7 +20,6 @@ env: WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }} BEAKER_WORKSPACE: ai2/tango-testing - BEAKER_DEFAULT_CLUSTER: ai2/allennlp-cirrascale BEAKER_IMAGE: petew/tango-testing GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -288,7 +287,7 @@ jobs: image: beaker: ${{ env.BEAKER_IMAGE }} context: - cluster: ${{ env.BEAKER_DEFAULT_CLUSTER }} + preemptible: true resources: gpuCount: 2 envVars: diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b842499e..aa674066c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fixed a bunch of dependencies +- Upgraded to new version of wandb ## [v1.3.2](https://github.com/allenai/tango/releases/tag/v1.3.2) - 2023-10-27 diff --git a/docs/source/first_steps.md b/docs/source/first_steps.md index 16989205c..cb7d34200 100644 --- a/docs/source/first_steps.md +++ b/docs/source/first_steps.md @@ -247,7 +247,7 @@ Computing...: 100%|##########| 100/100 [00:05<00:00, 18.99it/s] ✓ The output for "add_numbers" is in workspace/runs/live-tarpon/add_numbers ``` -The last line in the output tells us where we can find the result of our "add_numbers" step. `live-parpon` is +The last line in the output tells us where we can find the result of our "add_numbers" step. `live-tarpon` is the name of the run. Run names are randomly generated and may be different on your machine. `add_numbers` is the name of the step in your config. The whole path is a symlink to a directory, which contains (among other things) a file `data.json`: diff --git a/pyproject.toml b/pyproject.toml index 5504b1260..adfa6ca25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ flax = [ "tensorflow-cpu>=2.9.1" ] wandb = [ - "wandb>=0.12,<0.14.3", + "wandb>=0.16", "retry" ] beaker = [ diff --git a/tango/integrations/transformers/__init__.py b/tango/integrations/transformers/__init__.py index 950de3680..817c97f7a 100644 --- a/tango/integrations/transformers/__init__.py +++ b/tango/integrations/transformers/__init__.py @@ -93,6 +93,7 @@ transformers::constant transformers::constant_with_warmup transformers::cosine + transformers::cosine_with_min_lr transformers::cosine_with_restarts transformers::inverse_sqrt transformers::linear diff --git a/tango/integrations/wandb/step_cache.py b/tango/integrations/wandb/step_cache.py index 35851b2c1..56ea8b84c 100644 --- a/tango/integrations/wandb/step_cache.py +++ b/tango/integrations/wandb/step_cache.py @@ -72,7 +72,7 @@ def _step_artifact_name(self, step: Union[Step, StepInfo]) -> str: def _step_result_remote( # type: ignore self, step: Union[Step, StepInfo] - ) -> Optional[wandb.apis.public.Artifact]: + ) -> Optional[wandb.Artifact]: artifact_kind = (step.metadata or {}).get("artifact_kind", ArtifactKind.STEP_RESULT.value) try: return self.wandb_client.artifact( @@ -88,9 +88,7 @@ def _step_result_remote( # type: ignore def create_step_result_artifact(self, step: Step, objects_dir: Optional[PathOrStr] = None): self._upload_step_remote(step, objects_dir) - def get_step_result_artifact( - self, step: Union[Step, StepInfo] - ) -> Optional[wandb.apis.public.Artifact]: + def get_step_result_artifact(self, step: Union[Step, StepInfo]) -> Optional[wandb.Artifact]: artifact_kind = (step.metadata or {}).get("artifact_kind", ArtifactKind.STEP_RESULT.value) try: return self.wandb_client.artifact( @@ -144,7 +142,7 @@ def use_step_result_artifact(self, step: Union[Step, StepInfo]) -> None: def _download_step_remote(self, step_result, target_dir: PathOrStr): try: - step_result.download(root=target_dir, recursive=True) + step_result.download(root=target_dir) except (WandbError, ValueError): raise RemoteNotFoundError() diff --git a/tango/integrations/wandb/util.py b/tango/integrations/wandb/util.py index 29d5ae644..7f5b3d211 100644 --- a/tango/integrations/wandb/util.py +++ b/tango/integrations/wandb/util.py @@ -1,4 +1,5 @@ import os +import re import warnings from enum import Enum @@ -13,7 +14,17 @@ def is_missing_artifact_error(err: WandbError): Check if a specific W&B error is caused by a 404 on the artifact we're looking for. """ # This is brittle, but at least we have a test for it. - return "does not contain artifact" in err.message + + # This is a workaround for a bug in the wandb API + if err.message == "'NoneType' object has no attribute 'get'": + return True + + if re.search(r"^artifact '.*' not found in '.*'$", err.message): + return True + + return ("does not contain artifact" in err.message) or ( + "Unable to fetch artifact with name" in err.message + ) def check_environment():