From 5d05dbbf9d74f0588046888c454b0a6538c6bd22 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:32:08 -0800 Subject: [PATCH 1/2] =?UTF-8?q?Use=20jsonschema=20=E2=89=A54.18.0=20and=20?= =?UTF-8?q?new=20referencing=20library?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In v4.18.0, jsonschema.RefResolver was deprecated in favor of the new referencing library.¹ The intro² and API³ docs were helpful in determining the necessary changes. I've tested that our new usage is not backwards compatible with v4.17.3 and thus updated the minimum requirement to v4.18.0. I chose v0.29.1 as the minimum supported version of referencing because that was the version released alongside jsonschema v4.18.0. The default behavior no longer tries to access the network, so I've reworded the retrieval function comment and error message. Local reference mismatches are now a "PointerToNowhere" error instead of an "Unresolvable JSON pointer" error. The new error shows the entire schema JSON in the output, which can seem unnecessarily verbose, but I think it's fine since this is only intended to show on internal errors with the schema. 
¹ https://github.com/python-jsonschema/jsonschema/blob/93e0caa5752947ec77333da81a634afe41a022ed/CHANGELOG.rst#v4180 ² https://python-jsonschema.readthedocs.io/en/stable/referencing/#introduction-to-the-referencing-api ³ https://referencing.readthedocs.io/en/stable/api/#referencing.Registry.with_contents --- augur/validate.py | 29 ++++++++++++++++------------- setup.py | 3 ++- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/augur/validate.py b/augur/validate.py index 364a5d785..da758f2b5 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -9,6 +9,7 @@ import jsonschema.exceptions import re from itertools import groupby +from referencing import Registry from textwrap import indent from typing import Iterable, Union from augur.data import as_file @@ -48,22 +49,24 @@ def load_json_schema(path, refs=None): for k, v in refs.items(): with as_file(v) as file, open_file(file, "r") as fh: schema_store[k] = json.load(fh) - resolver = jsonschema.RefResolver.from_schema(schema,store=schema_store) - schema_validator = Validator(schema, resolver=resolver) + + # Create a dummy retrieval function to handle URIs not present in + # schema_store. This often indicates a typo (the $ref doesn't match the + # key of the schema_store) or we forgot to add a local mapping for a new + # $ref. + def retrieve(uri): + # Take advantage of the fact that BaseException is not handled by + # Registry.get_or_retrieve. This means the custom error message is + # printed instead of the less helpful default: + # jsonschema.exceptions._WrappedReferencingError: Unresolvable: https://… + raise BaseException(f"The schema used for validation could not resolve a local file for {uri!r}. " + + "Please check the schema used and update the appropriate schema_store as needed." 
) + + registry = Registry(retrieve=retrieve).with_contents(schema_store.items()) + schema_validator = Validator(schema, registry=registry) else: schema_validator = Validator(schema) - # By default $ref URLs which we don't define in a schema_store are fetched - # by jsonschema. This often indicates a typo (the $ref doesn't match the key - # of the schema_store) or we forgot to add a local mapping for a new $ref. - # Either way, Augur should not be accessing the network. - def resolve_remote(url): - # The exception type is not important as jsonschema will catch & re-raise as a RefResolutionError - raise Exception(f"The schema used for validation attempted to fetch the remote URL {url!r}. " + - "Augur should resolve schema references to local files, please check the schema used " + - "and update the appropriate schema_store as needed." ) - schema_validator.resolver.resolve_remote = resolve_remote - return schema_validator diff --git a/setup.py b/setup.py index 81aad167d..57b6a5f1c 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "cvxopt >=1.1.9, ==1.*", "importlib_resources >=5.3.0; python_version < '3.11'", "isodate ==0.6.*", - "jsonschema >=3.0.0, ==3.*", + "jsonschema >=4.18.0, ==4.*", "networkx >= 2.5, <4", "numpy ==1.*", "packaging >=19.2", @@ -65,6 +65,7 @@ "phylo-treetime >=0.11.2, <0.12", "pyfastx >=1.0.0, <3.0", "python_calamine >=0.2.0", + "referencing >=0.29.1, <1.0", "scipy ==1.*", "xopen[zstd] >=1.7.0, <3" # TODO: Deprecated, remove v1 support around November 2024 ], From 901fcc82dd46fd6398c7c08eec221720b642a50f Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:36:07 -0800 Subject: [PATCH 2/2] Update changelog --- CHANGES.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e8f0d0aec..fe85ebddf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,15 @@ ## __NEXT__ +### Major Changes + +- Drop support for older versions of jsonschema (<4.18.0). 
[#1691] (@victorlin) + +### Bug fixes + +- export: validation will no longer crash with `KeyError: 'tree'` when newer versions of jsonschema (≥4.18.0) are installed. [#1691] (@victorlin) + +[#1691]: https://github.com/nextstrain/augur/pull/1691 ## 26.2.0 (20 November 2024)