Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: OME-Zarr format version conversion #121

Merged
merged 5 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,24 @@ pip install "ngff-zarr[tensorstore]"
nz.to_ngff_zarr('cthead1.ome.zarr', multiscales, use_tensorstore=True)
```

## Convert OME-Zarr versions

To convert from OME-Zarr version 0.4, which uses the Zarr Format Specification
2, to 0.5, which uses the Zarr Format Specification 3, or vice versa, specify
the desired version when writing.

```python
# Convert from 0.4 to 0.5
multiscales = from_ngff_zarr('cthead1.ome.zarr')
to_ngff_zarr('cthead1_zarr3.ome.zarr', multiscales, version='0.5')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️ cc: @will-moore @normanrz regarding the implementation conversation for release.

```

```python
# Convert from 0.5 to 0.4
multiscales = from_ngff_zarr('cthead1.ome.zarr')
to_ngff_zarr('cthead1_zarr2.ome.zarr', multiscales, version='0.4')
```

[dataclass]: https://docs.python.org/3/library/dataclasses.html
[dataclasses]: https://docs.python.org/3/library/dataclasses.html
[Dask arrays]: https://docs.dask.org/en/stable/array.html
Expand Down
6 changes: 6 additions & 0 deletions ngff_zarr/from_ngff_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
from typing import Union, Optional
import packaging.version
import sys

import dask.array
import zarr
Expand Down Expand Up @@ -91,6 +92,11 @@ def from_ngff_zarr(
datasets = []
for dataset in metadata["datasets"]:
data = dask.array.from_zarr(store, component=dataset["path"])
# Convert endianness to native if needed
if (sys.byteorder == "little" and data.dtype.byteorder == ">") or (
sys.byteorder == "big" and data.dtype.byteorder == "<"
):
data = data.astype(data.dtype.newbyteorder())

scale = {d: 1.0 for d in dims}
translation = {d: 0.0 for d in dims}
Expand Down
22 changes: 20 additions & 2 deletions ngff_zarr/to_ngff_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import zarr
import zarr.storage
from ._zarr_open_array import open_array
from .v04.zarr_metadata import Metadata as Metadata_v04
from .v05.zarr_metadata import Metadata as Metadata_v05

# Zarr Python 3
if hasattr(zarr.storage, "StoreLike"):
Expand Down Expand Up @@ -182,7 +184,23 @@ def to_ngff_zarr(
if version != "0.4" and version != "0.5":
raise ValueError(f"Unsupported version: {version}")

metadata_dict = asdict(multiscales.metadata)
metadata = multiscales.metadata
if version == "0.4" and isinstance(metadata, Metadata_v05):
metadata = Metadata_v04(
axes=metadata.axes,
datasets=metadata.datasets,
coordinateTransformations=metadata.coordinateTransformations,
name=metadata.name,
)
if version == "0.5" and isinstance(metadata, Metadata_v04):
metadata = Metadata_v05(
axes=metadata.axes,
datasets=metadata.datasets,
coordinateTransformations=metadata.coordinateTransformations,
name=metadata.name,
)

metadata_dict = asdict(metadata)
metadata_dict = _pop_metadata_optionals(metadata_dict)
metadata_dict["@type"] = "ngff:Image"
zarr_format = 2 if version == "0.4" else 3
Expand Down Expand Up @@ -224,7 +242,7 @@ def to_ngff_zarr(
progress.update_multiscales_task_completed(index + 1)
image = next_image
arr = image.data
path = multiscales.metadata.datasets[index].path
path = metadata.datasets[index].path
parent = str(PurePosixPath(path).parent)
if parent not in (".", "/"):
array_dims_group = root.create_group(parent)
Expand Down
6 changes: 6 additions & 0 deletions ngff_zarr/validate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Dict
from pathlib import Path
import json
from packaging import version as packaging_version

from importlib_resources import files as file_resources

Expand Down Expand Up @@ -40,5 +41,10 @@ def validate(
registry = Registry().with_resource(
NGFF_URI, resource=Resource.from_contents(schema)
)
if packaging_version.parse(version) >= packaging_version.parse("0.5"):
version_schema = load_schema(version=version, model="_version")
registry = registry.with_resource(
NGFF_URI, resource=Resource.from_contents(version_schema)
)
validator = Draft202012Validator(schema, registry=registry)
validator.validate(ngff_dict)
82 changes: 41 additions & 41 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions test/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from zarr.storage import MemoryStore
from deepdiff import DeepDiff

test_data_ipfs_cid = "bafybeiccqqioflsdnpna3kewhskyjcitqdk6n3yrzhnhj5qzpjk5edc2be"
test_data_sha256 = "4921b0e38b09ea480d377a89f1b4074f8e783b5703a651c60ef1e495c877f716"
test_data_ipfs_cid = "bafybeib2s7ls6yscm2uqxby5vbhbfsyxn3ev7soewi3hji4uiki7v6cbiy"
test_data_sha256 = "58c0219f194cd976acee1ebd19ea78b03aada3f96a54302c8fb8515a349d9613"

test_dir = Path(__file__).resolve().parent
extract_dir = "data"
Expand Down
Loading
Loading