Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for zarr-python 3.0 #15

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,16 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest tox
pip install tox
pip install -e .
- name: Test with tox
run: |
tox -p
PYTHON_VERSION=$(python --version | cut -d' ' -f2)

if [[ "$PYTHON_VERSION" == "3.9"* ]]; then
echo "Detected Python version: $PYTHON_VERSION"
echo "Skipping tests with xarray versions that require newer python"
tox -p --skip-env py-xarray202501
else
tox -p
fi
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
requirements = [
"click>=7.0.0",
"fsspec>=0.7.0",
"xarray>=0.15.0",
"zarr>=2.3.0,<3",
"xarray>=0.19.0",
"zarr>=2.3.0",
]

test_requirements = ["pytest"]
Expand Down
33 changes: 23 additions & 10 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
from zarrdump.core import dump, _open_with_xarray_or_zarr

from click.testing import CliRunner
import fsspec
import pytest
import numpy as np
import xarray as xr
import zarr


ZARR_MAJOR_VERSION = zarr.__version__.split(".")[0]


def test_version():
assert zarrdump.__version__ == "0.4.2"

Expand All @@ -31,7 +33,10 @@ def tmp_zarr_group(tmpdir):
def write_group_to_zarr(consolidated=False):
path = str(tmpdir.join("test.zarr"))
z = zarr.open_group(path)
arr = z.create_dataset("var1", shape=(3, 5))
if ZARR_MAJOR_VERSION >= "3":
arr = z.create_array("var1", shape=(3, 5), dtype=np.float32)
else:
arr = z.create_dataset("var1", shape=(3, 5), dtype=np.float32)
arr[:] = 1.0
if consolidated:
zarr.consolidate_metadata(path)
Expand All @@ -43,17 +48,15 @@ def write_group_to_zarr(consolidated=False):
@pytest.mark.parametrize("consolidated", [True, False])
def test__open_with_xarray_or_zarr_on_zarr_group(tmp_zarr_group, consolidated):
group, path = tmp_zarr_group(consolidated=consolidated)
m = fsspec.get_mapper(path)
opened_group, is_xarray_dataset = _open_with_xarray_or_zarr(m, consolidated)
opened_group, is_xarray_dataset = _open_with_xarray_or_zarr(path)
np.testing.assert_allclose(group["var1"], opened_group["var1"])
assert not is_xarray_dataset


@pytest.mark.parametrize("consolidated", [True, False])
def test__open_with_xarray_or_zarr_on_xarray_ds(tmp_xarray_ds, consolidated):
ds, path = tmp_xarray_ds(consolidated=consolidated)
m = fsspec.get_mapper(path)
opened_ds, is_xarray_dataset = _open_with_xarray_or_zarr(m, consolidated)
opened_ds, is_xarray_dataset = _open_with_xarray_or_zarr(path)
np.testing.assert_allclose(ds["var1"], opened_ds["var1"])
assert is_xarray_dataset

Expand All @@ -68,22 +71,33 @@ def test_dump_non_existent_url():
@pytest.mark.parametrize("options", [[], ["-v", "var1"]])
def test_dump_executes_on_zarr_group(tmp_zarr_group, options):
runner = CliRunner()
_, path = tmp_zarr_group()
_, path = tmp_zarr_group(consolidated=True)
result = runner.invoke(dump, [path] + options)
assert result.exit_code == 0
if "-v" in options:
assert "Array" in result.output
else:
assert "Group" in result.output


@pytest.mark.parametrize("options", [[], ["-v", "var1"], ["--info"]])
def test_dump_executes_on_xarray_dataset(tmp_xarray_ds, options):
runner = CliRunner()
_, path = tmp_xarray_ds()
_, path = tmp_xarray_ds(consolidated=True)
result = runner.invoke(dump, [path] + options)
assert result.exit_code == 0
if "-v" in options:
expected_content = "<xarray.DataArray"
elif "--info" in options:
expected_content = "xarray.Dataset"
else:
expected_content = "<xarray.Dataset>"
assert expected_content in result.output


def test_dump_disallowed_options(tmp_xarray_ds):
runner = CliRunner()
_, path = tmp_xarray_ds()
_, path = tmp_xarray_ds(consolidated=True)
result = runner.invoke(dump, [path, "-v", "var1", "-i"])
assert result.exit_code == 1
assert result.output == "Error: Cannot use both '-v' and '-i' options\n"
Expand All @@ -96,7 +110,6 @@ def test_dump_max_rows_default(tmp_xarray_ds):
assert len(result.output.split("\n")) > 30


@pytest.mark.skipif(xr.__version__ < "0.18.0", reason="need xarray v0.18.0 or higher")
def test_dump_max_rows_limited(tmp_xarray_ds):
runner = CliRunner()
_, path = tmp_xarray_ds(consolidated=True, n_vars=30)
Expand Down
14 changes: 8 additions & 6 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,28 @@
# and then run "tox" from this directory.

[tox]
envlist = py-xarray{16,19,21,202203,202206,202306,202312}
envlist = py-xarray{19,21,202206,202306,202312,202501}

[testenv]
deps =
click
pytest
fsspec
zarr
xarray16: xarray>=0.16.0,<0.17.0
xarray19: xarray>=0.19.0,<0.20.0
xarray21: xarray>=0.21.0,<0.22.0
xarray202203: xarray>=2022.03.0,<2022.04.0
xarray202206: xarray>=2022.06.0,<2022.07.0
xarray202306: xarray>=2023.06.0,<2023.07.0
xarray202312: xarray>=2023.12.0,<2024.01.0
xarray16: numpy<2
xarray202501: xarray==2025.01.1
xarray19: numpy<2
xarray21: numpy<2
xarray202203: numpy<2
xarray202206: numpy<2
xarray202306: numpy<2
xarray19: zarr<3
xarray21: zarr<3
xarray202206: zarr<3
xarray202306: zarr<3
xarray202312: zarr<3
xarray202501: zarr>=3
commands =
pytest
40 changes: 16 additions & 24 deletions zarrdump/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import xarray as xr
import zarr

ZARR_MAJOR_VERSION = zarr.__version__.split(".")[0]


@click.command()
@click.argument("url")
Expand All @@ -16,9 +18,7 @@ def dump(url: str, variable: str, max_rows: int, info: bool):
if not fs.exists(url):
raise click.ClickException(f"No file or directory at {url}")

m = fs.get_mapper(url)
consolidated = _metadata_is_consolidated(m)
object_, object_is_xarray = _open_with_xarray_or_zarr(m, consolidated)
object_, object_is_xarray = _open_with_xarray_or_zarr(url)

if variable is not None:
if info:
Expand All @@ -31,32 +31,24 @@ def dump(url: str, variable: str, max_rows: int, info: bool):
if object_is_xarray and info:
object_.info()
else:
try:
with xr.set_options(display_max_rows=max_rows):
print(object_)
except ValueError:
# xarray<v0.18.0 does not have display_max_rows option
with xr.set_options(display_max_rows=max_rows):
print(object_)


def _metadata_is_consolidated(m: fsspec.FSMap) -> bool:
try:
zarr.open_consolidated(m)
consolidated = True
except KeyError:
# group with un-consolidated metadata, or array
consolidated = False
return consolidated


def _open_with_xarray_or_zarr(
m: fsspec.FSMap, consolidated: bool
) -> Tuple[Union[xr.Dataset, zarr.hierarchy.Group, zarr.core.Array], bool]:
url: str,
) -> Tuple[Union[xr.Dataset, zarr.Group, zarr.Array], bool]:
if ZARR_MAJOR_VERSION >= "3":
exceptions = (ValueError,)
else:
exceptions = (KeyError, TypeError)

try:
result = xr.open_zarr(m, consolidated=consolidated)
result = xr.open_zarr(url)
is_xarray_dataset = True
except (KeyError, TypeError):
# xarray requires _ARRAY_DIMENSIONS attribute, assuming missing if KeyError
result = zarr.open_consolidated(m) if consolidated else zarr.open(m)
except exceptions:
# xarray cannot open dataset, fall back to using zarr directly
result = zarr.open(url)
is_xarray_dataset = False

return result, is_xarray_dataset
Loading