From ddb7b2724d1c24f37acae579b49b789338681944 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 28 Feb 2026 10:42:15 -0500 Subject: [PATCH 1/7] implement store server and node server --- pyproject.toml | 5 + src/zarr/core/keys.py | 181 +++++++++++++++++++++++ src/zarr/experimental/serve.py | 205 ++++++++++++++++++++++++++ tests/test_experimental/test_serve.py | 189 ++++++++++++++++++++++++ 4 files changed, 580 insertions(+) create mode 100644 src/zarr/core/keys.py create mode 100644 src/zarr/experimental/serve.py create mode 100644 tests/test_experimental/test_serve.py diff --git a/pyproject.toml b/pyproject.toml index 068caa1f0d..8a0cd004e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,11 @@ gpu = [ "cupy-cuda12x", ] cli = ["typer"] +server = [ + "starlette", + "httpx", + "uvicorn", +] # Testing extras test = [ "coverage>=7.10", diff --git a/src/zarr/core/keys.py b/src/zarr/core/keys.py new file mode 100644 index 0000000000..0d18887598 --- /dev/null +++ b/src/zarr/core/keys.py @@ -0,0 +1,181 @@ +"""Utilities for determining the set of valid store keys for zarr nodes. + +A zarr node (array or group) implicitly defines a subset of keys in the +underlying store. For an **array** the valid keys are: + +* metadata documents (``zarr.json`` for v3, ``.zarray`` / ``.zattrs`` for v2) +* chunk (or shard) keys whose decoded coordinates fall within the storage grid + +For a **group** the valid keys are: + +* its own metadata documents +* any path ``/`` where ```` is a direct member and + ```` is recursively valid for that child +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON + +if TYPE_CHECKING: + from zarr.core.array import Array + from zarr.core.group import Group + +_METADATA_KEYS_V3 = frozenset({ZARR_JSON}) +_METADATA_KEYS_V2 = frozenset({ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON}) + + +def metadata_keys(zarr_format: int) -> frozenset[str]: + """Return the set of metadata key basenames for a given zarr format version. + + Parameters + ---------- + zarr_format : int + The zarr format version (2 or 3). + + Returns + ------- + frozenset of str + """ + if zarr_format == 3: + return _METADATA_KEYS_V3 + return _METADATA_KEYS_V2 + + +def decode_chunk_key(array: Array, key: str) -> tuple[int, ...] | None: + """Try to decode *key* into chunk coordinates for *array*. + + Parameters + ---------- + array : Array + The array whose chunk key encoding should be used. + key : str + The candidate chunk key string. + + Returns + ------- + tuple of int, or None + The decoded coordinates, or ``None`` if *key* is not a valid chunk key. + """ + from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding + from zarr.core.metadata.v2 import ArrayV2Metadata + + try: + if isinstance(array.metadata, ArrayV2Metadata): + parts = key.split(array.metadata.dimension_separator) + return tuple(int(p) for p in parts) + + encoding = array.metadata.chunk_key_encoding + if isinstance(encoding, DefaultChunkKeyEncoding): + # Default v3 keys have the form "c012". + prefix = "c" + encoding.separator + if key == "c": + return () + if not key.startswith(prefix): + return None + return tuple(int(p) for p in key[len(prefix) :].split(encoding.separator)) + if isinstance(encoding, V2ChunkKeyEncoding): + return tuple(int(p) for p in key.split(encoding.separator)) + + # Unknown encoding — fall back to the encoding's own decode. + return encoding.decode_chunk_key(key) + except (ValueError, TypeError, NotImplementedError): + return None + + +def is_valid_chunk_key(array: Array, key: str) -> bool: + """Check whether *key* is a valid chunk key for *array*. + + Tries to decode the key and checks that the resulting coordinates fall + within the storage grid (shard grid if sharding is used, chunk grid + otherwise). + + Parameters + ---------- + array : Array + The array to validate against. + key : str + The candidate chunk key string. + + Returns + ------- + bool + """ + coords = decode_chunk_key(array, key) + if coords is None: + return False + grid = array._shard_grid_shape + if len(coords) != len(grid): + return False + return all(0 <= c < g for c, g in zip(coords, grid, strict=True)) + + +def is_valid_array_key(array: Array, key: str) -> bool: + """Check whether *key* is a valid store key for *array*. + + Valid keys are metadata documents and chunk keys. + + Parameters + ---------- + array : Array + The array to validate against. + key : str + The candidate key, relative to the array's root. + + Returns + ------- + bool + """ + if key in metadata_keys(array.metadata.zarr_format): + return True + return is_valid_chunk_key(array, key) + + +def is_valid_node_key(node: Array | Group, key: str) -> bool: + """Check whether *key* is a valid store key relative to *node*. + + For an ``Array``, valid keys are metadata documents and chunk keys. + + For a ``Group``, valid keys are the group's own metadata documents, or + a path of the form ``/`` where ```` is a direct + member and ```` is recursively valid for that child. + + Parameters + ---------- + node : Array or Group + The zarr node to validate against. + key : str + The candidate key, relative to the node's root. + + Returns + ------- + bool + """ + from zarr.core.array import Array + from zarr.core.group import Group + + if isinstance(node, Array): + return is_valid_array_key(node, key) + + # Group + if key in metadata_keys(node.metadata.zarr_format): + return True + + # Try to match the first path component against a child member. + if "/" in key: + child_name, remainder = key.split("/", 1) + else: + # A bare name with no slash can't be a valid group-level key — + # groups contain children (which have subkeys), not bare keys. + return False + + try: + child = node[child_name] + except KeyError: + return False + + if isinstance(child, (Array, Group)): + return is_valid_node_key(child, remainder) + return False diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py new file mode 100644 index 0000000000..ec9c0aca14 --- /dev/null +++ b/src/zarr/experimental/serve.py @@ -0,0 +1,205 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal, TypedDict + +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.core.buffer import cpu +from zarr.core.keys import is_valid_node_key + +if TYPE_CHECKING: + from starlette.requests import Request + from starlette.responses import Response + + from zarr.abc.store import ByteRequest, Store + from zarr.core.array import Array + from zarr.core.group import Group + + +class CorsOptions(TypedDict): + allow_origins: list[str] + allow_methods: list[str] + + +HTTPMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"] + + +def _parse_range_header(range_header: str) -> ByteRequest | None: + """Parse an HTTP Range header into a ByteRequest. + + Parameters + ---------- + range_header : str + The value of the Range header, e.g. ``"bytes=0-99"`` or ``"bytes=-100"``. + + Returns + ------- + ByteRequest or None + A ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``, + or ``None`` if the header cannot be parsed. + """ + if not range_header.startswith("bytes="): + return None + range_spec = range_header[len("bytes=") :] + if range_spec.startswith("-"): + # suffix request: bytes=-N + suffix = int(range_spec[1:]) + return SuffixByteRequest(suffix=suffix) + parts = range_spec.split("-", 1) + if len(parts) != 2: + return None + start_str, end_str = parts + start = int(start_str) + if end_str == "": + # offset request: bytes=N- + return OffsetByteRequest(offset=start) + # range request: bytes=N-M (HTTP end is inclusive, ByteRequest end is exclusive) + end = int(end_str) + 1 + return RangeByteRequest(start=start, end=end) + + +async def _get_response(store: Store, path: str, byte_range: ByteRequest | None = None) -> Response: + """Fetch a key from the store and return an HTTP response.""" + from starlette.responses import Response + + proto = cpu.buffer_prototype + content_type = "application/json" if path.endswith("zarr.json") else "application/octet-stream" + + buf = await store.get(path, proto, byte_range=byte_range) + if buf is None: + return Response(status_code=404) + + status_code = 206 if byte_range is not None else 200 + return Response(content=buf.to_bytes(), status_code=status_code, media_type=content_type) + + +async def _handle_request(request: Request) -> Response: + """Handle a request, optionally filtering by node validity.""" + from starlette.responses import Response + + store: Store = request.app.state.store + node: Array | Group | None = request.app.state.node + prefix: str = request.app.state.prefix + path = request.path_params.get("path", "") + + # If serving a node, validate the key before touching the store. + if node is not None and not is_valid_node_key(node, path): + return Response(status_code=404) + + # Resolve the full store key by prepending the node's prefix. + store_key = f"{prefix}/{path}" if prefix else path + + if request.method == "PUT": + body = await request.body() + buf = cpu.buffer_prototype.buffer.from_bytes(body) + await store.set(store_key, buf) + return Response(status_code=204) + + range_header = request.headers.get("range") + byte_range: ByteRequest | None = None + if range_header is not None: + byte_range = _parse_range_header(range_header) + if byte_range is None: + return Response(status_code=416) + + return await _get_response(store, store_key, byte_range) + + +def _make_starlette_app( + *, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, +) -> Any: + """Create a Starlette app with the request handler.""" + try: + from starlette.applications import Starlette + from starlette.middleware.cors import CORSMiddleware + from starlette.routing import Route + except ImportError as e: + raise ImportError( + "The zarr server requires the 'starlette' package. " + "Install it with: pip install zarr[server]" + ) from e + + if methods is None: + methods = {"GET"} + + app = Starlette( + routes=[Route("/{path:path}", _handle_request, methods=list(methods))], + ) + + if cors_options is not None: + app.add_middleware( + CORSMiddleware, + allow_origins=cors_options["allow_origins"], + allow_methods=cors_options["allow_methods"], + ) + return app + + +def serve_store( + store: Store, + *, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, +) -> Any: + """Create a Starlette ASGI app that serves every key in a zarr ``Store``. + + Parameters + ---------- + store : Store + The zarr store to serve. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + + Returns + ------- + Starlette + An ASGI application. + """ + app = _make_starlette_app(methods=methods, cors_options=cors_options) + app.state.store = store + app.state.node = None + app.state.prefix = "" + return app + + +def serve_node( + node: Array | Group, + *, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, +) -> Any: + """Create a Starlette ASGI app that serves only the keys belonging to a + zarr ``Array`` or ``Group``. + + For an ``Array``, the served keys are the metadata document(s) and all + chunk (or shard) keys whose coordinates fall within the array's grid. + + For a ``Group``, the served keys are the group's own metadata plus any + path that resolves through the group's members to a valid array metadata + document or chunk key. + + Requests for keys outside this set receive a 404 response, even if the + underlying store contains data at that path. + + Parameters + ---------- + node : Array or Group + The zarr array or group to serve. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + + Returns + ------- + Starlette + An ASGI application. + """ + app = _make_starlette_app(methods=methods, cors_options=cors_options) + app.state.store = node.store_path.store + app.state.node = node + app.state.prefix = node.store_path.path + return app diff --git a/tests/test_experimental/test_serve.py b/tests/test_experimental/test_serve.py new file mode 100644 index 0000000000..2ea11530c4 --- /dev/null +++ b/tests/test_experimental/test_serve.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import numpy as np +import pytest + +import zarr +from zarr.core.buffer import cpu +from zarr.core.sync import sync +from zarr.storage import MemoryStore + +pytest.importorskip("starlette") +pytest.importorskip("httpx") + +from starlette.testclient import TestClient + +from zarr.experimental.serve import serve_node, serve_store + + +@pytest.fixture +def memory_store() -> MemoryStore: + return MemoryStore() + + +@pytest.fixture +def group_with_arrays(memory_store: MemoryStore) -> zarr.Group: + """Create a group containing a regular array and a sharded array.""" + root = zarr.open_group(memory_store, mode="w") + zarr.create_array(root.store_path / "regular", shape=(4, 4), chunks=(2, 2), dtype="f8") + zarr.create_array( + root.store_path / "sharded", + shape=(8, 8), + chunks=(2, 2), + shards=(4, 4), + dtype="i4", + ) + return root + + +class TestServeNodeDoesNotExposeNonZarrKeys: + """serve_node must never expose keys that are not part of the zarr hierarchy.""" + + def test_non_zarr_key_returns_404( + self, memory_store: MemoryStore, group_with_arrays: zarr.Group + ) -> None: + # Plant a non-zarr file in the store. + non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret data") + sync(memory_store.set("secret.txt", non_zarr_buf)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + # The non-zarr key must not be accessible. + response = client.get("/secret.txt") + assert response.status_code == 404 + + def test_non_zarr_key_nested_returns_404( + self, memory_store: MemoryStore, group_with_arrays: zarr.Group + ) -> None: + # Plant a non-zarr file under a path that shares a prefix with a + # real array, but is not a valid chunk or metadata key. + non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"not a chunk") + sync(memory_store.set("regular/notes.txt", non_zarr_buf)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + response = client.get("/regular/notes.txt") + assert response.status_code == 404 + + def test_valid_metadata_is_accessible(self, group_with_arrays: zarr.Group) -> None: + app = serve_node(group_with_arrays) + client = TestClient(app) + + # Root group metadata + response = client.get("/zarr.json") + assert response.status_code == 200 + + # Array metadata + response = client.get("/regular/zarr.json") + assert response.status_code == 200 + + def test_valid_chunk_is_accessible(self, group_with_arrays: zarr.Group) -> None: + # Write some data so the chunk actually exists in the store. + arr = group_with_arrays["regular"] + arr[:] = np.ones((4, 4)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + # c/0/0 is a valid chunk key for a (4,4) array with (2,2) chunks. + response = client.get("/regular/c/0/0") + assert response.status_code == 200 + + +class TestShardedArrayByteRangeReads: + """Byte-range reads against a sharded array served via serve_node.""" + + def test_range_read_returns_206(self, group_with_arrays: zarr.Group) -> None: + arr = group_with_arrays["sharded"] + arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + # c/0/0 is the first shard key for an (8,8) array with (4,4) shards. + full_response = client.get("/sharded/c/0/0") + assert full_response.status_code == 200 + full_body = full_response.content + + # Request the first 8 bytes. + range_response = client.get("/sharded/c/0/0", headers={"Range": "bytes=0-7"}) + assert range_response.status_code == 206 + assert range_response.content == full_body[:8] + + def test_suffix_range_read(self, group_with_arrays: zarr.Group) -> None: + arr = group_with_arrays["sharded"] + arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + full_response = client.get("/sharded/c/0/0") + full_body = full_response.content + + # Request the last 4 bytes. + range_response = client.get("/sharded/c/0/0", headers={"Range": "bytes=-4"}) + assert range_response.status_code == 206 + assert range_response.content == full_body[-4:] + + def test_offset_range_read(self, group_with_arrays: zarr.Group) -> None: + arr = group_with_arrays["sharded"] + arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + full_response = client.get("/sharded/c/0/0") + full_body = full_response.content + + # Request everything from byte 4 onward. + range_response = client.get("/sharded/c/0/0", headers={"Range": "bytes=4-"}) + assert range_response.status_code == 206 + assert range_response.content == full_body[4:] + + +class TestWriteViaPut: + """serve_store and serve_node can be configured to accept PUT writes.""" + + def test_put_writes_to_store(self, memory_store: MemoryStore) -> None: + app = serve_store(memory_store, methods={"GET", "PUT"}) + client = TestClient(app) + + payload = b"hello zarr" + response = client.put("/some/key", content=payload) + assert response.status_code == 204 + + # Verify the data landed in the store. + buf = sync(memory_store.get("some/key", cpu.buffer_prototype)) + assert buf is not None + assert buf.to_bytes() == payload + + def test_put_then_get_roundtrip(self, memory_store: MemoryStore) -> None: + app = serve_store(memory_store, methods={"GET", "PUT"}) + client = TestClient(app) + + payload = b"\x00\x01\x02\x03" + client.put("/data/blob", content=payload) + + response = client.get("/data/blob") + assert response.status_code == 200 + assert response.content == payload + + def test_put_rejected_when_not_configured(self, memory_store: MemoryStore) -> None: + # Default methods is {"GET"} only. + app = serve_store(memory_store) + client = TestClient(app) + + response = client.put("/some/key", content=b"data") + assert response.status_code == 405 + + def test_put_on_node_validates_key( + self, memory_store: MemoryStore, group_with_arrays: zarr.Group + ) -> None: + app = serve_node(group_with_arrays, methods={"GET", "PUT"}) + client = TestClient(app) + + # Writing to a non-zarr key should be rejected. + response = client.put("/not_a_zarr_key.bin", content=b"data") + assert response.status_code == 404 From 8bec48d9e7d848f860af3be420f8a4b994fc019b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 1 Mar 2026 14:59:19 -0500 Subject: [PATCH 2/7] update tests and add v2 -> v3 example --- examples/serve_v2_v3/README.md | 27 ++ examples/serve_v2_v3/serve_v2_v3.py | 337 +++++++++++++++++++++++ src/zarr/core/chunk_key_encodings.py | 6 +- src/zarr/core/keys.py | 15 +- src/zarr/experimental/serve.py | 35 +-- tests/test_examples.py | 59 ++-- tests/test_experimental/test_serve.py | 370 +++++++++++++++++++++++--- 7 files changed, 753 insertions(+), 96 deletions(-) create mode 100644 examples/serve_v2_v3/README.md create mode 100644 examples/serve_v2_v3/serve_v2_v3.py diff --git a/examples/serve_v2_v3/README.md b/examples/serve_v2_v3/README.md new file mode 100644 index 0000000000..c4790f18ac --- /dev/null +++ b/examples/serve_v2_v3/README.md @@ -0,0 +1,27 @@ +# Serve a Zarr v2 Array as v3 over HTTP + +This example demonstrates how to build a custom read-only `Store` that +translates Zarr v2 data into v3 format on the fly, and serve it over HTTP +using `zarr.experimental.serve.serve_store`. + +The example shows how to: + +- Implement a custom `Store` subclass (`V2AsV3Store`) that wraps an + existing v2 store +- Translate v2 metadata (`.zarray` + `.zattrs`) to v3 `zarr.json` using + the same `_convert_array_metadata` helper that powers `zarr migrate v3` +- Map v3 default chunk keys (`c/0/0`) to their v2 equivalents (`0.0`) +- Serve the translated store over HTTP so that any v3-compatible client + can read v2 data without knowing the original format + +## Running the Example + +```bash +python examples/serve_v2_v3/serve_v2_v3.py +``` + +Or run with uv: + +```bash +uv run examples/serve_v2_v3/serve_v2_v3.py +``` diff --git a/examples/serve_v2_v3/serve_v2_v3.py b/examples/serve_v2_v3/serve_v2_v3.py new file mode 100644 index 0000000000..558ca2acaf --- /dev/null +++ b/examples/serve_v2_v3/serve_v2_v3.py @@ -0,0 +1,337 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "zarr[server]", +# "httpx", +# ] +# /// +# +""" +Serve a Zarr v2 array over HTTP as a Zarr v3 array. + +This example demonstrates how to build a read-only ``Store`` that translates +between Zarr formats on the fly. A v2 array lives in a ``MemoryStore``; the +custom ``V2AsV3Store`` intercepts reads and translates metadata: + +* Requests for ``zarr.json`` are answered by reading the v2 ``.zarray`` / + ``.zattrs`` metadata and converting it to a v3 ``zarr.json`` document + using the same ``_convert_array_metadata`` helper that powers the + ``zarr migrate v3`` CLI command. + +* Chunk keys are passed through unchanged because ``_convert_array_metadata`` + preserves the v2 chunk key encoding (``V2ChunkKeyEncoding``). A v3 + client reads the encoding from ``zarr.json`` and naturally produces the + same keys (e.g. ``0.0``) that the v2 store already contains. + +* The v2 metadata files (``.zarray``, ``.zattrs``) are hidden so only + v3 keys are visible. + +The translated store is then served over HTTP with ``serve_store``. A test +at the bottom opens the served data *as a v3 array* and verifies it can +read the values back. +""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +import numpy as np + +import zarr +from zarr.abc.store import ByteRequest, Store +from zarr.core.buffer import Buffer, cpu +from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.core.sync import sync +from zarr.metadata.migrate_v3 import _convert_array_metadata +from zarr.storage import MemoryStore + +if TYPE_CHECKING: + from collections.abc import AsyncIterator, Iterable + + from zarr.core.buffer import BufferPrototype + + +# --------------------------------------------------------------------------- +# Custom store that presents v2 data as v3 +# --------------------------------------------------------------------------- + +# v2 metadata keys that should be hidden from v3 clients. +_HIDDEN_V2_KEYS = frozenset({ZARRAY_JSON, ZATTRS_JSON}) + + +class V2AsV3Store(Store): + """A read-only store that wraps an existing v2 store and presents it as v3. + + Metadata translation + -------------------- + ``zarr.json`` ← ``.zarray`` + ``.zattrs`` (converted via + ``_convert_array_metadata`` from the CLI migration module) + + Chunk keys + ---------- + Chunk keys are **not** translated. The v3 metadata produced by + ``_convert_array_metadata`` uses ``V2ChunkKeyEncoding`` with the + same separator as the original v2 array, so chunk keys like ``0.0`` + are valid in both formats. + + Visibility + ---------- + The v2 metadata files (``.zarray``, ``.zattrs``) are hidden from + listing and ``get`` so that clients only see v3 keys. + """ + + supports_writes: bool = False + supports_deletes: bool = False + supports_listing: bool = True + + def __init__(self, v2_store: Store) -> None: + super().__init__(read_only=True) + self._v2 = v2_store + + def __eq__(self, other: object) -> bool: + return isinstance(other, V2AsV3Store) and self._v2 == other._v2 + + # -- metadata conversion ----------------------------------------------- + + async def _build_zarr_json(self, prototype: BufferPrototype) -> Buffer | None: + """Read v2 metadata from the wrapped store and return a v3 + ``zarr.json`` buffer.""" + zarray_buf = await self._v2.get(ZARRAY_JSON, prototype) + if zarray_buf is None: + return None + + zarray_dict = json.loads(zarray_buf.to_bytes()) + v2_meta = ArrayV2Metadata.from_dict(zarray_dict) + + # Merge in .zattrs if present. + zattrs_buf = await self._v2.get(ZATTRS_JSON, prototype) + if zattrs_buf is not None: + attrs = json.loads(zattrs_buf.to_bytes()) + if attrs: + v2_meta = v2_meta.update_attributes(attrs) + + # Reuse the same conversion the CLI uses. + v3_meta = _convert_array_metadata(v2_meta) + v3_json = json.dumps(v3_meta.to_dict()).encode() + return prototype.buffer.from_bytes(v3_json) + + # -- Store ABC implementation ------------------------------------------ + + async def get( + self, + key: str, + prototype: BufferPrototype | None = None, + byte_range: ByteRequest | None = None, + ) -> Buffer | None: + if prototype is None: + prototype = default_buffer_prototype() + await self._ensure_open() + + # Synthesise zarr.json from v2 metadata. + if key == ZARR_JSON: + buf = await self._build_zarr_json(prototype) + if buf is None or byte_range is None: + return buf + from zarr.storage._utils import _normalize_byte_range_index + + start, stop = _normalize_byte_range_index(buf, byte_range) + return prototype.buffer.from_buffer(buf[start:stop]) + + # Hide v2 metadata files. + if key in _HIDDEN_V2_KEYS: + return None + + # All other keys (chunk keys) pass through unchanged. + return await self._v2.get(key, prototype, byte_range=byte_range) + + async def get_partial_values( + self, + prototype: BufferPrototype, + key_ranges: Iterable[tuple[str, ByteRequest | None]], + ) -> list[Buffer | None]: + return [await self.get(k, prototype, br) for k, br in key_ranges] + + async def exists(self, key: str) -> bool: + if key == ZARR_JSON: + return await self._v2.exists(ZARRAY_JSON) + if key in _HIDDEN_V2_KEYS: + return False + return await self._v2.exists(key) + + async def set(self, key: str, value: Buffer) -> None: + raise NotImplementedError("V2AsV3Store is read-only") + + async def delete(self, key: str) -> None: + raise NotImplementedError("V2AsV3Store is read-only") + + async def list(self) -> AsyncIterator[str]: + async for key in self._v2.list(): + if key == ZARRAY_JSON: + yield ZARR_JSON + elif key in _HIDDEN_V2_KEYS: + continue + else: + yield key + + async def list_prefix(self, prefix: str) -> AsyncIterator[str]: + async for key in self.list(): + if key.startswith(prefix): + yield key + + async def list_dir(self, prefix: str) -> AsyncIterator[str]: + async for key in self.list(): + if not key.startswith(prefix): + continue + remainder = key[len(prefix) :] + if "/" not in remainder: + yield key + + +# --------------------------------------------------------------------------- +# Demo / tests +# --------------------------------------------------------------------------- + + +def create_v2_array() -> tuple[MemoryStore, np.ndarray]: + """Create a v2 array with some data and return the store + data.""" + store = MemoryStore() + data = np.arange(16, dtype="float64").reshape(4, 4) + arr = zarr.create_array(store, shape=data.shape, chunks=(2, 2), dtype=data.dtype, zarr_format=2) + arr[:] = data + return store, data + + +def test_metadata_translation() -> None: + """The translated zarr.json should be valid v3 metadata.""" + v2_store, _ = create_v2_array() + v3_store = V2AsV3Store(v2_store) + + buf = sync(v3_store.get(ZARR_JSON, cpu.buffer_prototype)) + assert buf is not None + meta = json.loads(buf.to_bytes()) + + assert meta["zarr_format"] == 3 + assert meta["node_type"] == "array" + assert meta["shape"] == [4, 4] + assert meta["chunk_grid"]["configuration"]["chunk_shape"] == [2, 2] + # The v2 chunk key encoding is preserved. + assert meta["chunk_key_encoding"]["name"] == "v2" + assert any(c["name"] in ("bytes", "zstd", "blosc") for c in meta["codecs"]) + print(" metadata translation: OK") + print(f" zarr.json:\n{json.dumps(meta, indent=2)}") + + +def test_chunk_passthrough() -> None: + """Chunk keys should pass through unchanged (v2 encoding preserved).""" + v2_store, _ = create_v2_array() + v3_store = V2AsV3Store(v2_store) + + # The v2 store has chunk key "0.0"; the v3 store should serve the + # same key since the metadata says V2ChunkKeyEncoding. + v2_buf = sync(v2_store.get("0.0", cpu.buffer_prototype)) + v3_buf = sync(v3_store.get("0.0", cpu.buffer_prototype)) + assert v2_buf is not None + assert v3_buf is not None + assert v2_buf.to_bytes() == v3_buf.to_bytes() + print(" chunk passthrough: OK") + + +def test_v2_metadata_hidden() -> None: + """v2 metadata files should not be visible.""" + v2_store, _ = create_v2_array() + v3_store = V2AsV3Store(v2_store) + + assert sync(v3_store.get(ZARRAY_JSON, cpu.buffer_prototype)) is None + assert sync(v3_store.get(ZATTRS_JSON, cpu.buffer_prototype)) is None + assert not sync(v3_store.exists(ZARRAY_JSON)) + assert not sync(v3_store.exists(ZATTRS_JSON)) + print(" v2 metadata hidden: OK") + + +def test_listing() -> None: + """Store listing should show v3 keys only.""" + v2_store, _ = create_v2_array() + v3_store = V2AsV3Store(v2_store) + + async def _list() -> list[str]: + return [k async for k in v3_store.list()] + + keys = sync(_list()) + assert ZARR_JSON in keys + assert ZARRAY_JSON not in keys + assert ZATTRS_JSON not in keys + # Chunk keys use v2 encoding (unchanged). + assert "0.0" in keys + print(f" listing keys: {sorted(keys)}") + + +def test_serve_roundtrip() -> None: + """Serve the translated store over HTTP and read it back as v3.""" + from starlette.testclient import TestClient + + from zarr.experimental.serve import serve_store + + v2_store, _data = create_v2_array() + v3_store = V2AsV3Store(v2_store) + + app = serve_store(v3_store) + client = TestClient(app) + + # Metadata should be valid v3 JSON. + resp = client.get("/zarr.json") + assert resp.status_code == 200 + assert resp.headers["content-type"] == "application/json" + meta = resp.json() + assert meta["zarr_format"] == 3 + + # Chunks should be accessible via v2-style keys (as the metadata declares). + resp = client.get("/0.0") + assert resp.status_code == 200 + assert len(resp.content) > 0 + + # v2 metadata files should NOT be accessible. + resp = client.get("/.zarray") + assert resp.status_code == 404 + resp = client.get("/.zattrs") + assert resp.status_code == 404 + + print(" HTTP round-trip: OK") + + +def test_open_as_v3_array() -> None: + """Open the translated store as a v3 array and verify the data.""" + v2_store, data = create_v2_array() + v3_store = V2AsV3Store(v2_store) + + arr = zarr.open_array(v3_store) + assert arr.metadata.zarr_format == 3 + np.testing.assert_array_equal(arr[:], data) + print(" open as v3 array: OK") + print(f" data:\n{arr[:]}") + + +if __name__ == "__main__": + print("Creating v2 array and wrapping it with V2AsV3Store...\n") + + print("1. Metadata translation") + test_metadata_translation() + + print("\n2. Chunk key passthrough") + test_chunk_passthrough() + + print("\n3. v2 metadata hidden") + test_v2_metadata_hidden() + + print("\n4. Store listing") + test_listing() + + print("\n5. HTTP round-trip via serve_store") + test_serve_roundtrip() + + print("\n6. Open as v3 array") + test_open_as_v3_array() + + print("\nAll checks passed.") diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py index 5c9f77118a..ca6154d1ba 100644 --- a/src/zarr/core/chunk_key_encodings.py +++ b/src/zarr/core/chunk_key_encodings.py @@ -79,7 +79,11 @@ def __post_init__(self) -> None: def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]: if chunk_key == "c": return () - return tuple(map(int, chunk_key[1:].split(self.separator))) + # Strip the "c" prefix (e.g. "c/" or "c.") before splitting. + prefix = "c" + self.separator + if chunk_key.startswith(prefix): + return tuple(map(int, chunk_key[len(prefix) :].split(self.separator))) + raise ValueError(f"Invalid chunk key for default encoding: {chunk_key!r}") def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: return self.separator.join(map(str, ("c",) + chunk_coords)) diff --git a/src/zarr/core/keys.py b/src/zarr/core/keys.py index 0d18887598..4cc13874ab 100644 --- a/src/zarr/core/keys.py +++ b/src/zarr/core/keys.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON @@ -44,7 +44,7 @@ def metadata_keys(zarr_format: int) -> frozenset[str]: return _METADATA_KEYS_V2 -def decode_chunk_key(array: Array, key: str) -> tuple[int, ...] | None: +def decode_chunk_key(array: Array[Any], key: str) -> tuple[int, ...] | None: """Try to decode *key* into chunk coordinates for *array*. Parameters @@ -85,7 +85,7 @@ def decode_chunk_key(array: Array, key: str) -> tuple[int, ...] | None: return None -def is_valid_chunk_key(array: Array, key: str) -> bool: +def is_valid_chunk_key(array: Array[Any], key: str) -> bool: """Check whether *key* is a valid chunk key for *array*. Tries to decode the key and checks that the resulting coordinates fall @@ -112,7 +112,7 @@ def is_valid_chunk_key(array: Array, key: str) -> bool: return all(0 <= c < g for c, g in zip(coords, grid, strict=True)) -def is_valid_array_key(array: Array, key: str) -> bool: +def is_valid_array_key(array: Array[Any], key: str) -> bool: """Check whether *key* is a valid store key for *array*. Valid keys are metadata documents and chunk keys. @@ -133,7 +133,7 @@ def is_valid_array_key(array: Array, key: str) -> bool: return is_valid_chunk_key(array, key) -def is_valid_node_key(node: Array | Group, key: str) -> bool: +def is_valid_node_key(node: Array[Any] | Group, key: str) -> bool: """Check whether *key* is a valid store key relative to *node*. For an ``Array``, valid keys are metadata documents and chunk keys. @@ -154,7 +154,6 @@ def is_valid_node_key(node: Array | Group, key: str) -> bool: bool """ from zarr.core.array import Array - from zarr.core.group import Group if isinstance(node, Array): return is_valid_array_key(node, key) @@ -176,6 +175,4 @@ def is_valid_node_key(node: Array | Group, key: str) -> bool: except KeyError: return False - if isinstance(child, (Array, Group)): - return is_valid_node_key(child, remainder) - return False + return is_valid_node_key(child, remainder) diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py index ec9c0aca14..1b6d4367a6 100644 --- a/src/zarr/experimental/serve.py +++ b/src/zarr/experimental/serve.py @@ -40,21 +40,24 @@ def _parse_range_header(range_header: str) -> ByteRequest | None: if not range_header.startswith("bytes="): return None range_spec = range_header[len("bytes=") :] - if range_spec.startswith("-"): - # suffix request: bytes=-N - suffix = int(range_spec[1:]) - return SuffixByteRequest(suffix=suffix) - parts = range_spec.split("-", 1) - if len(parts) != 2: + try: + if range_spec.startswith("-"): + # suffix request: bytes=-N + suffix = int(range_spec[1:]) + return SuffixByteRequest(suffix=suffix) + parts = range_spec.split("-", 1) + if len(parts) != 2: + return None + start_str, end_str = parts + start = int(start_str) + if end_str == "": + # offset request: bytes=N- + return OffsetByteRequest(offset=start) + # range request: bytes=N-M (HTTP end is inclusive, ByteRequest end is exclusive) + end = int(end_str) + 1 + return RangeByteRequest(start=start, end=end) + except ValueError: return None - start_str, end_str = parts - start = int(start_str) - if end_str == "": - # offset request: bytes=N- - return OffsetByteRequest(offset=start) - # range request: bytes=N-M (HTTP end is inclusive, ByteRequest end is exclusive) - end = int(end_str) + 1 - return RangeByteRequest(start=start, end=end) async def _get_response(store: Store, path: str, byte_range: ByteRequest | None = None) -> Response: @@ -77,7 +80,7 @@ async def _handle_request(request: Request) -> Response: from starlette.responses import Response store: Store = request.app.state.store - node: Array | Group | None = request.app.state.node + node: Array[Any] | Group | None = request.app.state.node prefix: str = request.app.state.prefix path = request.path_params.get("path", "") @@ -166,7 +169,7 @@ def serve_store( def serve_node( - node: Array | Group, + node: Array[Any] | Group, *, methods: set[HTTPMethod] | None = None, cors_options: CorsOptions | None = None, diff --git a/tests/test_examples.py b/tests/test_examples.py index 152b0a1a88..4c358f8de9 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -19,47 +19,27 @@ ZARR_PROJECT_PATH = Path(".").absolute() -def set_dep(script: str, dependency: str) -> str: - """ - Set a dependency in a PEP-723 script header. - If the package is already in the list, it will be replaced. - If the package is not already in the list, it will be added. +def _get_zarr_extras(script_path: Path) -> str: + """Extract extras from the zarr dependency in a script's PEP 723 header. - Source code modified from - https://packaging.python.org/en/latest/specifications/inline-script-metadata/#reference-implementation + For example, if the script declares ``zarr[server]``, this returns ``[server]``. + If the script declares ``zarr`` with no extras, this returns ``""``. """ - match = re.search(PEP_723_REGEX, script) - + source_text = script_path.read_text() + match = re.search(PEP_723_REGEX, source_text) if match is None: - raise ValueError(f"PEP-723 header not found in {script}") + return "" content = "".join( line[2:] if line.startswith("# ") else line[1:] for line in match.group("content").splitlines(keepends=True) ) - config = tomlkit.parse(content) - for idx, dep in enumerate(tuple(config["dependencies"])): - if Requirement(dep).name == Requirement(dependency).name: - config["dependencies"][idx] = dependency - - new_content = "".join( - f"# {line}" if line.strip() else f"#{line}" - for line in tomlkit.dumps(config).splitlines(keepends=True) - ) - - start, end = match.span("content") - return script[:start] + new_content + script[end:] - - -def resave_script(source_path: Path, dest_path: Path) -> None: - """ - Read a script from source_path and save it to dest_path after inserting the absolute path to the - local Zarr project directory in the PEP-723 header. - """ - source_text = source_path.read_text() - dest_text = set_dep(source_text, f"zarr @ file:///{ZARR_PROJECT_PATH}") - dest_path.write_text(dest_text) + for dep in config.get("dependencies", []): + req = Requirement(dep) + if req.name == "zarr" and req.extras: + return "[" + ",".join(sorted(req.extras)) + "]" + return "" def test_script_paths() -> None: @@ -73,14 +53,15 @@ def test_script_paths() -> None: sys.platform in ("win32",), reason="This test fails due for unknown reasons on Windows in CI." ) @pytest.mark.parametrize("script_path", script_paths) -def test_scripts_can_run(script_path: Path, tmp_path: Path) -> None: - dest_path = tmp_path / script_path.name - # We resave the script after inserting the absolute path to the local Zarr project directory, - # and then test its behavior. - # This allows the example to be useful to users who don't have Zarr installed, but also testable. - resave_script(script_path, dest_path) +def test_scripts_can_run(script_path: Path) -> None: + # Override the zarr dependency with the local project, preserving any extras + # declared in the script's PEP 723 header (e.g. zarr[server]). + extras = _get_zarr_extras(script_path) + zarr_dep = f"zarr{extras} @ file:///{ZARR_PROJECT_PATH}" result = subprocess.run( - ["uv", "run", "--refresh", str(dest_path)], capture_output=True, text=True + ["uv", "run", "--with", zarr_dep, "--refresh", str(script_path)], + capture_output=True, + text=True, ) assert result.returncode == 0, ( f"Script at {script_path} failed to run. Output: {result.stdout} Error: {result.stderr}" diff --git a/tests/test_experimental/test_serve.py b/tests/test_experimental/test_serve.py index 2ea11530c4..e8ecfff28a 100644 --- a/tests/test_experimental/test_serve.py +++ b/tests/test_experimental/test_serve.py @@ -1,30 +1,29 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import pytest import zarr from zarr.core.buffer import cpu from zarr.core.sync import sync -from zarr.storage import MemoryStore + +if TYPE_CHECKING: + from zarr.abc.store import Store pytest.importorskip("starlette") pytest.importorskip("httpx") from starlette.testclient import TestClient -from zarr.experimental.serve import serve_node, serve_store +from zarr.experimental.serve import CorsOptions, _parse_range_header, serve_node, serve_store @pytest.fixture -def memory_store() -> MemoryStore: - return MemoryStore() - - -@pytest.fixture -def group_with_arrays(memory_store: MemoryStore) -> zarr.Group: +def group_with_arrays(store: Store) -> zarr.Group: """Create a group containing a regular array and a sharded array.""" - root = zarr.open_group(memory_store, mode="w") + root = zarr.open_group(store, mode="w") zarr.create_array(root.store_path / "regular", shape=(4, 4), chunks=(2, 2), dtype="f8") zarr.create_array( root.store_path / "sharded", @@ -36,15 +35,15 @@ def group_with_arrays(memory_store: MemoryStore) -> zarr.Group: return root +@pytest.mark.parametrize("store", ["memory"], indirect=True) class TestServeNodeDoesNotExposeNonZarrKeys: """serve_node must never expose keys that are not part of the zarr hierarchy.""" - def test_non_zarr_key_returns_404( - self, memory_store: MemoryStore, group_with_arrays: zarr.Group - ) -> None: - # Plant a non-zarr file in the store. + def test_non_zarr_key_returns_404(self, store: Store, group_with_arrays: zarr.Group) -> None: + """A key that is not valid zarr metadata or a valid chunk key should return 404, + even if the underlying store contains data at that path.""" non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret data") - sync(memory_store.set("secret.txt", non_zarr_buf)) + sync(store.set("secret.txt", non_zarr_buf)) app = serve_node(group_with_arrays) client = TestClient(app) @@ -54,12 +53,12 @@ def test_non_zarr_key_returns_404( assert response.status_code == 404 def test_non_zarr_key_nested_returns_404( - self, memory_store: MemoryStore, group_with_arrays: zarr.Group + self, store: Store, group_with_arrays: zarr.Group ) -> None: - # Plant a non-zarr file under a path that shares a prefix with a - # real array, but is not a valid chunk or metadata key. + """A non-zarr key nested under a real array's path should return 404, + even though the path prefix matches a valid zarr node.""" non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"not a chunk") - sync(memory_store.set("regular/notes.txt", non_zarr_buf)) + sync(store.set("regular/notes.txt", non_zarr_buf)) app = serve_node(group_with_arrays) client = TestClient(app) @@ -68,6 +67,8 @@ def test_non_zarr_key_nested_returns_404( assert response.status_code == 404 def test_valid_metadata_is_accessible(self, group_with_arrays: zarr.Group) -> None: + """Zarr metadata keys (zarr.json) for both the root group and child arrays + should be served with a 200 status.""" app = serve_node(group_with_arrays) client = TestClient(app) @@ -80,8 +81,10 @@ def test_valid_metadata_is_accessible(self, group_with_arrays: zarr.Group) -> No assert response.status_code == 200 def test_valid_chunk_is_accessible(self, group_with_arrays: zarr.Group) -> None: - # Write some data so the chunk actually exists in the store. + """A valid, in-bounds chunk key for an array with written data should + be served with a 200 status.""" arr = group_with_arrays["regular"] + assert isinstance(arr, zarr.Array) arr[:] = np.ones((4, 4)) app = serve_node(group_with_arrays) @@ -91,12 +94,40 @@ def test_valid_chunk_is_accessible(self, group_with_arrays: zarr.Group) -> None: response = client.get("/regular/c/0/0") assert response.status_code == 200 + def test_out_of_bounds_chunk_key_returns_404(self, group_with_arrays: zarr.Group) -> None: + """A chunk key that is syntactically valid but references indices beyond + the array's chunk grid should return 404.""" + arr = group_with_arrays["regular"] + assert isinstance(arr, zarr.Array) + arr[:] = np.ones((4, 4)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + # (4,4) array with (2,2) chunks has grid shape (2,2), so c/99/99 is + # syntactically valid but out of bounds. + response = client.get("/regular/c/99/99") + assert response.status_code == 404 + + def test_empty_path_returns_404(self, group_with_arrays: zarr.Group) -> None: + """A request to the root path '/' should return 404 because an empty + string is not a valid zarr key.""" + app = serve_node(group_with_arrays) + client = TestClient(app) + + response = client.get("/") + assert response.status_code == 404 + +@pytest.mark.parametrize("store", ["memory"], indirect=True) class TestShardedArrayByteRangeReads: """Byte-range reads against a sharded array served via serve_node.""" def test_range_read_returns_206(self, group_with_arrays: zarr.Group) -> None: + """A Range header requesting a specific byte range (e.g. bytes=0-7) should + return 206 Partial Content with exactly those bytes.""" arr = group_with_arrays["sharded"] + assert isinstance(arr, zarr.Array) arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) app = serve_node(group_with_arrays) @@ -113,7 +144,10 @@ def test_range_read_returns_206(self, group_with_arrays: zarr.Group) -> None: assert range_response.content == full_body[:8] def test_suffix_range_read(self, group_with_arrays: zarr.Group) -> None: + """A suffix byte range (e.g. bytes=-4) should return the last N bytes + of the resource with a 206 status.""" arr = group_with_arrays["sharded"] + assert isinstance(arr, zarr.Array) arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) app = serve_node(group_with_arrays) @@ -128,7 +162,10 @@ def test_suffix_range_read(self, group_with_arrays: zarr.Group) -> None: assert range_response.content == full_body[-4:] def test_offset_range_read(self, group_with_arrays: zarr.Group) -> None: + """An offset byte range (e.g. bytes=4-) should return all bytes from + the given offset to the end, with a 206 status.""" arr = group_with_arrays["sharded"] + assert isinstance(arr, zarr.Array) arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) app = serve_node(group_with_arrays) @@ -143,11 +180,100 @@ def test_offset_range_read(self, group_with_arrays: zarr.Group) -> None: assert range_response.content == full_body[4:] +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestMalformedRangeHeaders: + """Malformed Range headers must return 416, never crash the server.""" + + @pytest.mark.parametrize( + "header", + [ + "bytes=abc-def", + "bytes=-abc", + "bytes=abc-", + "bytes=0-7,10-20", + ], + ) + def test_malformed_range_returns_416(self, store: Store, header: str) -> None: + """Range headers with non-numeric values, multiple ranges, or other + malformed syntax should return 416 Range Not Satisfiable instead of + crashing the server.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"some data here") + sync(store.set("key", buf)) + + app = serve_store(store) + client = TestClient(app, raise_server_exceptions=False) + + response = client.get("/key", headers={"Range": header}) + assert response.status_code == 416 + + def test_non_bytes_unit_returns_416(self, store: Store) -> None: + """A Range header using a unit other than 'bytes' (e.g. 'chars=0-7') + should return 416 because only byte ranges are supported.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"some data") + sync(store.set("key", buf)) + + app = serve_store(store) + client = TestClient(app) + + response = client.get("/key", headers={"Range": "chars=0-7"}) + assert response.status_code == 416 + + +class TestParseRangeHeader: + """Unit tests for _parse_range_header.""" + + def test_valid_range(self) -> None: + """'bytes=0-99' should parse into a RangeByteRequest with start=0 and + end=100 (end is exclusive, so the inclusive HTTP end is incremented).""" + from zarr.abc.store import RangeByteRequest + + result = _parse_range_header("bytes=0-99") + assert result == RangeByteRequest(start=0, end=100) + + def test_valid_suffix(self) -> None: + """'bytes=-50' should parse into a SuffixByteRequest requesting the + last 50 bytes of the resource.""" + from zarr.abc.store import SuffixByteRequest + + result = _parse_range_header("bytes=-50") + assert result == SuffixByteRequest(suffix=50) + + def test_valid_offset(self) -> None: + """'bytes=10-' should parse into an OffsetByteRequest starting at + byte 10 and reading to the end of the resource.""" + from zarr.abc.store import OffsetByteRequest + + result = _parse_range_header("bytes=10-") + assert result == OffsetByteRequest(offset=10) + + def test_non_bytes_unit(self) -> None: + """A Range header with a non-'bytes' unit should return None.""" + assert _parse_range_header("chars=0-7") is None + + def test_garbage_values(self) -> None: + """Non-numeric values in the byte range should return None instead + of raising a ValueError.""" + assert _parse_range_header("bytes=abc-def") is None + + def test_multi_range(self) -> None: + """Multi-range requests (e.g. bytes=0-7,10-20) are not supported and + should return None.""" + assert _parse_range_header("bytes=0-7,10-20") is None + + def test_empty_spec(self) -> None: + """A Range header with no range specifier after 'bytes=' should + return None.""" + assert _parse_range_header("bytes=") is None + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) class TestWriteViaPut: """serve_store and serve_node can be configured to accept PUT writes.""" - def test_put_writes_to_store(self, memory_store: MemoryStore) -> None: - app = serve_store(memory_store, methods={"GET", "PUT"}) + def test_put_writes_to_store(self, store: Store) -> None: + """A PUT request to serve_store with PUT enabled should write the + request body into the store at the given key.""" + app = serve_store(store, methods={"GET", "PUT"}) client = TestClient(app) payload = b"hello zarr" @@ -155,12 +281,14 @@ def test_put_writes_to_store(self, memory_store: MemoryStore) -> None: assert response.status_code == 204 # Verify the data landed in the store. - buf = sync(memory_store.get("some/key", cpu.buffer_prototype)) + buf = sync(store.get("some/key", cpu.buffer_prototype)) assert buf is not None assert buf.to_bytes() == payload - def test_put_then_get_roundtrip(self, memory_store: MemoryStore) -> None: - app = serve_store(memory_store, methods={"GET", "PUT"}) + def test_put_then_get_roundtrip(self, store: Store) -> None: + """Data written via PUT should be retrievable via a subsequent GET + at the same key.""" + app = serve_store(store, methods={"GET", "PUT"}) client = TestClient(app) payload = b"\x00\x01\x02\x03" @@ -170,20 +298,200 @@ def test_put_then_get_roundtrip(self, memory_store: MemoryStore) -> None: assert response.status_code == 200 assert response.content == payload - def test_put_rejected_when_not_configured(self, memory_store: MemoryStore) -> None: - # Default methods is {"GET"} only. - app = serve_store(memory_store) + def test_put_rejected_when_not_configured(self, store: Store) -> None: + """PUT requests should return 405 Method Not Allowed when the server + is created with the default methods (GET only).""" + app = serve_store(store) client = TestClient(app) response = client.put("/some/key", content=b"data") assert response.status_code == 405 - def test_put_on_node_validates_key( - self, memory_store: MemoryStore, group_with_arrays: zarr.Group - ) -> None: + def test_put_on_node_validates_key(self, store: Store, group_with_arrays: zarr.Group) -> None: + """PUT requests via serve_node should be rejected with 404 when the + target key is not a valid zarr key (metadata or chunk).""" app = serve_node(group_with_arrays, methods={"GET", "PUT"}) client = TestClient(app) - # Writing to a non-zarr key should be rejected. response = client.put("/not_a_zarr_key.bin", content=b"data") assert response.status_code == 404 + + def test_put_to_valid_chunk_key_succeeds(self, group_with_arrays: zarr.Group) -> None: + """PUT requests via serve_node to a valid chunk key should succeed + with 204, and the written data should be retrievable via GET.""" + app = serve_node(group_with_arrays, methods={"GET", "PUT"}) + client = TestClient(app) + + payload = b"\x00" * 32 + response = client.put("/regular/c/0/0", content=payload) + assert response.status_code == 204 + + # Confirm it round-trips. + get_response = client.get("/regular/c/0/0") + assert get_response.status_code == 200 + assert get_response.content == payload + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestServeStoreEdgeCases: + """Edge cases for serve_store.""" + + def test_get_nonexistent_key_returns_404(self, store: Store) -> None: + """GET for a key that does not exist in the store should return 404.""" + app = serve_store(store) + client = TestClient(app) + + response = client.get("/no/such/key") + assert response.status_code == 404 + + def test_empty_path_returns_404(self, store: Store) -> None: + """GET to the root path '/' (empty key) should return 404 because + an empty string is not a valid store key.""" + app = serve_store(store) + client = TestClient(app) + + response = client.get("/") + assert response.status_code == 404 + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestServeNodeDirectArray: + """Serve a single array directly (not through a group).""" + + def test_serve_nested_array_directly(self, store: Store) -> None: + """When serve_node is given a nested array (not a group), requests + should use keys relative to that array's path. Metadata and in-bounds + chunks should return 200, and out-of-bounds chunks should return 404.""" + root = zarr.open_group(store, mode="w") + arr = zarr.create_array( + root.store_path / "sub/nested", + shape=(4,), + chunks=(2,), + dtype="f8", + ) + arr[:] = np.arange(4, dtype="f8") + + # Serve the array directly — its prefix is "sub/nested". + app = serve_node(arr) + client = TestClient(app) + + # Metadata should be accessible at the array root. + response = client.get("/zarr.json") + assert response.status_code == 200 + + # Chunk keys are relative to the array. + response = client.get("/c/0") + assert response.status_code == 200 + + response = client.get("/c/1") + assert response.status_code == 200 + + # Out of bounds. + response = client.get("/c/99") + assert response.status_code == 404 + + def test_serve_root_array(self, store: Store) -> None: + """When serve_node is given an array stored at the root of a store + (empty prefix), metadata and chunk keys should be accessible at + their natural paths.""" + arr = zarr.create_array( + store, + shape=(6,), + chunks=(3,), + dtype="i4", + ) + arr[:] = np.arange(6, dtype="i4") + + # Root-level array has prefix = "". + app = serve_node(arr) + client = TestClient(app) + + response = client.get("/zarr.json") + assert response.status_code == 200 + + response = client.get("/c/0") + assert response.status_code == 200 + + response = client.get("/c/1") + assert response.status_code == 200 + + response = client.get("/c/2") + assert response.status_code == 404 + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestContentType: + """Responses should have the correct Content-Type.""" + + def test_metadata_has_json_content_type(self, group_with_arrays: zarr.Group) -> None: + """Zarr metadata files (zarr.json) should be served with + Content-Type: application/json.""" + app = serve_node(group_with_arrays) + client = TestClient(app) + + response = client.get("/zarr.json") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + + def test_chunk_has_octet_stream_content_type(self, group_with_arrays: zarr.Group) -> None: + """Chunk data should be served with Content-Type: application/octet-stream + since it is binary data.""" + arr = group_with_arrays["regular"] + assert isinstance(arr, zarr.Array) + arr[:] = np.ones((4, 4)) + + app = serve_node(group_with_arrays) + client = TestClient(app) + + response = client.get("/regular/c/0/0") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/octet-stream" + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestCorsMiddleware: + """CORS middleware should add the expected headers.""" + + def test_cors_headers_present(self, store: Store) -> None: + """When cors_options are provided, responses should include the + Access-Control-Allow-Origin header matching the request origin.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"data") + sync(store.set("key", buf)) + + cors = CorsOptions(allow_origins=["https://example.com"], allow_methods=["GET"]) + app = serve_store(store, cors_options=cors) + client = TestClient(app) + + response = client.get("/key", headers={"Origin": "https://example.com"}) + assert response.status_code == 200 + assert response.headers["access-control-allow-origin"] == "https://example.com" + + def test_cors_preflight(self, store: Store) -> None: + """CORS preflight OPTIONS requests should return 200 with the + Access-Control-Allow-Origin header when CORS is configured.""" + cors = CorsOptions(allow_origins=["*"], allow_methods=["GET", "PUT"]) + app = serve_store(store, methods={"GET", "PUT"}, cors_options=cors) + client = TestClient(app) + + response = client.options( + "/any/path", + headers={ + "Origin": "https://example.com", + "Access-Control-Request-Method": "PUT", + }, + ) + assert response.status_code == 200 + assert "access-control-allow-origin" in response.headers + + def test_no_cors_headers_without_option(self, store: Store) -> None: + """When no cors_options are provided, responses should not include + any CORS headers, even if the request includes an Origin header.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"data") + sync(store.set("key", buf)) + + app = serve_store(store) + client = TestClient(app) + + response = client.get("/key", headers={"Origin": "https://example.com"}) + assert response.status_code == 200 + assert "access-control-allow-origin" not in response.headers From a3d0358245f5607842b576e9697048f733fa8aca Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 1 Mar 2026 15:09:54 -0500 Subject: [PATCH 3/7] add __all__ and clean up tests --- src/zarr/experimental/serve.py | 9 ++- tests/test_experimental/test_serve.py | 83 +++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py index 1b6d4367a6..26978aa02f 100644 --- a/src/zarr/experimental/serve.py +++ b/src/zarr/experimental/serve.py @@ -7,6 +7,7 @@ from zarr.core.keys import is_valid_node_key if TYPE_CHECKING: + from starlette.applications import Starlette from starlette.requests import Request from starlette.responses import Response @@ -14,6 +15,8 @@ from zarr.core.array import Array from zarr.core.group import Group +__all__ = ["CorsOptions", "HTTPMethod", "serve_node", "serve_store"] + class CorsOptions(TypedDict): allow_origins: list[str] @@ -111,7 +114,7 @@ def _make_starlette_app( *, methods: set[HTTPMethod] | None = None, cors_options: CorsOptions | None = None, -) -> Any: +) -> Starlette: """Create a Starlette app with the request handler.""" try: from starlette.applications import Starlette @@ -144,7 +147,7 @@ def serve_store( *, methods: set[HTTPMethod] | None = None, cors_options: CorsOptions | None = None, -) -> Any: +) -> Starlette: """Create a Starlette ASGI app that serves every key in a zarr ``Store``. Parameters @@ -173,7 +176,7 @@ def serve_node( *, methods: set[HTTPMethod] | None = None, cors_options: CorsOptions | None = None, -) -> Any: +) -> Starlette: """Create a Starlette ASGI app that serves only the keys belonging to a zarr ``Array`` or ``Group``. diff --git a/tests/test_experimental/test_serve.py b/tests/test_experimental/test_serve.py index e8ecfff28a..5ec28b9a81 100644 --- a/tests/test_experimental/test_serve.py +++ b/tests/test_experimental/test_serve.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from zarr.abc.store import Store + from zarr.core.common import ZarrFormat pytest.importorskip("starlette") pytest.importorskip("httpx") @@ -495,3 +496,85 @@ def test_no_cors_headers_without_option(self, store: Store) -> None: response = client.get("/key", headers={"Origin": "https://example.com"}) assert response.status_code == 200 assert "access-control-allow-origin" not in response.headers + + +def _metadata_key(zarr_format: ZarrFormat) -> str: + """Return the metadata key for the given zarr format.""" + return "zarr.json" if zarr_format == 3 else ".zarray" + + +def _chunk_key(zarr_format: ZarrFormat, coords: str) -> str: + """Return a chunk key for the given format. + + *coords* is a dot-separated string like ``"0.0"``. For v3 this becomes + ``"c/0/0"``; for v2 it is returned unchanged. + """ + if zarr_format == 3: + return "c/" + coords.replace(".", "/") + return coords + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestServeNodeV2AndV3: + """Test serve_node with both v2 and v3 arrays side by side.""" + + def test_metadata_accessible(self, store: Store, zarr_format: ZarrFormat) -> None: + """The format-appropriate metadata key should be served with 200.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + app = serve_node(arr) + client = TestClient(app) + + response = client.get(f"/{_metadata_key(zarr_format)}") + assert response.status_code == 200 + + def test_chunk_accessible(self, store: Store, zarr_format: ZarrFormat) -> None: + """An in-bounds chunk key should be served with 200 for both formats.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + arr[:] = np.ones(4) + + app = serve_node(arr) + client = TestClient(app) + + response = client.get(f"/{_chunk_key(zarr_format, '0')}") + assert response.status_code == 200 + + def test_out_of_bounds_chunk_returns_404(self, store: Store, zarr_format: ZarrFormat) -> None: + """An out-of-bounds chunk key should return 404 for both formats.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + arr[:] = np.ones(4) + + app = serve_node(arr) + client = TestClient(app) + + response = client.get(f"/{_chunk_key(zarr_format, '99')}") + assert response.status_code == 404 + + def test_non_zarr_key_returns_404(self, store: Store, zarr_format: ZarrFormat) -> None: + """A non-zarr key should return 404 regardless of format.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret") + sync(store.set("secret.txt", non_zarr_buf)) + + app = serve_node(arr) + client = TestClient(app) + + response = client.get("/secret.txt") + assert response.status_code == 404 + + def test_data_roundtrip(self, store: Store, zarr_format: ZarrFormat) -> None: + """Data written to an array should be readable via serve_store for + both formats.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + arr[:] = np.arange(4, dtype="f8") + + app = serve_store(store) + client = TestClient(app) + + # Metadata should be accessible. + response = client.get(f"/{_metadata_key(zarr_format)}") + assert response.status_code == 200 + + # First chunk should be accessible. + response = client.get(f"/{_chunk_key(zarr_format, '0')}") + assert response.status_code == 200 + assert len(response.content) > 0 From 71e303f5d944e230389232f27cf0721ed376e1e6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 1 Mar 2026 15:26:40 -0500 Subject: [PATCH 4/7] add docs and changelog --- changes/3732.feature.md | 3 + docs/api/zarr/experimental.md | 6 +- docs/user-guide/examples/serve_v2_v3.md | 7 ++ docs/user-guide/experimental.md | 133 ++++++++++++++++++++++++ examples/serve_v2_v3/README.md | 3 +- 5 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 changes/3732.feature.md create mode 100644 docs/user-guide/examples/serve_v2_v3.md diff --git a/changes/3732.feature.md b/changes/3732.feature.md new file mode 100644 index 0000000000..8ec3ddac9c --- /dev/null +++ b/changes/3732.feature.md @@ -0,0 +1,3 @@ +Adds an experimental HTTP server that can expose `Store`, `Array`, or `Group` instances over HTTP. +See the [user guide](https://zarr.readthedocs.io/en/latest/user-guide/experimental.html#http-server) +and the [Serve v2 as v3 example](https://zarr.readthedocs.io/en/latest/user-guide/examples/serve_v2_v3.html). \ No newline at end of file diff --git a/docs/api/zarr/experimental.md b/docs/api/zarr/experimental.md index 60f1f987b5..159300952d 100644 --- a/docs/api/zarr/experimental.md +++ b/docs/api/zarr/experimental.md @@ -4,6 +4,10 @@ title: experimental Experimental functionality is not stable and may change or be removed at any point. -## Classes +## Cache Store ::: zarr.experimental.cache_store + +## HTTP Server + +::: zarr.experimental.serve diff --git a/docs/user-guide/examples/serve_v2_v3.md b/docs/user-guide/examples/serve_v2_v3.md new file mode 100644 index 0000000000..1e39bc59e6 --- /dev/null +++ b/docs/user-guide/examples/serve_v2_v3.md @@ -0,0 +1,7 @@ +--8<-- "examples/serve_v2_v3/README.md" + +## Source Code + +```python +--8<-- "examples/serve_v2_v3/serve_v2_v3.py" +``` diff --git a/docs/user-guide/experimental.md b/docs/user-guide/experimental.md index eaa53a4622..94e2ddc61b 100644 --- a/docs/user-guide/experimental.md +++ b/docs/user-guide/experimental.md @@ -273,3 +273,136 @@ print(f"Cache contains {info['cached_keys']} keys with {info['current_size']} by This example shows how the CacheStore can significantly reduce access times for repeated data reads, particularly important when working with remote data sources. The dual-store architecture allows for flexible cache persistence and management. + +## HTTP Server + +Zarr Python provides an experimental HTTP server that exposes a Zarr `Store`, `Array`, +or `Group` over HTTP as an [ASGI](https://asgi.readthedocs.io/) application. +This makes it possible to serve zarr data to any HTTP-capable client (including +another Zarr Python process backed by an `HTTPStore`). + +The server is built on [Starlette](https://www.starlette.io/) and can be run with +any ASGI server such as [Uvicorn](https://www.uvicorn.org/). + +Install the server dependencies with: + +```bash +pip install zarr[server] +``` + +### Serving a Store + +[`zarr.experimental.serve.serve_store`][] creates an ASGI app that exposes every key +in a store: + +```python +import zarr +from zarr.experimental.serve import serve_store + +store = zarr.storage.MemoryStore() +zarr.create_array(store, shape=(100, 100), chunks=(10, 10), dtype="float64") + +app = serve_store(store) + +# Run with Uvicorn: +# uvicorn my_module:app --host 0.0.0.0 --port 8000 +``` + +### Serving a Node + +[`zarr.experimental.serve.serve_node`][] creates an ASGI app that only serves keys +belonging to a specific `Array` or `Group`. Requests for keys outside the node +receive a 404, even if those keys exist in the underlying store: + +```python +import zarr +from zarr.experimental.serve import serve_node + +store = zarr.storage.MemoryStore() +root = zarr.open_group(store) +root.create_array("a", shape=(10,), dtype="int32") +root.create_array("b", shape=(20,), dtype="float64") + +# Only serve the array at "a" — requests for "b" will return 404. +arr = root["a"] +app = serve_node(arr) +``` + +### CORS Support + +Both `serve_store` and `serve_node` accept a [`CorsOptions`][zarr.experimental.serve.CorsOptions] +parameter to enable [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) +middleware for browser-based clients: + +```python +from zarr.experimental.serve import CorsOptions, serve_store + +app = serve_store( + store, + cors_options=CorsOptions( + allow_origins=["*"], + allow_methods=["GET"], + ), +) +``` + +### HTTP Range Requests + +The server supports the standard `Range` header for partial reads. The three +forms defined by [RFC 7233](https://httpwg.org/specs/rfc7233.html) are supported: + +| Header | Meaning | +| -------------------- | ------------------------------ | +| `bytes=0-99` | First 100 bytes | +| `bytes=100-` | Everything from byte 100 | +| `bytes=-50` | Last 50 bytes | + +A successful range request returns HTTP 206 (Partial Content). + +### Write Support + +By default only `GET` requests are accepted. To enable writes, pass +`methods={"GET", "PUT"}`: + +```python +app = serve_store(store, methods={"GET", "PUT"}) +``` + +A `PUT` request stores the request body at the given path and returns 204 (No Content). + +### Running the Server in a Background Thread + +Because `serve_store` and `serve_node` return a standard ASGI app, you can run the +server in a daemon thread and interact with it from the same process. This is +useful for notebooks, scripts, and interactive exploration: + +```python +import threading + +import numpy as np +import uvicorn + +import zarr +from zarr.experimental.serve import serve_node +from zarr.storage import MemoryStore + +# Create an array with some data. +store = MemoryStore() +arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="float64") +arr[:] = np.arange(100, dtype="float64") + +# Build the ASGI app and launch Uvicorn in a daemon thread. +app = serve_node(arr) +config = uvicorn.Config(app, host="127.0.0.1", port=8000) +server = uvicorn.Server(config) +thread = threading.Thread(target=server.run, daemon=True) +thread.start() + +# Now open the served array from another zarr client. +remote = zarr.open_array("http://127.0.0.1:8000", mode="r") +np.testing.assert_array_equal(remote[:], arr[:]) + +# Shut down when finished. +server.should_exit = True +thread.join() +``` diff --git a/examples/serve_v2_v3/README.md b/examples/serve_v2_v3/README.md index c4790f18ac..1bdf92be2d 100644 --- a/examples/serve_v2_v3/README.md +++ b/examples/serve_v2_v3/README.md @@ -10,7 +10,8 @@ The example shows how to: existing v2 store - Translate v2 metadata (`.zarray` + `.zattrs`) to v3 `zarr.json` using the same `_convert_array_metadata` helper that powers `zarr migrate v3` -- Map v3 default chunk keys (`c/0/0`) to their v2 equivalents (`0.0`) +- Pass chunk keys through unchanged (the converted metadata preserves + `V2ChunkKeyEncoding`, so keys like `0.0` work in both formats) - Serve the translated store over HTTP so that any v3-compatible client can read v2 data without knowing the original format From 64de16fa73261913161e24ae7a1686bd9ec40e82 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 1 Mar 2026 19:42:13 -0500 Subject: [PATCH 5/7] add proper server --- changes/3732.feature.md | 2 + docs/user-guide/experimental.md | 105 +++++++-------- examples/serve_v2_v3/README.md | 2 +- examples/serve_v2_v3/serve_v2_v3.py | 8 +- mkdocs.yml | 1 + src/zarr/experimental/serve.py | 186 +++++++++++++++++++++++++- tests/test_experimental/test_serve.py | 150 ++++++++++++++------- 7 files changed, 346 insertions(+), 108 deletions(-) diff --git a/changes/3732.feature.md b/changes/3732.feature.md index 8ec3ddac9c..5706e99552 100644 --- a/changes/3732.feature.md +++ b/changes/3732.feature.md @@ -1,3 +1,5 @@ Adds an experimental HTTP server that can expose `Store`, `Array`, or `Group` instances over HTTP. +`store_app` and `node_app` build ASGI applications; `serve_store` and `serve_node` additionally +start a Uvicorn server (blocking by default, or in a background thread with `background=True`). See the [user guide](https://zarr.readthedocs.io/en/latest/user-guide/experimental.html#http-server) and the [Serve v2 as v3 example](https://zarr.readthedocs.io/en/latest/user-guide/examples/serve_v2_v3.html). \ No newline at end of file diff --git a/docs/user-guide/experimental.md b/docs/user-guide/experimental.md index 94e2ddc61b..8644506d7f 100644 --- a/docs/user-guide/experimental.md +++ b/docs/user-guide/experimental.md @@ -290,33 +290,31 @@ Install the server dependencies with: pip install zarr[server] ``` -### Serving a Store +### Building an ASGI App -[`zarr.experimental.serve.serve_store`][] creates an ASGI app that exposes every key +[`zarr.experimental.serve.store_app`][] creates an ASGI app that exposes every key in a store: ```python import zarr -from zarr.experimental.serve import serve_store +from zarr.experimental.serve import store_app store = zarr.storage.MemoryStore() zarr.create_array(store, shape=(100, 100), chunks=(10, 10), dtype="float64") -app = serve_store(store) +app = store_app(store) -# Run with Uvicorn: +# Run with any ASGI server, e.g. Uvicorn: # uvicorn my_module:app --host 0.0.0.0 --port 8000 ``` -### Serving a Node - -[`zarr.experimental.serve.serve_node`][] creates an ASGI app that only serves keys +[`zarr.experimental.serve.node_app`][] creates an ASGI app that only serves keys belonging to a specific `Array` or `Group`. Requests for keys outside the node receive a 404, even if those keys exist in the underlying store: ```python import zarr -from zarr.experimental.serve import serve_node +from zarr.experimental.serve import node_app store = zarr.storage.MemoryStore() root = zarr.open_group(store) @@ -325,19 +323,57 @@ root.create_array("b", shape=(20,), dtype="float64") # Only serve the array at "a" — requests for "b" will return 404. arr = root["a"] -app = serve_node(arr) +app = node_app(arr) +``` + +### Running the Server + +[`zarr.experimental.serve.serve_store`][] and [`zarr.experimental.serve.serve_node`][] +build an ASGI app *and* start a [Uvicorn](https://www.uvicorn.org/) server. +By default they block until the server is shut down: + +```python +from zarr.experimental.serve import serve_store + +serve_store(store, host="127.0.0.1", port=8000) +``` + +Pass `background=True` to start the server in a daemon thread and return +immediately. The returned `uvicorn.Server` object can be used to shut down +the server: + +```python +import numpy as np + +import zarr +from zarr.experimental.serve import serve_node +from zarr.storage import MemoryStore + +store = MemoryStore() +arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="float64") +arr[:] = np.arange(100, dtype="float64") + +server = serve_node(arr, host="127.0.0.1", port=8000, background=True) + +# Now open the served array from another zarr client. +remote = zarr.open_array("http://127.0.0.1:8000", mode="r") +np.testing.assert_array_equal(remote[:], arr[:]) + +# Shut down when finished. +server.should_exit = True ``` ### CORS Support -Both `serve_store` and `serve_node` accept a [`CorsOptions`][zarr.experimental.serve.CorsOptions] -parameter to enable [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) -middleware for browser-based clients: +Both `store_app` and `node_app` (and their `serve_*` counterparts) accept a +[`CorsOptions`][zarr.experimental.serve.CorsOptions] parameter to enable +[CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) middleware for +browser-based clients: ```python -from zarr.experimental.serve import CorsOptions, serve_store +from zarr.experimental.serve import CorsOptions, store_app -app = serve_store( +app = store_app( store, cors_options=CorsOptions( allow_origins=["*"], @@ -365,44 +401,7 @@ By default only `GET` requests are accepted. To enable writes, pass `methods={"GET", "PUT"}`: ```python -app = serve_store(store, methods={"GET", "PUT"}) +app = store_app(store, methods={"GET", "PUT"}) ``` A `PUT` request stores the request body at the given path and returns 204 (No Content). - -### Running the Server in a Background Thread - -Because `serve_store` and `serve_node` return a standard ASGI app, you can run the -server in a daemon thread and interact with it from the same process. This is -useful for notebooks, scripts, and interactive exploration: - -```python -import threading - -import numpy as np -import uvicorn - -import zarr -from zarr.experimental.serve import serve_node -from zarr.storage import MemoryStore - -# Create an array with some data. -store = MemoryStore() -arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="float64") -arr[:] = np.arange(100, dtype="float64") - -# Build the ASGI app and launch Uvicorn in a daemon thread. -app = serve_node(arr) -config = uvicorn.Config(app, host="127.0.0.1", port=8000) -server = uvicorn.Server(config) -thread = threading.Thread(target=server.run, daemon=True) -thread.start() - -# Now open the served array from another zarr client. -remote = zarr.open_array("http://127.0.0.1:8000", mode="r") -np.testing.assert_array_equal(remote[:], arr[:]) - -# Shut down when finished. -server.should_exit = True -thread.join() -``` diff --git a/examples/serve_v2_v3/README.md b/examples/serve_v2_v3/README.md index 1bdf92be2d..8bec69d4d1 100644 --- a/examples/serve_v2_v3/README.md +++ b/examples/serve_v2_v3/README.md @@ -2,7 +2,7 @@ This example demonstrates how to build a custom read-only `Store` that translates Zarr v2 data into v3 format on the fly, and serve it over HTTP -using `zarr.experimental.serve.serve_store`. +using `zarr.experimental.serve.store_app`. The example shows how to: diff --git a/examples/serve_v2_v3/serve_v2_v3.py b/examples/serve_v2_v3/serve_v2_v3.py index 558ca2acaf..c600fb6c3c 100644 --- a/examples/serve_v2_v3/serve_v2_v3.py +++ b/examples/serve_v2_v3/serve_v2_v3.py @@ -26,7 +26,7 @@ * The v2 metadata files (``.zarray``, ``.zattrs``) are hidden so only v3 keys are visible. -The translated store is then served over HTTP with ``serve_store``. A test +The translated store is then served over HTTP with ``store_app``. A test at the bottom opens the served data *as a v3 array* and verifies it can read the values back. """ @@ -272,12 +272,12 @@ def test_serve_roundtrip() -> None: """Serve the translated store over HTTP and read it back as v3.""" from starlette.testclient import TestClient - from zarr.experimental.serve import serve_store + from zarr.experimental.serve import store_app v2_store, _data = create_v2_array() v3_store = V2AsV3Store(v2_store) - app = serve_store(v3_store) + app = store_app(v3_store) client = TestClient(app) # Metadata should be valid v3 JSON. @@ -328,7 +328,7 @@ def test_open_as_v3_array() -> None: print("\n4. Store listing") test_listing() - print("\n5. HTTP round-trip via serve_store") + print("\n5. HTTP round-trip via store_app") test_serve_roundtrip() print("\n6. Open as v3 array") diff --git a/mkdocs.yml b/mkdocs.yml index 61872b6234..bfdc19586b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,6 +29,7 @@ nav: - user-guide/experimental.md - Examples: - user-guide/examples/custom_dtype.md + - user-guide/examples/serve_v2_v3.md - API Reference: - api/zarr/index.md - api/zarr/array.md diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py index 26978aa02f..d9fd04b2ca 100644 --- a/src/zarr/experimental/serve.py +++ b/src/zarr/experimental/serve.py @@ -1,12 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Literal, TypedDict +import threading +import time +from typing import TYPE_CHECKING, Any, Literal, TypedDict, overload from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest from zarr.core.buffer import cpu from zarr.core.keys import is_valid_node_key if TYPE_CHECKING: + import uvicorn from starlette.applications import Starlette from starlette.requests import Request from starlette.responses import Response @@ -15,7 +18,7 @@ from zarr.core.array import Array from zarr.core.group import Group -__all__ = ["CorsOptions", "HTTPMethod", "serve_node", "serve_store"] +__all__ = ["CorsOptions", "HTTPMethod", "node_app", "serve_node", "serve_store", "store_app"] class CorsOptions(TypedDict): @@ -142,7 +145,42 @@ def _make_starlette_app( return app -def serve_store( +def _start_server( + app: Starlette, + *, + host: str, + port: int, + background: bool, +) -> uvicorn.Server | None: + """Create a uvicorn server for *app* and either block or run in a daemon thread.""" + try: + import uvicorn + except ImportError as e: + raise ImportError( + "The zarr server requires the 'uvicorn' package. " + "Install it with: pip install zarr[server]" + ) from e + + config = uvicorn.Config(app, host=host, port=port) + server = uvicorn.Server(config) + + if not background: + server.run() + return None + + thread = threading.Thread(target=server.run, daemon=True) + thread.start() + + deadline = time.monotonic() + 5.0 + while not server.started: + if time.monotonic() > deadline: + raise RuntimeError("Server failed to start within 5 seconds") + time.sleep(0.01) + + return server + + +def store_app( store: Store, *, methods: set[HTTPMethod] | None = None, @@ -171,7 +209,7 @@ def serve_store( return app -def serve_node( +def node_app( node: Array[Any] | Group, *, methods: set[HTTPMethod] | None = None, @@ -209,3 +247,143 @@ def serve_node( app.state.node = node app.state.prefix = node.store_path.path return app + + +@overload +def serve_store( + store: Store, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[False] = ..., +) -> None: ... + + +@overload +def serve_store( + store: Store, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[True], +) -> uvicorn.Server: ... + + +def serve_store( + store: Store, + *, + host: str = "127.0.0.1", + port: int = 8000, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, + background: bool = False, +) -> uvicorn.Server | None: + """Serve every key in a zarr ``Store`` over HTTP. + + Builds a Starlette ASGI app (see :func:`store_app`) and starts a + `Uvicorn `_ server. + + Parameters + ---------- + store : Store + The zarr store to serve. + host : str, optional + The host to bind to. Defaults to ``"127.0.0.1"``. + port : int, optional + The port to bind to. Defaults to ``8000``. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + background : bool, optional + If ``False`` (the default), the server blocks until shut down. + If ``True``, the server runs in a daemon thread and this function + returns immediately. + + Returns + ------- + uvicorn.Server or None + The running server when ``background=True``, or ``None`` when + the server has been shut down after blocking. + """ + app = store_app(store, methods=methods, cors_options=cors_options) + return _start_server(app, host=host, port=port, background=background) + + +@overload +def serve_node( + node: Array[Any] | Group, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[False] = ..., +) -> None: ... + + +@overload +def serve_node( + node: Array[Any] | Group, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[True], +) -> uvicorn.Server: ... + + +def serve_node( + node: Array[Any] | Group, + *, + host: str = "127.0.0.1", + port: int = 8000, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, + background: bool = False, +) -> uvicorn.Server | None: + """Serve only the keys belonging to a zarr ``Array`` or ``Group`` over HTTP. + + Builds a Starlette ASGI app (see :func:`node_app`) and starts a + `Uvicorn `_ server. + + For an ``Array``, the served keys are the metadata document(s) and all + chunk (or shard) keys whose coordinates fall within the array's grid. + + For a ``Group``, the served keys are the group's own metadata plus any + path that resolves through the group's members to a valid array metadata + document or chunk key. + + Requests for keys outside this set receive a 404 response, even if the + underlying store contains data at that path. + + Parameters + ---------- + node : Array or Group + The zarr array or group to serve. + host : str, optional + The host to bind to. Defaults to ``"127.0.0.1"``. + port : int, optional + The port to bind to. Defaults to ``8000``. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + background : bool, optional + If ``False`` (the default), the server blocks until shut down. + If ``True``, the server runs in a daemon thread and this function + returns immediately. + + Returns + ------- + uvicorn.Server or None + The running server when ``background=True``, or ``None`` when + the server has been shut down after blocking. + """ + app = node_app(node, methods=methods, cors_options=cors_options) + return _start_server(app, host=host, port=port, background=background) diff --git a/tests/test_experimental/test_serve.py b/tests/test_experimental/test_serve.py index 5ec28b9a81..58e7da3aef 100644 --- a/tests/test_experimental/test_serve.py +++ b/tests/test_experimental/test_serve.py @@ -18,7 +18,7 @@ from starlette.testclient import TestClient -from zarr.experimental.serve import CorsOptions, _parse_range_header, serve_node, serve_store +from zarr.experimental.serve import CorsOptions, _parse_range_header, node_app, store_app @pytest.fixture @@ -37,8 +37,8 @@ def group_with_arrays(store: Store) -> zarr.Group: @pytest.mark.parametrize("store", ["memory"], indirect=True) -class TestServeNodeDoesNotExposeNonZarrKeys: - """serve_node must never expose keys that are not part of the zarr hierarchy.""" +class TestNodeAppDoesNotExposeNonZarrKeys: + """node_app must never expose keys that are not part of the zarr hierarchy.""" def test_non_zarr_key_returns_404(self, store: Store, group_with_arrays: zarr.Group) -> None: """A key that is not valid zarr metadata or a valid chunk key should return 404, @@ -46,7 +46,7 @@ def test_non_zarr_key_returns_404(self, store: Store, group_with_arrays: zarr.Gr non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret data") sync(store.set("secret.txt", non_zarr_buf)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) # The non-zarr key must not be accessible. @@ -61,7 +61,7 @@ def test_non_zarr_key_nested_returns_404( non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"not a chunk") sync(store.set("regular/notes.txt", non_zarr_buf)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) response = client.get("/regular/notes.txt") @@ -70,7 +70,7 @@ def test_non_zarr_key_nested_returns_404( def test_valid_metadata_is_accessible(self, group_with_arrays: zarr.Group) -> None: """Zarr metadata keys (zarr.json) for both the root group and child arrays should be served with a 200 status.""" - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) # Root group metadata @@ -88,7 +88,7 @@ def test_valid_chunk_is_accessible(self, group_with_arrays: zarr.Group) -> None: assert isinstance(arr, zarr.Array) arr[:] = np.ones((4, 4)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) # c/0/0 is a valid chunk key for a (4,4) array with (2,2) chunks. @@ -102,7 +102,7 @@ def test_out_of_bounds_chunk_key_returns_404(self, group_with_arrays: zarr.Group assert isinstance(arr, zarr.Array) arr[:] = np.ones((4, 4)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) # (4,4) array with (2,2) chunks has grid shape (2,2), so c/99/99 is @@ -113,7 +113,7 @@ def test_out_of_bounds_chunk_key_returns_404(self, group_with_arrays: zarr.Group def test_empty_path_returns_404(self, group_with_arrays: zarr.Group) -> None: """A request to the root path '/' should return 404 because an empty string is not a valid zarr key.""" - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) response = client.get("/") @@ -122,7 +122,7 @@ def test_empty_path_returns_404(self, group_with_arrays: zarr.Group) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) class TestShardedArrayByteRangeReads: - """Byte-range reads against a sharded array served via serve_node.""" + """Byte-range reads against a sharded array served via node_app.""" def test_range_read_returns_206(self, group_with_arrays: zarr.Group) -> None: """A Range header requesting a specific byte range (e.g. bytes=0-7) should @@ -131,7 +131,7 @@ def test_range_read_returns_206(self, group_with_arrays: zarr.Group) -> None: assert isinstance(arr, zarr.Array) arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) # c/0/0 is the first shard key for an (8,8) array with (4,4) shards. @@ -151,7 +151,7 @@ def test_suffix_range_read(self, group_with_arrays: zarr.Group) -> None: assert isinstance(arr, zarr.Array) arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) full_response = client.get("/sharded/c/0/0") @@ -169,7 +169,7 @@ def test_offset_range_read(self, group_with_arrays: zarr.Group) -> None: assert isinstance(arr, zarr.Array) arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) full_response = client.get("/sharded/c/0/0") @@ -201,7 +201,7 @@ def test_malformed_range_returns_416(self, store: Store, header: str) -> None: buf = cpu.buffer_prototype.buffer.from_bytes(b"some data here") sync(store.set("key", buf)) - app = serve_store(store) + app = store_app(store) client = TestClient(app, raise_server_exceptions=False) response = client.get("/key", headers={"Range": header}) @@ -213,7 +213,7 @@ def test_non_bytes_unit_returns_416(self, store: Store) -> None: buf = cpu.buffer_prototype.buffer.from_bytes(b"some data") sync(store.set("key", buf)) - app = serve_store(store) + app = store_app(store) client = TestClient(app) response = client.get("/key", headers={"Range": "chars=0-7"}) @@ -269,12 +269,12 @@ def test_empty_spec(self) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) class TestWriteViaPut: - """serve_store and serve_node can be configured to accept PUT writes.""" + """store_app and node_app can be configured to accept PUT writes.""" def test_put_writes_to_store(self, store: Store) -> None: - """A PUT request to serve_store with PUT enabled should write the + """A PUT request to store_app with PUT enabled should write the request body into the store at the given key.""" - app = serve_store(store, methods={"GET", "PUT"}) + app = store_app(store, methods={"GET", "PUT"}) client = TestClient(app) payload = b"hello zarr" @@ -289,7 +289,7 @@ def test_put_writes_to_store(self, store: Store) -> None: def test_put_then_get_roundtrip(self, store: Store) -> None: """Data written via PUT should be retrievable via a subsequent GET at the same key.""" - app = serve_store(store, methods={"GET", "PUT"}) + app = store_app(store, methods={"GET", "PUT"}) client = TestClient(app) payload = b"\x00\x01\x02\x03" @@ -302,25 +302,25 @@ def test_put_then_get_roundtrip(self, store: Store) -> None: def test_put_rejected_when_not_configured(self, store: Store) -> None: """PUT requests should return 405 Method Not Allowed when the server is created with the default methods (GET only).""" - app = serve_store(store) + app = store_app(store) client = TestClient(app) response = client.put("/some/key", content=b"data") assert response.status_code == 405 def test_put_on_node_validates_key(self, store: Store, group_with_arrays: zarr.Group) -> None: - """PUT requests via serve_node should be rejected with 404 when the + """PUT requests via node_app should be rejected with 404 when the target key is not a valid zarr key (metadata or chunk).""" - app = serve_node(group_with_arrays, methods={"GET", "PUT"}) + app = node_app(group_with_arrays, methods={"GET", "PUT"}) client = TestClient(app) response = client.put("/not_a_zarr_key.bin", content=b"data") assert response.status_code == 404 def test_put_to_valid_chunk_key_succeeds(self, group_with_arrays: zarr.Group) -> None: - """PUT requests via serve_node to a valid chunk key should succeed + """PUT requests via node_app to a valid chunk key should succeed with 204, and the written data should be retrievable via GET.""" - app = serve_node(group_with_arrays, methods={"GET", "PUT"}) + app = node_app(group_with_arrays, methods={"GET", "PUT"}) client = TestClient(app) payload = b"\x00" * 32 @@ -334,12 +334,12 @@ def test_put_to_valid_chunk_key_succeeds(self, group_with_arrays: zarr.Group) -> @pytest.mark.parametrize("store", ["memory"], indirect=True) -class TestServeStoreEdgeCases: - """Edge cases for serve_store.""" +class TestStoreAppEdgeCases: + """Edge cases for store_app.""" def test_get_nonexistent_key_returns_404(self, store: Store) -> None: """GET for a key that does not exist in the store should return 404.""" - app = serve_store(store) + app = store_app(store) client = TestClient(app) response = client.get("/no/such/key") @@ -348,7 +348,7 @@ def test_get_nonexistent_key_returns_404(self, store: Store) -> None: def test_empty_path_returns_404(self, store: Store) -> None: """GET to the root path '/' (empty key) should return 404 because an empty string is not a valid store key.""" - app = serve_store(store) + app = store_app(store) client = TestClient(app) response = client.get("/") @@ -356,11 +356,11 @@ def test_empty_path_returns_404(self, store: Store) -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) -class TestServeNodeDirectArray: +class TestNodeAppDirectArray: """Serve a single array directly (not through a group).""" def test_serve_nested_array_directly(self, store: Store) -> None: - """When serve_node is given a nested array (not a group), requests + """When node_app is given a nested array (not a group), requests should use keys relative to that array's path. Metadata and in-bounds chunks should return 200, and out-of-bounds chunks should return 404.""" root = zarr.open_group(store, mode="w") @@ -373,7 +373,7 @@ def test_serve_nested_array_directly(self, store: Store) -> None: arr[:] = np.arange(4, dtype="f8") # Serve the array directly — its prefix is "sub/nested". - app = serve_node(arr) + app = node_app(arr) client = TestClient(app) # Metadata should be accessible at the array root. @@ -392,7 +392,7 @@ def test_serve_nested_array_directly(self, store: Store) -> None: assert response.status_code == 404 def test_serve_root_array(self, store: Store) -> None: - """When serve_node is given an array stored at the root of a store + """When node_app is given an array stored at the root of a store (empty prefix), metadata and chunk keys should be accessible at their natural paths.""" arr = zarr.create_array( @@ -404,7 +404,7 @@ def test_serve_root_array(self, store: Store) -> None: arr[:] = np.arange(6, dtype="i4") # Root-level array has prefix = "". - app = serve_node(arr) + app = node_app(arr) client = TestClient(app) response = client.get("/zarr.json") @@ -427,7 +427,7 @@ class TestContentType: def test_metadata_has_json_content_type(self, group_with_arrays: zarr.Group) -> None: """Zarr metadata files (zarr.json) should be served with Content-Type: application/json.""" - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) response = client.get("/zarr.json") @@ -441,7 +441,7 @@ def test_chunk_has_octet_stream_content_type(self, group_with_arrays: zarr.Group assert isinstance(arr, zarr.Array) arr[:] = np.ones((4, 4)) - app = serve_node(group_with_arrays) + app = node_app(group_with_arrays) client = TestClient(app) response = client.get("/regular/c/0/0") @@ -460,7 +460,7 @@ def test_cors_headers_present(self, store: Store) -> None: sync(store.set("key", buf)) cors = CorsOptions(allow_origins=["https://example.com"], allow_methods=["GET"]) - app = serve_store(store, cors_options=cors) + app = store_app(store, cors_options=cors) client = TestClient(app) response = client.get("/key", headers={"Origin": "https://example.com"}) @@ -471,7 +471,7 @@ def test_cors_preflight(self, store: Store) -> None: """CORS preflight OPTIONS requests should return 200 with the Access-Control-Allow-Origin header when CORS is configured.""" cors = CorsOptions(allow_origins=["*"], allow_methods=["GET", "PUT"]) - app = serve_store(store, methods={"GET", "PUT"}, cors_options=cors) + app = store_app(store, methods={"GET", "PUT"}, cors_options=cors) client = TestClient(app) response = client.options( @@ -490,7 +490,7 @@ def test_no_cors_headers_without_option(self, store: Store) -> None: buf = cpu.buffer_prototype.buffer.from_bytes(b"data") sync(store.set("key", buf)) - app = serve_store(store) + app = store_app(store) client = TestClient(app) response = client.get("/key", headers={"Origin": "https://example.com"}) @@ -515,13 +515,13 @@ def _chunk_key(zarr_format: ZarrFormat, coords: str) -> str: @pytest.mark.parametrize("store", ["memory"], indirect=True) -class TestServeNodeV2AndV3: - """Test serve_node with both v2 and v3 arrays side by side.""" +class TestNodeAppV2AndV3: + """Test node_app with both v2 and v3 arrays side by side.""" def test_metadata_accessible(self, store: Store, zarr_format: ZarrFormat) -> None: """The format-appropriate metadata key should be served with 200.""" arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) - app = serve_node(arr) + app = node_app(arr) client = TestClient(app) response = client.get(f"/{_metadata_key(zarr_format)}") @@ -532,7 +532,7 @@ def test_chunk_accessible(self, store: Store, zarr_format: ZarrFormat) -> None: arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) arr[:] = np.ones(4) - app = serve_node(arr) + app = node_app(arr) client = TestClient(app) response = client.get(f"/{_chunk_key(zarr_format, '0')}") @@ -543,7 +543,7 @@ def test_out_of_bounds_chunk_returns_404(self, store: Store, zarr_format: ZarrFo arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) arr[:] = np.ones(4) - app = serve_node(arr) + app = node_app(arr) client = TestClient(app) response = client.get(f"/{_chunk_key(zarr_format, '99')}") @@ -555,19 +555,19 @@ def test_non_zarr_key_returns_404(self, store: Store, zarr_format: ZarrFormat) - non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret") sync(store.set("secret.txt", non_zarr_buf)) - app = serve_node(arr) + app = node_app(arr) client = TestClient(app) response = client.get("/secret.txt") assert response.status_code == 404 def test_data_roundtrip(self, store: Store, zarr_format: ZarrFormat) -> None: - """Data written to an array should be readable via serve_store for + """Data written to an array should be readable via store_app for both formats.""" arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) arr[:] = np.arange(4, dtype="f8") - app = serve_store(store) + app = store_app(store) client = TestClient(app) # Metadata should be accessible. @@ -578,3 +578,61 @@ def test_data_roundtrip(self, store: Store, zarr_format: ZarrFormat) -> None: response = client.get(f"/{_chunk_key(zarr_format, '0')}") assert response.status_code == 200 assert len(response.content) > 0 + + +def _get_free_port() -> int: + """Return an unused TCP port on localhost.""" + import socket + + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + port: int = s.getsockname()[1] + return port + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestServeBackground: + """Test serve_store and serve_node with background=True.""" + + def test_serve_store_background(self, store: Store) -> None: + """serve_store(background=True) should start a server in a daemon + thread and return a uvicorn.Server that responds to HTTP requests.""" + import httpx + + from zarr.experimental.serve import serve_store + + buf = cpu.buffer_prototype.buffer.from_bytes(b"hello") + sync(store.set("key", buf)) + + port = _get_free_port() + server = serve_store(store, host="127.0.0.1", port=port, background=True) + try: + assert server is not None + assert server.started + + response = httpx.get(f"http://127.0.0.1:{port}/key") + assert response.status_code == 200 + assert response.content == b"hello" + finally: + server.should_exit = True + + def test_serve_node_background(self, store: Store) -> None: + """serve_node(background=True) should start a server in a daemon + thread and return a uvicorn.Server that responds to HTTP requests.""" + import httpx + + from zarr.experimental.serve import serve_node + + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8") + arr[:] = np.arange(4, dtype="f8") + + port = _get_free_port() + server = serve_node(arr, host="127.0.0.1", port=port, background=True) + try: + assert server is not None + assert server.started + + response = httpx.get(f"http://127.0.0.1:{port}/zarr.json") + assert response.status_code == 200 + finally: + server.should_exit = True From e40b20b364f2127b07a9acc8ec9b0cb29c765234 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 1 Mar 2026 20:33:27 -0500 Subject: [PATCH 6/7] rework examples (simplify) and make server a context manager --- changes/3732.feature.md | 2 +- docs/user-guide/examples/serve.md | 7 + docs/user-guide/examples/serve_v2_v3.md | 7 - docs/user-guide/experimental.md | 17 +- examples/serve/README.md | 17 ++ examples/serve/serve.py | 43 +++ examples/serve_v2_v3/README.md | 28 -- examples/serve_v2_v3/serve_v2_v3.py | 337 ------------------------ mkdocs.yml | 2 +- src/zarr/experimental/serve.py | 75 +++++- tests/test_experimental/test_serve.py | 30 +-- 11 files changed, 150 insertions(+), 415 deletions(-) create mode 100644 docs/user-guide/examples/serve.md delete mode 100644 docs/user-guide/examples/serve_v2_v3.md create mode 100644 examples/serve/README.md create mode 100644 examples/serve/serve.py delete mode 100644 examples/serve_v2_v3/README.md delete mode 100644 examples/serve_v2_v3/serve_v2_v3.py diff --git a/changes/3732.feature.md b/changes/3732.feature.md index 5706e99552..a83e5c0264 100644 --- a/changes/3732.feature.md +++ b/changes/3732.feature.md @@ -2,4 +2,4 @@ Adds an experimental HTTP server that can expose `Store`, `Array`, or `Group` in `store_app` and `node_app` build ASGI applications; `serve_store` and `serve_node` additionally start a Uvicorn server (blocking by default, or in a background thread with `background=True`). See the [user guide](https://zarr.readthedocs.io/en/latest/user-guide/experimental.html#http-server) -and the [Serve v2 as v3 example](https://zarr.readthedocs.io/en/latest/user-guide/examples/serve_v2_v3.html). \ No newline at end of file +and the [example](https://zarr.readthedocs.io/en/latest/user-guide/examples/serve.html). \ No newline at end of file diff --git a/docs/user-guide/examples/serve.md b/docs/user-guide/examples/serve.md new file mode 100644 index 0000000000..d02a67e4a9 --- /dev/null +++ b/docs/user-guide/examples/serve.md @@ -0,0 +1,7 @@ +--8<-- "examples/serve/README.md" + +## Source Code + +```python +--8<-- "examples/serve/serve.py" +``` diff --git a/docs/user-guide/examples/serve_v2_v3.md b/docs/user-guide/examples/serve_v2_v3.md deleted file mode 100644 index 1e39bc59e6..0000000000 --- a/docs/user-guide/examples/serve_v2_v3.md +++ /dev/null @@ -1,7 +0,0 @@ ---8<-- "examples/serve_v2_v3/README.md" - -## Source Code - -```python ---8<-- "examples/serve_v2_v3/serve_v2_v3.py" -``` diff --git a/docs/user-guide/experimental.md b/docs/user-guide/experimental.md index 8644506d7f..eded9661fc 100644 --- a/docs/user-guide/experimental.md +++ b/docs/user-guide/experimental.md @@ -339,8 +339,8 @@ serve_store(store, host="127.0.0.1", port=8000) ``` Pass `background=True` to start the server in a daemon thread and return -immediately. The returned `uvicorn.Server` object can be used to shut down -the server: +immediately. The returned [`BackgroundServer`][zarr.experimental.serve.BackgroundServer] +can be used as a context manager for automatic shutdown: ```python import numpy as np @@ -353,14 +353,11 @@ store = MemoryStore() arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="float64") arr[:] = np.arange(100, dtype="float64") -server = serve_node(arr, host="127.0.0.1", port=8000, background=True) - -# Now open the served array from another zarr client. -remote = zarr.open_array("http://127.0.0.1:8000", mode="r") -np.testing.assert_array_equal(remote[:], arr[:]) - -# Shut down when finished. -server.should_exit = True +with serve_node(arr, host="127.0.0.1", port=8000, background=True) as server: + # Now open the served array from another zarr client. + remote = zarr.open_array(server.url, mode="r") + np.testing.assert_array_equal(remote[:], arr[:]) +# Server is shut down automatically when the block exits. ``` ### CORS Support diff --git a/examples/serve/README.md b/examples/serve/README.md new file mode 100644 index 0000000000..7b37968248 --- /dev/null +++ b/examples/serve/README.md @@ -0,0 +1,17 @@ +# Serve a Zarr Array over HTTP + +This example creates an in-memory Zarr array, serves it over HTTP with +`zarr.experimental.serve.serve_node`, and fetches the `zarr.json` metadata +document and a raw chunk using `httpx`. + +## Running the Example + +```bash +python examples/serve/serve.py +``` + +Or run with uv: + +```bash +uv run examples/serve/serve.py +``` diff --git a/examples/serve/serve.py b/examples/serve/serve.py new file mode 100644 index 0000000000..19550cc0ec --- /dev/null +++ b/examples/serve/serve.py @@ -0,0 +1,43 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "zarr[server] @ git+https://github.com/zarr-developers/zarr-python.git@main", +# "httpx", +# ] +# /// +""" +Serve a Zarr array over HTTP and fetch its metadata and chunks. + +This example creates an in-memory array, serves it in a background thread, +then uses ``httpx`` to request the ``zarr.json`` metadata document and a raw +chunk. +""" + +import json + +import httpx +import numpy as np + +import zarr +from zarr.experimental.serve import serve_node +from zarr.storage import MemoryStore + +# -- create an array -------------------------------------------------------- +store = MemoryStore() +data = np.arange(1000, dtype="uint8").reshape(10, 10, 10) +# no compression +arr = zarr.create_array(store, data=data, chunks=(5, 5, 5), write_data=True, compressors=None) + +# -- serve it in the background --------------------------------------------- +with serve_node(arr, host="127.0.0.1", port=8000, background=True) as server: + # -- fetch metadata ------------------------------------------------------ + resp = httpx.get(f"{server.url}/zarr.json") + assert resp.status_code == 200 + meta = resp.json() + print("zarr.json:") + print(json.dumps(meta, indent=2)) + + # -- fetch a raw chunk --------------------------------------------------- + resp = httpx.get(f"{server.url}/c/0/0/0") + assert resp.status_code == 200 + print(f"\nchunk c/0/0/0: {len(resp.content)} bytes") diff --git a/examples/serve_v2_v3/README.md b/examples/serve_v2_v3/README.md deleted file mode 100644 index 8bec69d4d1..0000000000 --- a/examples/serve_v2_v3/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Serve a Zarr v2 Array as v3 over HTTP - -This example demonstrates how to build a custom read-only `Store` that -translates Zarr v2 data into v3 format on the fly, and serve it over HTTP -using `zarr.experimental.serve.store_app`. - -The example shows how to: - -- Implement a custom `Store` subclass (`V2AsV3Store`) that wraps an - existing v2 store -- Translate v2 metadata (`.zarray` + `.zattrs`) to v3 `zarr.json` using - the same `_convert_array_metadata` helper that powers `zarr migrate v3` -- Pass chunk keys through unchanged (the converted metadata preserves - `V2ChunkKeyEncoding`, so keys like `0.0` work in both formats) -- Serve the translated store over HTTP so that any v3-compatible client - can read v2 data without knowing the original format - -## Running the Example - -```bash -python examples/serve_v2_v3/serve_v2_v3.py -``` - -Or run with uv: - -```bash -uv run examples/serve_v2_v3/serve_v2_v3.py -``` diff --git a/examples/serve_v2_v3/serve_v2_v3.py b/examples/serve_v2_v3/serve_v2_v3.py deleted file mode 100644 index c600fb6c3c..0000000000 --- a/examples/serve_v2_v3/serve_v2_v3.py +++ /dev/null @@ -1,337 +0,0 @@ -# /// script -# requires-python = ">=3.11" -# dependencies = [ -# "zarr[server]", -# "httpx", -# ] -# /// -# -""" -Serve a Zarr v2 array over HTTP as a Zarr v3 array. - -This example demonstrates how to build a read-only ``Store`` that translates -between Zarr formats on the fly. A v2 array lives in a ``MemoryStore``; the -custom ``V2AsV3Store`` intercepts reads and translates metadata: - -* Requests for ``zarr.json`` are answered by reading the v2 ``.zarray`` / - ``.zattrs`` metadata and converting it to a v3 ``zarr.json`` document - using the same ``_convert_array_metadata`` helper that powers the - ``zarr migrate v3`` CLI command. - -* Chunk keys are passed through unchanged because ``_convert_array_metadata`` - preserves the v2 chunk key encoding (``V2ChunkKeyEncoding``). A v3 - client reads the encoding from ``zarr.json`` and naturally produces the - same keys (e.g. ``0.0``) that the v2 store already contains. - -* The v2 metadata files (``.zarray``, ``.zattrs``) are hidden so only - v3 keys are visible. - -The translated store is then served over HTTP with ``store_app``. A test -at the bottom opens the served data *as a v3 array* and verifies it can -read the values back. -""" - -from __future__ import annotations - -import json -from typing import TYPE_CHECKING - -import numpy as np - -import zarr -from zarr.abc.store import ByteRequest, Store -from zarr.core.buffer import Buffer, cpu -from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON -from zarr.core.metadata.v2 import ArrayV2Metadata -from zarr.core.sync import sync -from zarr.metadata.migrate_v3 import _convert_array_metadata -from zarr.storage import MemoryStore - -if TYPE_CHECKING: - from collections.abc import AsyncIterator, Iterable - - from zarr.core.buffer import BufferPrototype - - -# --------------------------------------------------------------------------- -# Custom store that presents v2 data as v3 -# --------------------------------------------------------------------------- - -# v2 metadata keys that should be hidden from v3 clients. -_HIDDEN_V2_KEYS = frozenset({ZARRAY_JSON, ZATTRS_JSON}) - - -class V2AsV3Store(Store): - """A read-only store that wraps an existing v2 store and presents it as v3. - - Metadata translation - -------------------- - ``zarr.json`` ← ``.zarray`` + ``.zattrs`` (converted via - ``_convert_array_metadata`` from the CLI migration module) - - Chunk keys - ---------- - Chunk keys are **not** translated. The v3 metadata produced by - ``_convert_array_metadata`` uses ``V2ChunkKeyEncoding`` with the - same separator as the original v2 array, so chunk keys like ``0.0`` - are valid in both formats. - - Visibility - ---------- - The v2 metadata files (``.zarray``, ``.zattrs``) are hidden from - listing and ``get`` so that clients only see v3 keys. - """ - - supports_writes: bool = False - supports_deletes: bool = False - supports_listing: bool = True - - def __init__(self, v2_store: Store) -> None: - super().__init__(read_only=True) - self._v2 = v2_store - - def __eq__(self, other: object) -> bool: - return isinstance(other, V2AsV3Store) and self._v2 == other._v2 - - # -- metadata conversion ----------------------------------------------- - - async def _build_zarr_json(self, prototype: BufferPrototype) -> Buffer | None: - """Read v2 metadata from the wrapped store and return a v3 - ``zarr.json`` buffer.""" - zarray_buf = await self._v2.get(ZARRAY_JSON, prototype) - if zarray_buf is None: - return None - - zarray_dict = json.loads(zarray_buf.to_bytes()) - v2_meta = ArrayV2Metadata.from_dict(zarray_dict) - - # Merge in .zattrs if present. - zattrs_buf = await self._v2.get(ZATTRS_JSON, prototype) - if zattrs_buf is not None: - attrs = json.loads(zattrs_buf.to_bytes()) - if attrs: - v2_meta = v2_meta.update_attributes(attrs) - - # Reuse the same conversion the CLI uses. - v3_meta = _convert_array_metadata(v2_meta) - v3_json = json.dumps(v3_meta.to_dict()).encode() - return prototype.buffer.from_bytes(v3_json) - - # -- Store ABC implementation ------------------------------------------ - - async def get( - self, - key: str, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> Buffer | None: - if prototype is None: - prototype = default_buffer_prototype() - await self._ensure_open() - - # Synthesise zarr.json from v2 metadata. - if key == ZARR_JSON: - buf = await self._build_zarr_json(prototype) - if buf is None or byte_range is None: - return buf - from zarr.storage._utils import _normalize_byte_range_index - - start, stop = _normalize_byte_range_index(buf, byte_range) - return prototype.buffer.from_buffer(buf[start:stop]) - - # Hide v2 metadata files. - if key in _HIDDEN_V2_KEYS: - return None - - # All other keys (chunk keys) pass through unchanged. - return await self._v2.get(key, prototype, byte_range=byte_range) - - async def get_partial_values( - self, - prototype: BufferPrototype, - key_ranges: Iterable[tuple[str, ByteRequest | None]], - ) -> list[Buffer | None]: - return [await self.get(k, prototype, br) for k, br in key_ranges] - - async def exists(self, key: str) -> bool: - if key == ZARR_JSON: - return await self._v2.exists(ZARRAY_JSON) - if key in _HIDDEN_V2_KEYS: - return False - return await self._v2.exists(key) - - async def set(self, key: str, value: Buffer) -> None: - raise NotImplementedError("V2AsV3Store is read-only") - - async def delete(self, key: str) -> None: - raise NotImplementedError("V2AsV3Store is read-only") - - async def list(self) -> AsyncIterator[str]: - async for key in self._v2.list(): - if key == ZARRAY_JSON: - yield ZARR_JSON - elif key in _HIDDEN_V2_KEYS: - continue - else: - yield key - - async def list_prefix(self, prefix: str) -> AsyncIterator[str]: - async for key in self.list(): - if key.startswith(prefix): - yield key - - async def list_dir(self, prefix: str) -> AsyncIterator[str]: - async for key in self.list(): - if not key.startswith(prefix): - continue - remainder = key[len(prefix) :] - if "/" not in remainder: - yield key - - -# --------------------------------------------------------------------------- -# Demo / tests -# --------------------------------------------------------------------------- - - -def create_v2_array() -> tuple[MemoryStore, np.ndarray]: - """Create a v2 array with some data and return the store + data.""" - store = MemoryStore() - data = np.arange(16, dtype="float64").reshape(4, 4) - arr = zarr.create_array(store, shape=data.shape, chunks=(2, 2), dtype=data.dtype, zarr_format=2) - arr[:] = data - return store, data - - -def test_metadata_translation() -> None: - """The translated zarr.json should be valid v3 metadata.""" - v2_store, _ = create_v2_array() - v3_store = V2AsV3Store(v2_store) - - buf = sync(v3_store.get(ZARR_JSON, cpu.buffer_prototype)) - assert buf is not None - meta = json.loads(buf.to_bytes()) - - assert meta["zarr_format"] == 3 - assert meta["node_type"] == "array" - assert meta["shape"] == [4, 4] - assert meta["chunk_grid"]["configuration"]["chunk_shape"] == [2, 2] - # The v2 chunk key encoding is preserved. - assert meta["chunk_key_encoding"]["name"] == "v2" - assert any(c["name"] in ("bytes", "zstd", "blosc") for c in meta["codecs"]) - print(" metadata translation: OK") - print(f" zarr.json:\n{json.dumps(meta, indent=2)}") - - -def test_chunk_passthrough() -> None: - """Chunk keys should pass through unchanged (v2 encoding preserved).""" - v2_store, _ = create_v2_array() - v3_store = V2AsV3Store(v2_store) - - # The v2 store has chunk key "0.0"; the v3 store should serve the - # same key since the metadata says V2ChunkKeyEncoding. - v2_buf = sync(v2_store.get("0.0", cpu.buffer_prototype)) - v3_buf = sync(v3_store.get("0.0", cpu.buffer_prototype)) - assert v2_buf is not None - assert v3_buf is not None - assert v2_buf.to_bytes() == v3_buf.to_bytes() - print(" chunk passthrough: OK") - - -def test_v2_metadata_hidden() -> None: - """v2 metadata files should not be visible.""" - v2_store, _ = create_v2_array() - v3_store = V2AsV3Store(v2_store) - - assert sync(v3_store.get(ZARRAY_JSON, cpu.buffer_prototype)) is None - assert sync(v3_store.get(ZATTRS_JSON, cpu.buffer_prototype)) is None - assert not sync(v3_store.exists(ZARRAY_JSON)) - assert not sync(v3_store.exists(ZATTRS_JSON)) - print(" v2 metadata hidden: OK") - - -def test_listing() -> None: - """Store listing should show v3 keys only.""" - v2_store, _ = create_v2_array() - v3_store = V2AsV3Store(v2_store) - - async def _list() -> list[str]: - return [k async for k in v3_store.list()] - - keys = sync(_list()) - assert ZARR_JSON in keys - assert ZARRAY_JSON not in keys - assert ZATTRS_JSON not in keys - # Chunk keys use v2 encoding (unchanged). - assert "0.0" in keys - print(f" listing keys: {sorted(keys)}") - - -def test_serve_roundtrip() -> None: - """Serve the translated store over HTTP and read it back as v3.""" - from starlette.testclient import TestClient - - from zarr.experimental.serve import store_app - - v2_store, _data = create_v2_array() - v3_store = V2AsV3Store(v2_store) - - app = store_app(v3_store) - client = TestClient(app) - - # Metadata should be valid v3 JSON. - resp = client.get("/zarr.json") - assert resp.status_code == 200 - assert resp.headers["content-type"] == "application/json" - meta = resp.json() - assert meta["zarr_format"] == 3 - - # Chunks should be accessible via v2-style keys (as the metadata declares). - resp = client.get("/0.0") - assert resp.status_code == 200 - assert len(resp.content) > 0 - - # v2 metadata files should NOT be accessible. - resp = client.get("/.zarray") - assert resp.status_code == 404 - resp = client.get("/.zattrs") - assert resp.status_code == 404 - - print(" HTTP round-trip: OK") - - -def test_open_as_v3_array() -> None: - """Open the translated store as a v3 array and verify the data.""" - v2_store, data = create_v2_array() - v3_store = V2AsV3Store(v2_store) - - arr = zarr.open_array(v3_store) - assert arr.metadata.zarr_format == 3 - np.testing.assert_array_equal(arr[:], data) - print(" open as v3 array: OK") - print(f" data:\n{arr[:]}") - - -if __name__ == "__main__": - print("Creating v2 array and wrapping it with V2AsV3Store...\n") - - print("1. Metadata translation") - test_metadata_translation() - - print("\n2. Chunk key passthrough") - test_chunk_passthrough() - - print("\n3. v2 metadata hidden") - test_v2_metadata_hidden() - - print("\n4. Store listing") - test_listing() - - print("\n5. HTTP round-trip via store_app") - test_serve_roundtrip() - - print("\n6. Open as v3 array") - test_open_as_v3_array() - - print("\nAll checks passed.") diff --git a/mkdocs.yml b/mkdocs.yml index bfdc19586b..1b1f70c8fb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,7 +29,7 @@ nav: - user-guide/experimental.md - Examples: - user-guide/examples/custom_dtype.md - - user-guide/examples/serve_v2_v3.md + - user-guide/examples/serve.md - API Reference: - api/zarr/index.md - api/zarr/array.md diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py index d9fd04b2ca..c83c06a9b8 100644 --- a/src/zarr/experimental/serve.py +++ b/src/zarr/experimental/serve.py @@ -2,7 +2,7 @@ import threading import time -from typing import TYPE_CHECKING, Any, Literal, TypedDict, overload +from typing import TYPE_CHECKING, Any, Literal, Self, TypedDict, overload from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest from zarr.core.buffer import cpu @@ -18,7 +18,15 @@ from zarr.core.array import Array from zarr.core.group import Group -__all__ = ["CorsOptions", "HTTPMethod", "node_app", "serve_node", "serve_store", "store_app"] +__all__ = [ + "BackgroundServer", + "CorsOptions", + "HTTPMethod", + "node_app", + "serve_node", + "serve_store", + "store_app", +] class CorsOptions(TypedDict): @@ -29,6 +37,45 @@ class CorsOptions(TypedDict): HTTPMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"] +class BackgroundServer: + """A running background HTTP server that can be used as a context manager. + + Wraps a ``uvicorn.Server`` running in a daemon thread. When used as a + context manager the server is shut down automatically on exit. + + Parameters + ---------- + server : uvicorn.Server + The running uvicorn server instance. + + Examples + -------- + >>> with serve_node(arr, background=True) as server: # doctest: +SKIP + ... print(f"Listening on {server.host}:{server.port}") + ... # server is shut down when the block exits + """ + + def __init__(self, server: uvicorn.Server, *, host: str, port: int) -> None: + self._server = server + self.host = host + self.port = port + + @property + def url(self) -> str: + """The base URL of the running server.""" + return f"http://{self.host}:{self.port}" + + def shutdown(self) -> None: + """Signal the server to shut down.""" + self._server.should_exit = True + + def __enter__(self) -> Self: + return self + + def __exit__(self, *args: object) -> None: + self.shutdown() + + def _parse_range_header(range_header: str) -> ByteRequest | None: """Parse an HTTP Range header into a ByteRequest. @@ -151,7 +198,7 @@ def _start_server( host: str, port: int, background: bool, -) -> uvicorn.Server | None: +) -> BackgroundServer | None: """Create a uvicorn server for *app* and either block or run in a daemon thread.""" try: import uvicorn @@ -177,7 +224,7 @@ def _start_server( raise RuntimeError("Server failed to start within 5 seconds") time.sleep(0.01) - return server + return BackgroundServer(server, host=host, port=port) def store_app( @@ -270,7 +317,7 @@ def serve_store( methods: set[HTTPMethod] | None = ..., cors_options: CorsOptions | None = ..., background: Literal[True], -) -> uvicorn.Server: ... +) -> BackgroundServer: ... def serve_store( @@ -281,7 +328,7 @@ def serve_store( methods: set[HTTPMethod] | None = None, cors_options: CorsOptions | None = None, background: bool = False, -) -> uvicorn.Server | None: +) -> BackgroundServer | None: """Serve every key in a zarr ``Store`` over HTTP. Builds a Starlette ASGI app (see :func:`store_app`) and starts a @@ -306,9 +353,11 @@ def serve_store( Returns ------- - uvicorn.Server or None + BackgroundServer or None The running server when ``background=True``, or ``None`` when - the server has been shut down after blocking. + the server has been shut down after blocking. The + ``BackgroundServer`` can be used as a context manager for + automatic shutdown. """ app = store_app(store, methods=methods, cors_options=cors_options) return _start_server(app, host=host, port=port, background=background) @@ -335,7 +384,7 @@ def serve_node( methods: set[HTTPMethod] | None = ..., cors_options: CorsOptions | None = ..., background: Literal[True], -) -> uvicorn.Server: ... +) -> BackgroundServer: ... def serve_node( @@ -346,7 +395,7 @@ def serve_node( methods: set[HTTPMethod] | None = None, cors_options: CorsOptions | None = None, background: bool = False, -) -> uvicorn.Server | None: +) -> BackgroundServer | None: """Serve only the keys belonging to a zarr ``Array`` or ``Group`` over HTTP. Builds a Starlette ASGI app (see :func:`node_app`) and starts a @@ -381,9 +430,11 @@ def serve_node( Returns ------- - uvicorn.Server or None + BackgroundServer or None The running server when ``background=True``, or ``None`` when - the server has been shut down after blocking. + the server has been shut down after blocking. The + ``BackgroundServer`` can be used as a context manager for + automatic shutdown. """ app = node_app(node, methods=methods, cors_options=cors_options) return _start_server(app, host=host, port=port, background=background) diff --git a/tests/test_experimental/test_serve.py b/tests/test_experimental/test_serve.py index 58e7da3aef..41b323b26b 100644 --- a/tests/test_experimental/test_serve.py +++ b/tests/test_experimental/test_serve.py @@ -595,8 +595,8 @@ class TestServeBackground: """Test serve_store and serve_node with background=True.""" def test_serve_store_background(self, store: Store) -> None: - """serve_store(background=True) should start a server in a daemon - thread and return a uvicorn.Server that responds to HTTP requests.""" + """serve_store(background=True) should return a BackgroundServer + that responds to HTTP requests and can be used as a context manager.""" import httpx from zarr.experimental.serve import serve_store @@ -605,20 +605,18 @@ def test_serve_store_background(self, store: Store) -> None: sync(store.set("key", buf)) port = _get_free_port() - server = serve_store(store, host="127.0.0.1", port=port, background=True) - try: - assert server is not None - assert server.started + with serve_store(store, host="127.0.0.1", port=port, background=True) as server: + assert server.host == "127.0.0.1" + assert server.port == port + assert server.url == f"http://127.0.0.1:{port}" - response = httpx.get(f"http://127.0.0.1:{port}/key") + response = httpx.get(f"{server.url}/key") assert response.status_code == 200 assert response.content == b"hello" - finally: - server.should_exit = True def test_serve_node_background(self, store: Store) -> None: - """serve_node(background=True) should start a server in a daemon - thread and return a uvicorn.Server that responds to HTTP requests.""" + """serve_node(background=True) should return a BackgroundServer + that responds to HTTP requests and can be used as a context manager.""" import httpx from zarr.experimental.serve import serve_node @@ -627,12 +625,6 @@ def test_serve_node_background(self, store: Store) -> None: arr[:] = np.arange(4, dtype="f8") port = _get_free_port() - server = serve_node(arr, host="127.0.0.1", port=port, background=True) - try: - assert server is not None - assert server.started - - response = httpx.get(f"http://127.0.0.1:{port}/zarr.json") + with serve_node(arr, host="127.0.0.1", port=port, background=True) as server: + response = httpx.get(f"{server.url}/zarr.json") assert response.status_code == 200 - finally: - server.should_exit = True From e167ad34b319a01cc791fad0d476cd86195370e0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 1 Mar 2026 20:36:51 -0500 Subject: [PATCH 7/7] minor tweaks to server --- src/zarr/experimental/serve.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py index c83c06a9b8..96f37bc290 100644 --- a/src/zarr/experimental/serve.py +++ b/src/zarr/experimental/serve.py @@ -55,8 +55,11 @@ class BackgroundServer: ... # server is shut down when the block exits """ - def __init__(self, server: uvicorn.Server, *, host: str, port: int) -> None: + def __init__( + self, server: uvicorn.Server, thread: threading.Thread, *, host: str, port: int + ) -> None: self._server = server + self._thread = thread self.host = host self.port = port @@ -66,8 +69,9 @@ def url(self) -> str: return f"http://{self.host}:{self.port}" def shutdown(self) -> None: - """Signal the server to shut down.""" + """Signal the server to shut down and wait for it to stop.""" self._server.should_exit = True + self._thread.join() def __enter__(self) -> Self: return self @@ -215,6 +219,11 @@ def _start_server( server.run() return None + # Signal handlers can only be installed on the main thread, so + # disable them when running in a background thread. + # See https://github.com/encode/uvicorn/issues/742 + server.install_signal_handlers = lambda: None + thread = threading.Thread(target=server.run, daemon=True) thread.start() @@ -224,7 +233,7 @@ def _start_server( raise RuntimeError("Server failed to start within 5 seconds") time.sleep(0.01) - return BackgroundServer(server, host=host, port=port) + return BackgroundServer(server, thread, host=host, port=port) def store_app(