diff --git a/changes/3732.feature.md b/changes/3732.feature.md new file mode 100644 index 0000000000..a83e5c0264 --- /dev/null +++ b/changes/3732.feature.md @@ -0,0 +1,5 @@ +Adds an experimental HTTP server that can expose `Store`, `Array`, or `Group` instances over HTTP. +`store_app` and `node_app` build ASGI applications; `serve_store` and `serve_node` additionally +start a Uvicorn server (blocking by default, or in a background thread with `background=True`). +See the [user guide](https://zarr.readthedocs.io/en/latest/user-guide/experimental.html#http-server) +and the [example](https://zarr.readthedocs.io/en/latest/user-guide/examples/serve.html). \ No newline at end of file diff --git a/docs/api/zarr/experimental.md b/docs/api/zarr/experimental.md index 60f1f987b5..159300952d 100644 --- a/docs/api/zarr/experimental.md +++ b/docs/api/zarr/experimental.md @@ -4,6 +4,10 @@ title: experimental Experimental functionality is not stable and may change or be removed at any point. -## Classes +## Cache Store ::: zarr.experimental.cache_store + +## HTTP Server + +::: zarr.experimental.serve diff --git a/docs/user-guide/examples/serve.md b/docs/user-guide/examples/serve.md new file mode 100644 index 0000000000..d02a67e4a9 --- /dev/null +++ b/docs/user-guide/examples/serve.md @@ -0,0 +1,7 @@ +--8<-- "examples/serve/README.md" + +## Source Code + +```python +--8<-- "examples/serve/serve.py" +``` diff --git a/docs/user-guide/experimental.md b/docs/user-guide/experimental.md index eaa53a4622..eded9661fc 100644 --- a/docs/user-guide/experimental.md +++ b/docs/user-guide/experimental.md @@ -273,3 +273,132 @@ print(f"Cache contains {info['cached_keys']} keys with {info['current_size']} by This example shows how the CacheStore can significantly reduce access times for repeated data reads, particularly important when working with remote data sources. The dual-store architecture allows for flexible cache persistence and management. + +## HTTP Server + +Zarr Python provides an experimental HTTP server that exposes a Zarr `Store`, `Array`, +or `Group` over HTTP as an [ASGI](https://asgi.readthedocs.io/) application. +This makes it possible to serve zarr data to any HTTP-capable client (including +another Zarr Python process backed by an `HTTPStore`). + +The server is built on [Starlette](https://www.starlette.io/) and can be run with +any ASGI server such as [Uvicorn](https://www.uvicorn.org/). + +Install the server dependencies with: + +```bash +pip install zarr[server] +``` + +### Building an ASGI App + +[`zarr.experimental.serve.store_app`][] creates an ASGI app that exposes every key +in a store: + +```python +import zarr +from zarr.experimental.serve import store_app + +store = zarr.storage.MemoryStore() +zarr.create_array(store, shape=(100, 100), chunks=(10, 10), dtype="float64") + +app = store_app(store) + +# Run with any ASGI server, e.g. Uvicorn: +# uvicorn my_module:app --host 0.0.0.0 --port 8000 +``` + +[`zarr.experimental.serve.node_app`][] creates an ASGI app that only serves keys +belonging to a specific `Array` or `Group`. Requests for keys outside the node +receive a 404, even if those keys exist in the underlying store: + +```python +import zarr +from zarr.experimental.serve import node_app + +store = zarr.storage.MemoryStore() +root = zarr.open_group(store) +root.create_array("a", shape=(10,), dtype="int32") +root.create_array("b", shape=(20,), dtype="float64") + +# Only serve the array at "a" — requests for "b" will return 404. +arr = root["a"] +app = node_app(arr) +``` + +### Running the Server + +[`zarr.experimental.serve.serve_store`][] and [`zarr.experimental.serve.serve_node`][] +build an ASGI app *and* start a [Uvicorn](https://www.uvicorn.org/) server. +By default they block until the server is shut down: + +```python +from zarr.experimental.serve import serve_store + +serve_store(store, host="127.0.0.1", port=8000) +``` + +Pass `background=True` to start the server in a daemon thread and return +immediately. The returned [`BackgroundServer`][zarr.experimental.serve.BackgroundServer] +can be used as a context manager for automatic shutdown: + +```python +import numpy as np + +import zarr +from zarr.experimental.serve import serve_node +from zarr.storage import MemoryStore + +store = MemoryStore() +arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="float64") +arr[:] = np.arange(100, dtype="float64") + +with serve_node(arr, host="127.0.0.1", port=8000, background=True) as server: + # Now open the served array from another zarr client. + remote = zarr.open_array(server.url, mode="r") + np.testing.assert_array_equal(remote[:], arr[:]) +# Server is shut down automatically when the block exits. +``` + +### CORS Support + +Both `store_app` and `node_app` (and their `serve_*` counterparts) accept a +[`CorsOptions`][zarr.experimental.serve.CorsOptions] parameter to enable +[CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) middleware for +browser-based clients: + +```python +from zarr.experimental.serve import CorsOptions, store_app + +app = store_app( + store, + cors_options=CorsOptions( + allow_origins=["*"], + allow_methods=["GET"], + ), +) +``` + +### HTTP Range Requests + +The server supports the standard `Range` header for partial reads. The three +forms defined by [RFC 7233](https://httpwg.org/specs/rfc7233.html) are supported: + +| Header | Meaning | +| -------------------- | ------------------------------ | +| `bytes=0-99` | First 100 bytes | +| `bytes=100-` | Everything from byte 100 | +| `bytes=-50` | Last 50 bytes | + +A successful range request returns HTTP 206 (Partial Content). + +### Write Support + +By default only `GET` requests are accepted. To enable writes, pass +`methods={"GET", "PUT"}`: + +```python +app = store_app(store, methods={"GET", "PUT"}) +``` + +A `PUT` request stores the request body at the given path and returns 204 (No Content). diff --git a/examples/serve/README.md b/examples/serve/README.md new file mode 100644 index 0000000000..7b37968248 --- /dev/null +++ b/examples/serve/README.md @@ -0,0 +1,17 @@ +# Serve a Zarr Array over HTTP + +This example creates an in-memory Zarr array, serves it over HTTP with +`zarr.experimental.serve.serve_node`, and fetches the `zarr.json` metadata +document and a raw chunk using `httpx`. + +## Running the Example + +```bash +python examples/serve/serve.py +``` + +Or run with uv: + +```bash +uv run examples/serve/serve.py +``` diff --git a/examples/serve/serve.py b/examples/serve/serve.py new file mode 100644 index 0000000000..19550cc0ec --- /dev/null +++ b/examples/serve/serve.py @@ -0,0 +1,43 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "zarr[server] @ git+https://github.com/zarr-developers/zarr-python.git@main", +# "httpx", +# ] +# /// +""" +Serve a Zarr array over HTTP and fetch its metadata and chunks. + +This example creates an in-memory array, serves it in a background thread, +then uses ``httpx`` to request the ``zarr.json`` metadata document and a raw +chunk. +""" + +import json + +import httpx +import numpy as np + +import zarr +from zarr.experimental.serve import serve_node +from zarr.storage import MemoryStore + +# -- create an array -------------------------------------------------------- +store = MemoryStore() +data = np.arange(1000, dtype="uint8").reshape(10, 10, 10) +# no compression +arr = zarr.create_array(store, data=data, chunks=(5, 5, 5), write_data=True, compressors=None) + +# -- serve it in the background --------------------------------------------- +with serve_node(arr, host="127.0.0.1", port=8000, background=True) as server: + # -- fetch metadata ------------------------------------------------------ + resp = httpx.get(f"{server.url}/zarr.json") + assert resp.status_code == 200 + meta = resp.json() + print("zarr.json:") + print(json.dumps(meta, indent=2)) + + # -- fetch a raw chunk --------------------------------------------------- + resp = httpx.get(f"{server.url}/c/0/0/0") + assert resp.status_code == 200 + print(f"\nchunk c/0/0/0: {len(resp.content)} bytes") diff --git a/mkdocs.yml b/mkdocs.yml index 61872b6234..1b1f70c8fb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,6 +29,7 @@ nav: - user-guide/experimental.md - Examples: - user-guide/examples/custom_dtype.md + - user-guide/examples/serve.md - API Reference: - api/zarr/index.md - api/zarr/array.md diff --git a/pyproject.toml b/pyproject.toml index 02ebdc4176..b21827999d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,11 @@ gpu = [ "cupy-cuda12x", ] cli = ["typer"] +server = [ + "starlette", + "httpx", + "uvicorn", +] optional = ["rich", "universal-pathlib"] [project.scripts] diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py index 5c9f77118a..ca6154d1ba 100644 --- a/src/zarr/core/chunk_key_encodings.py +++ b/src/zarr/core/chunk_key_encodings.py @@ -79,7 +79,11 @@ def __post_init__(self) -> None: def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]: if chunk_key == "c": return () - return tuple(map(int, chunk_key[1:].split(self.separator))) + # Strip the "c" prefix (e.g. "c/" or "c.") before splitting. + prefix = "c" + self.separator + if chunk_key.startswith(prefix): + return tuple(map(int, chunk_key[len(prefix) :].split(self.separator))) + raise ValueError(f"Invalid chunk key for default encoding: {chunk_key!r}") def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: return self.separator.join(map(str, ("c",) + chunk_coords)) diff --git a/src/zarr/core/keys.py b/src/zarr/core/keys.py new file mode 100644 index 0000000000..4cc13874ab --- /dev/null +++ b/src/zarr/core/keys.py @@ -0,0 +1,178 @@ +"""Utilities for determining the set of valid store keys for zarr nodes. + +A zarr node (array or group) implicitly defines a subset of keys in the +underlying store. For an **array** the valid keys are: + +* metadata documents (``zarr.json`` for v3, ``.zarray`` / ``.zattrs`` for v2) +* chunk (or shard) keys whose decoded coordinates fall within the storage grid + +For a **group** the valid keys are: + +* its own metadata documents +* any path ``/`` where ```` is a direct member and + ```` is recursively valid for that child +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON + +if TYPE_CHECKING: + from zarr.core.array import Array + from zarr.core.group import Group + +_METADATA_KEYS_V3 = frozenset({ZARR_JSON}) +_METADATA_KEYS_V2 = frozenset({ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON}) + + +def metadata_keys(zarr_format: int) -> frozenset[str]: + """Return the set of metadata key basenames for a given zarr format version. + + Parameters + ---------- + zarr_format : int + The zarr format version (2 or 3). + + Returns + ------- + frozenset of str + """ + if zarr_format == 3: + return _METADATA_KEYS_V3 + return _METADATA_KEYS_V2 + + +def decode_chunk_key(array: Array[Any], key: str) -> tuple[int, ...] | None: + """Try to decode *key* into chunk coordinates for *array*. + + Parameters + ---------- + array : Array + The array whose chunk key encoding should be used. + key : str + The candidate chunk key string. + + Returns + ------- + tuple of int, or None + The decoded coordinates, or ``None`` if *key* is not a valid chunk key. + """ + from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding + from zarr.core.metadata.v2 import ArrayV2Metadata + + try: + if isinstance(array.metadata, ArrayV2Metadata): + parts = key.split(array.metadata.dimension_separator) + return tuple(int(p) for p in parts) + + encoding = array.metadata.chunk_key_encoding + if isinstance(encoding, DefaultChunkKeyEncoding): + # Default v3 keys have the form "c012". + prefix = "c" + encoding.separator + if key == "c": + return () + if not key.startswith(prefix): + return None + return tuple(int(p) for p in key[len(prefix) :].split(encoding.separator)) + if isinstance(encoding, V2ChunkKeyEncoding): + return tuple(int(p) for p in key.split(encoding.separator)) + + # Unknown encoding — fall back to the encoding's own decode. + return encoding.decode_chunk_key(key) + except (ValueError, TypeError, NotImplementedError): + return None + + +def is_valid_chunk_key(array: Array[Any], key: str) -> bool: + """Check whether *key* is a valid chunk key for *array*. + + Tries to decode the key and checks that the resulting coordinates fall + within the storage grid (shard grid if sharding is used, chunk grid + otherwise). + + Parameters + ---------- + array : Array + The array to validate against. + key : str + The candidate chunk key string. + + Returns + ------- + bool + """ + coords = decode_chunk_key(array, key) + if coords is None: + return False + grid = array._shard_grid_shape + if len(coords) != len(grid): + return False + return all(0 <= c < g for c, g in zip(coords, grid, strict=True)) + + +def is_valid_array_key(array: Array[Any], key: str) -> bool: + """Check whether *key* is a valid store key for *array*. + + Valid keys are metadata documents and chunk keys. + + Parameters + ---------- + array : Array + The array to validate against. + key : str + The candidate key, relative to the array's root. + + Returns + ------- + bool + """ + if key in metadata_keys(array.metadata.zarr_format): + return True + return is_valid_chunk_key(array, key) + + +def is_valid_node_key(node: Array[Any] | Group, key: str) -> bool: + """Check whether *key* is a valid store key relative to *node*. + + For an ``Array``, valid keys are metadata documents and chunk keys. + + For a ``Group``, valid keys are the group's own metadata documents, or + a path of the form ``/`` where ```` is a direct + member and ```` is recursively valid for that child. + + Parameters + ---------- + node : Array or Group + The zarr node to validate against. + key : str + The candidate key, relative to the node's root. + + Returns + ------- + bool + """ + from zarr.core.array import Array + + if isinstance(node, Array): + return is_valid_array_key(node, key) + + # Group + if key in metadata_keys(node.metadata.zarr_format): + return True + + # Try to match the first path component against a child member. + if "/" in key: + child_name, remainder = key.split("/", 1) + else: + # A bare name with no slash can't be a valid group-level key — + # groups contain children (which have subkeys), not bare keys. + return False + + try: + child = node[child_name] + except KeyError: + return False + + return is_valid_node_key(child, remainder) diff --git a/src/zarr/experimental/serve.py b/src/zarr/experimental/serve.py new file mode 100644 index 0000000000..96f37bc290 --- /dev/null +++ b/src/zarr/experimental/serve.py @@ -0,0 +1,449 @@ +from __future__ import annotations + +import threading +import time +from typing import TYPE_CHECKING, Any, Literal, Self, TypedDict, overload + +from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest +from zarr.core.buffer import cpu +from zarr.core.keys import is_valid_node_key + +if TYPE_CHECKING: + import uvicorn + from starlette.applications import Starlette + from starlette.requests import Request + from starlette.responses import Response + + from zarr.abc.store import ByteRequest, Store + from zarr.core.array import Array + from zarr.core.group import Group + +__all__ = [ + "BackgroundServer", + "CorsOptions", + "HTTPMethod", + "node_app", + "serve_node", + "serve_store", + "store_app", +] + + +class CorsOptions(TypedDict): + allow_origins: list[str] + allow_methods: list[str] + + +HTTPMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"] + + +class BackgroundServer: + """A running background HTTP server that can be used as a context manager. + + Wraps a ``uvicorn.Server`` running in a daemon thread. When used as a + context manager the server is shut down automatically on exit. + + Parameters + ---------- + server : uvicorn.Server + The running uvicorn server instance. + + Examples + -------- + >>> with serve_node(arr, background=True) as server: # doctest: +SKIP + ... print(f"Listening on {server.host}:{server.port}") + ... # server is shut down when the block exits + """ + + def __init__( + self, server: uvicorn.Server, thread: threading.Thread, *, host: str, port: int + ) -> None: + self._server = server + self._thread = thread + self.host = host + self.port = port + + @property + def url(self) -> str: + """The base URL of the running server.""" + return f"http://{self.host}:{self.port}" + + def shutdown(self) -> None: + """Signal the server to shut down and wait for it to stop.""" + self._server.should_exit = True + self._thread.join() + + def __enter__(self) -> Self: + return self + + def __exit__(self, *args: object) -> None: + self.shutdown() + + +def _parse_range_header(range_header: str) -> ByteRequest | None: + """Parse an HTTP Range header into a ByteRequest. + + Parameters + ---------- + range_header : str + The value of the Range header, e.g. ``"bytes=0-99"`` or ``"bytes=-100"``. + + Returns + ------- + ByteRequest or None + A ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``, + or ``None`` if the header cannot be parsed. + """ + if not range_header.startswith("bytes="): + return None + range_spec = range_header[len("bytes=") :] + try: + if range_spec.startswith("-"): + # suffix request: bytes=-N + suffix = int(range_spec[1:]) + return SuffixByteRequest(suffix=suffix) + parts = range_spec.split("-", 1) + if len(parts) != 2: + return None + start_str, end_str = parts + start = int(start_str) + if end_str == "": + # offset request: bytes=N- + return OffsetByteRequest(offset=start) + # range request: bytes=N-M (HTTP end is inclusive, ByteRequest end is exclusive) + end = int(end_str) + 1 + return RangeByteRequest(start=start, end=end) + except ValueError: + return None + + +async def _get_response(store: Store, path: str, byte_range: ByteRequest | None = None) -> Response: + """Fetch a key from the store and return an HTTP response.""" + from starlette.responses import Response + + proto = cpu.buffer_prototype + content_type = "application/json" if path.endswith("zarr.json") else "application/octet-stream" + + buf = await store.get(path, proto, byte_range=byte_range) + if buf is None: + return Response(status_code=404) + + status_code = 206 if byte_range is not None else 200 + return Response(content=buf.to_bytes(), status_code=status_code, media_type=content_type) + + +async def _handle_request(request: Request) -> Response: + """Handle a request, optionally filtering by node validity.""" + from starlette.responses import Response + + store: Store = request.app.state.store + node: Array[Any] | Group | None = request.app.state.node + prefix: str = request.app.state.prefix + path = request.path_params.get("path", "") + + # If serving a node, validate the key before touching the store. + if node is not None and not is_valid_node_key(node, path): + return Response(status_code=404) + + # Resolve the full store key by prepending the node's prefix. + store_key = f"{prefix}/{path}" if prefix else path + + if request.method == "PUT": + body = await request.body() + buf = cpu.buffer_prototype.buffer.from_bytes(body) + await store.set(store_key, buf) + return Response(status_code=204) + + range_header = request.headers.get("range") + byte_range: ByteRequest | None = None + if range_header is not None: + byte_range = _parse_range_header(range_header) + if byte_range is None: + return Response(status_code=416) + + return await _get_response(store, store_key, byte_range) + + +def _make_starlette_app( + *, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, +) -> Starlette: + """Create a Starlette app with the request handler.""" + try: + from starlette.applications import Starlette + from starlette.middleware.cors import CORSMiddleware + from starlette.routing import Route + except ImportError as e: + raise ImportError( + "The zarr server requires the 'starlette' package. " + "Install it with: pip install zarr[server]" + ) from e + + if methods is None: + methods = {"GET"} + + app = Starlette( + routes=[Route("/{path:path}", _handle_request, methods=list(methods))], + ) + + if cors_options is not None: + app.add_middleware( + CORSMiddleware, + allow_origins=cors_options["allow_origins"], + allow_methods=cors_options["allow_methods"], + ) + return app + + +def _start_server( + app: Starlette, + *, + host: str, + port: int, + background: bool, +) -> BackgroundServer | None: + """Create a uvicorn server for *app* and either block or run in a daemon thread.""" + try: + import uvicorn + except ImportError as e: + raise ImportError( + "The zarr server requires the 'uvicorn' package. " + "Install it with: pip install zarr[server]" + ) from e + + config = uvicorn.Config(app, host=host, port=port) + server = uvicorn.Server(config) + + if not background: + server.run() + return None + + # Signal handlers can only be installed on the main thread, so + # disable them when running in a background thread. + # See https://github.com/encode/uvicorn/issues/742 + server.install_signal_handlers = lambda: None + + thread = threading.Thread(target=server.run, daemon=True) + thread.start() + + deadline = time.monotonic() + 5.0 + while not server.started: + if time.monotonic() > deadline: + raise RuntimeError("Server failed to start within 5 seconds") + time.sleep(0.01) + + return BackgroundServer(server, thread, host=host, port=port) + + +def store_app( + store: Store, + *, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, +) -> Starlette: + """Create a Starlette ASGI app that serves every key in a zarr ``Store``. + + Parameters + ---------- + store : Store + The zarr store to serve. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + + Returns + ------- + Starlette + An ASGI application. + """ + app = _make_starlette_app(methods=methods, cors_options=cors_options) + app.state.store = store + app.state.node = None + app.state.prefix = "" + return app + + +def node_app( + node: Array[Any] | Group, + *, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, +) -> Starlette: + """Create a Starlette ASGI app that serves only the keys belonging to a + zarr ``Array`` or ``Group``. + + For an ``Array``, the served keys are the metadata document(s) and all + chunk (or shard) keys whose coordinates fall within the array's grid. + + For a ``Group``, the served keys are the group's own metadata plus any + path that resolves through the group's members to a valid array metadata + document or chunk key. + + Requests for keys outside this set receive a 404 response, even if the + underlying store contains data at that path. + + Parameters + ---------- + node : Array or Group + The zarr array or group to serve. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + + Returns + ------- + Starlette + An ASGI application. + """ + app = _make_starlette_app(methods=methods, cors_options=cors_options) + app.state.store = node.store_path.store + app.state.node = node + app.state.prefix = node.store_path.path + return app + + +@overload +def serve_store( + store: Store, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[False] = ..., +) -> None: ... + + +@overload +def serve_store( + store: Store, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[True], +) -> BackgroundServer: ... + + +def serve_store( + store: Store, + *, + host: str = "127.0.0.1", + port: int = 8000, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, + background: bool = False, +) -> BackgroundServer | None: + """Serve every key in a zarr ``Store`` over HTTP. + + Builds a Starlette ASGI app (see :func:`store_app`) and starts a + `Uvicorn `_ server. + + Parameters + ---------- + store : Store + The zarr store to serve. + host : str, optional + The host to bind to. Defaults to ``"127.0.0.1"``. + port : int, optional + The port to bind to. Defaults to ``8000``. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + background : bool, optional + If ``False`` (the default), the server blocks until shut down. + If ``True``, the server runs in a daemon thread and this function + returns immediately. + + Returns + ------- + BackgroundServer or None + The running server when ``background=True``, or ``None`` when + the server has been shut down after blocking. The + ``BackgroundServer`` can be used as a context manager for + automatic shutdown. + """ + app = store_app(store, methods=methods, cors_options=cors_options) + return _start_server(app, host=host, port=port, background=background) + + +@overload +def serve_node( + node: Array[Any] | Group, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[False] = ..., +) -> None: ... + + +@overload +def serve_node( + node: Array[Any] | Group, + *, + host: str = ..., + port: int = ..., + methods: set[HTTPMethod] | None = ..., + cors_options: CorsOptions | None = ..., + background: Literal[True], +) -> BackgroundServer: ... + + +def serve_node( + node: Array[Any] | Group, + *, + host: str = "127.0.0.1", + port: int = 8000, + methods: set[HTTPMethod] | None = None, + cors_options: CorsOptions | None = None, + background: bool = False, +) -> BackgroundServer | None: + """Serve only the keys belonging to a zarr ``Array`` or ``Group`` over HTTP. + + Builds a Starlette ASGI app (see :func:`node_app`) and starts a + `Uvicorn `_ server. + + For an ``Array``, the served keys are the metadata document(s) and all + chunk (or shard) keys whose coordinates fall within the array's grid. + + For a ``Group``, the served keys are the group's own metadata plus any + path that resolves through the group's members to a valid array metadata + document or chunk key. + + Requests for keys outside this set receive a 404 response, even if the + underlying store contains data at that path. + + Parameters + ---------- + node : Array or Group + The zarr array or group to serve. + host : str, optional + The host to bind to. Defaults to ``"127.0.0.1"``. + port : int, optional + The port to bind to. Defaults to ``8000``. + methods : set of HTTPMethod, optional + The HTTP methods to accept. Defaults to ``{"GET"}``. + cors_options : CorsOptions, optional + If provided, CORS middleware will be added with the given options. + background : bool, optional + If ``False`` (the default), the server blocks until shut down. + If ``True``, the server runs in a daemon thread and this function + returns immediately. + + Returns + ------- + BackgroundServer or None + The running server when ``background=True``, or ``None`` when + the server has been shut down after blocking. The + ``BackgroundServer`` can be used as a context manager for + automatic shutdown. + """ + app = node_app(node, methods=methods, cors_options=cors_options) + return _start_server(app, host=host, port=port, background=background) diff --git a/tests/test_examples.py b/tests/test_examples.py index 152b0a1a88..4c358f8de9 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -19,47 +19,27 @@ ZARR_PROJECT_PATH = Path(".").absolute() -def set_dep(script: str, dependency: str) -> str: - """ - Set a dependency in a PEP-723 script header. - If the package is already in the list, it will be replaced. - If the package is not already in the list, it will be added. +def _get_zarr_extras(script_path: Path) -> str: + """Extract extras from the zarr dependency in a script's PEP 723 header. - Source code modified from - https://packaging.python.org/en/latest/specifications/inline-script-metadata/#reference-implementation + For example, if the script declares ``zarr[server]``, this returns ``[server]``. + If the script declares ``zarr`` with no extras, this returns ``""``. """ - match = re.search(PEP_723_REGEX, script) - + source_text = script_path.read_text() + match = re.search(PEP_723_REGEX, source_text) if match is None: - raise ValueError(f"PEP-723 header not found in {script}") + return "" content = "".join( line[2:] if line.startswith("# ") else line[1:] for line in match.group("content").splitlines(keepends=True) ) - config = tomlkit.parse(content) - for idx, dep in enumerate(tuple(config["dependencies"])): - if Requirement(dep).name == Requirement(dependency).name: - config["dependencies"][idx] = dependency - - new_content = "".join( - f"# {line}" if line.strip() else f"#{line}" - for line in tomlkit.dumps(config).splitlines(keepends=True) - ) - - start, end = match.span("content") - return script[:start] + new_content + script[end:] - - -def resave_script(source_path: Path, dest_path: Path) -> None: - """ - Read a script from source_path and save it to dest_path after inserting the absolute path to the - local Zarr project directory in the PEP-723 header. - """ - source_text = source_path.read_text() - dest_text = set_dep(source_text, f"zarr @ file:///{ZARR_PROJECT_PATH}") - dest_path.write_text(dest_text) + for dep in config.get("dependencies", []): + req = Requirement(dep) + if req.name == "zarr" and req.extras: + return "[" + ",".join(sorted(req.extras)) + "]" + return "" def test_script_paths() -> None: @@ -73,14 +53,15 @@ def test_script_paths() -> None: sys.platform in ("win32",), reason="This test fails due for unknown reasons on Windows in CI." ) @pytest.mark.parametrize("script_path", script_paths) -def test_scripts_can_run(script_path: Path, tmp_path: Path) -> None: - dest_path = tmp_path / script_path.name - # We resave the script after inserting the absolute path to the local Zarr project directory, - # and then test its behavior. - # This allows the example to be useful to users who don't have Zarr installed, but also testable. - resave_script(script_path, dest_path) +def test_scripts_can_run(script_path: Path) -> None: + # Override the zarr dependency with the local project, preserving any extras + # declared in the script's PEP 723 header (e.g. zarr[server]). + extras = _get_zarr_extras(script_path) + zarr_dep = f"zarr{extras} @ file:///{ZARR_PROJECT_PATH}" result = subprocess.run( - ["uv", "run", "--refresh", str(dest_path)], capture_output=True, text=True + ["uv", "run", "--with", zarr_dep, "--refresh", str(script_path)], + capture_output=True, + text=True, ) assert result.returncode == 0, ( f"Script at {script_path} failed to run. Output: {result.stdout} Error: {result.stderr}" diff --git a/tests/test_experimental/test_serve.py b/tests/test_experimental/test_serve.py new file mode 100644 index 0000000000..41b323b26b --- /dev/null +++ b/tests/test_experimental/test_serve.py @@ -0,0 +1,630 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +import pytest + +import zarr +from zarr.core.buffer import cpu +from zarr.core.sync import sync + +if TYPE_CHECKING: + from zarr.abc.store import Store + from zarr.core.common import ZarrFormat + +pytest.importorskip("starlette") +pytest.importorskip("httpx") + +from starlette.testclient import TestClient + +from zarr.experimental.serve import CorsOptions, _parse_range_header, node_app, store_app + + +@pytest.fixture +def group_with_arrays(store: Store) -> zarr.Group: + """Create a group containing a regular array and a sharded array.""" + root = zarr.open_group(store, mode="w") + zarr.create_array(root.store_path / "regular", shape=(4, 4), chunks=(2, 2), dtype="f8") + zarr.create_array( + root.store_path / "sharded", + shape=(8, 8), + chunks=(2, 2), + shards=(4, 4), + dtype="i4", + ) + return root + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestNodeAppDoesNotExposeNonZarrKeys: + """node_app must never expose keys that are not part of the zarr hierarchy.""" + + def test_non_zarr_key_returns_404(self, store: Store, group_with_arrays: zarr.Group) -> None: + """A key that is not valid zarr metadata or a valid chunk key should return 404, + even if the underlying store contains data at that path.""" + non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret data") + sync(store.set("secret.txt", non_zarr_buf)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + # The non-zarr key must not be accessible. + response = client.get("/secret.txt") + assert response.status_code == 404 + + def test_non_zarr_key_nested_returns_404( + self, store: Store, group_with_arrays: zarr.Group + ) -> None: + """A non-zarr key nested under a real array's path should return 404, + even though the path prefix matches a valid zarr node.""" + non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"not a chunk") + sync(store.set("regular/notes.txt", non_zarr_buf)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + response = client.get("/regular/notes.txt") + assert response.status_code == 404 + + def test_valid_metadata_is_accessible(self, group_with_arrays: zarr.Group) -> None: + """Zarr metadata keys (zarr.json) for both the root group and child arrays + should be served with a 200 status.""" + app = node_app(group_with_arrays) + client = TestClient(app) + + # Root group metadata + response = client.get("/zarr.json") + assert response.status_code == 200 + + # Array metadata + response = client.get("/regular/zarr.json") + assert response.status_code == 200 + + def test_valid_chunk_is_accessible(self, group_with_arrays: zarr.Group) -> None: + """A valid, in-bounds chunk key for an array with written data should + be served with a 200 status.""" + arr = group_with_arrays["regular"] + assert isinstance(arr, zarr.Array) + arr[:] = np.ones((4, 4)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + # c/0/0 is a valid chunk key for a (4,4) array with (2,2) chunks. + response = client.get("/regular/c/0/0") + assert response.status_code == 200 + + def test_out_of_bounds_chunk_key_returns_404(self, group_with_arrays: zarr.Group) -> None: + """A chunk key that is syntactically valid but references indices beyond + the array's chunk grid should return 404.""" + arr = group_with_arrays["regular"] + assert isinstance(arr, zarr.Array) + arr[:] = np.ones((4, 4)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + # (4,4) array with (2,2) chunks has grid shape (2,2), so c/99/99 is + # syntactically valid but out of bounds. + response = client.get("/regular/c/99/99") + assert response.status_code == 404 + + def test_empty_path_returns_404(self, group_with_arrays: zarr.Group) -> None: + """A request to the root path '/' should return 404 because an empty + string is not a valid zarr key.""" + app = node_app(group_with_arrays) + client = TestClient(app) + + response = client.get("/") + assert response.status_code == 404 + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestShardedArrayByteRangeReads: + """Byte-range reads against a sharded array served via node_app.""" + + def test_range_read_returns_206(self, group_with_arrays: zarr.Group) -> None: + """A Range header requesting a specific byte range (e.g. bytes=0-7) should + return 206 Partial Content with exactly those bytes.""" + arr = group_with_arrays["sharded"] + assert isinstance(arr, zarr.Array) + arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + # c/0/0 is the first shard key for an (8,8) array with (4,4) shards. + full_response = client.get("/sharded/c/0/0") + assert full_response.status_code == 200 + full_body = full_response.content + + # Request the first 8 bytes. + range_response = client.get("/sharded/c/0/0", headers={"Range": "bytes=0-7"}) + assert range_response.status_code == 206 + assert range_response.content == full_body[:8] + + def test_suffix_range_read(self, group_with_arrays: zarr.Group) -> None: + """A suffix byte range (e.g. bytes=-4) should return the last N bytes + of the resource with a 206 status.""" + arr = group_with_arrays["sharded"] + assert isinstance(arr, zarr.Array) + arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + full_response = client.get("/sharded/c/0/0") + full_body = full_response.content + + # Request the last 4 bytes. + range_response = client.get("/sharded/c/0/0", headers={"Range": "bytes=-4"}) + assert range_response.status_code == 206 + assert range_response.content == full_body[-4:] + + def test_offset_range_read(self, group_with_arrays: zarr.Group) -> None: + """An offset byte range (e.g. bytes=4-) should return all bytes from + the given offset to the end, with a 206 status.""" + arr = group_with_arrays["sharded"] + assert isinstance(arr, zarr.Array) + arr[:] = np.arange(64, dtype="i4").reshape((8, 8)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + full_response = client.get("/sharded/c/0/0") + full_body = full_response.content + + # Request everything from byte 4 onward. + range_response = client.get("/sharded/c/0/0", headers={"Range": "bytes=4-"}) + assert range_response.status_code == 206 + assert range_response.content == full_body[4:] + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestMalformedRangeHeaders: + """Malformed Range headers must return 416, never crash the server.""" + + @pytest.mark.parametrize( + "header", + [ + "bytes=abc-def", + "bytes=-abc", + "bytes=abc-", + "bytes=0-7,10-20", + ], + ) + def test_malformed_range_returns_416(self, store: Store, header: str) -> None: + """Range headers with non-numeric values, multiple ranges, or other + malformed syntax should return 416 Range Not Satisfiable instead of + crashing the server.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"some data here") + sync(store.set("key", buf)) + + app = store_app(store) + client = TestClient(app, raise_server_exceptions=False) + + response = client.get("/key", headers={"Range": header}) + assert response.status_code == 416 + + def test_non_bytes_unit_returns_416(self, store: Store) -> None: + """A Range header using a unit other than 'bytes' (e.g. 'chars=0-7') + should return 416 because only byte ranges are supported.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"some data") + sync(store.set("key", buf)) + + app = store_app(store) + client = TestClient(app) + + response = client.get("/key", headers={"Range": "chars=0-7"}) + assert response.status_code == 416 + + +class TestParseRangeHeader: + """Unit tests for _parse_range_header.""" + + def test_valid_range(self) -> None: + """'bytes=0-99' should parse into a RangeByteRequest with start=0 and + end=100 (end is exclusive, so the inclusive HTTP end is incremented).""" + from zarr.abc.store import RangeByteRequest + + result = _parse_range_header("bytes=0-99") + assert result == RangeByteRequest(start=0, end=100) + + def test_valid_suffix(self) -> None: + """'bytes=-50' should parse into a SuffixByteRequest requesting the + last 50 bytes of the resource.""" + from zarr.abc.store import SuffixByteRequest + + result = _parse_range_header("bytes=-50") + assert result == SuffixByteRequest(suffix=50) + + def test_valid_offset(self) -> None: + """'bytes=10-' should parse into an OffsetByteRequest starting at + byte 10 and reading to the end of the resource.""" + from zarr.abc.store import OffsetByteRequest + + result = _parse_range_header("bytes=10-") + assert result == OffsetByteRequest(offset=10) + + def test_non_bytes_unit(self) -> None: + """A Range header with a non-'bytes' unit should return None.""" + assert _parse_range_header("chars=0-7") is None + + def test_garbage_values(self) -> None: + """Non-numeric values in the byte range should return None instead + of raising a ValueError.""" + assert _parse_range_header("bytes=abc-def") is None + + def test_multi_range(self) -> None: + """Multi-range requests (e.g. bytes=0-7,10-20) are not supported and + should return None.""" + assert _parse_range_header("bytes=0-7,10-20") is None + + def test_empty_spec(self) -> None: + """A Range header with no range specifier after 'bytes=' should + return None.""" + assert _parse_range_header("bytes=") is None + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestWriteViaPut: + """store_app and node_app can be configured to accept PUT writes.""" + + def test_put_writes_to_store(self, store: Store) -> None: + """A PUT request to store_app with PUT enabled should write the + request body into the store at the given key.""" + app = store_app(store, methods={"GET", "PUT"}) + client = TestClient(app) + + payload = b"hello zarr" + response = client.put("/some/key", content=payload) + assert response.status_code == 204 + + # Verify the data landed in the store. + buf = sync(store.get("some/key", cpu.buffer_prototype)) + assert buf is not None + assert buf.to_bytes() == payload + + def test_put_then_get_roundtrip(self, store: Store) -> None: + """Data written via PUT should be retrievable via a subsequent GET + at the same key.""" + app = store_app(store, methods={"GET", "PUT"}) + client = TestClient(app) + + payload = b"\x00\x01\x02\x03" + client.put("/data/blob", content=payload) + + response = client.get("/data/blob") + assert response.status_code == 200 + assert response.content == payload + + def test_put_rejected_when_not_configured(self, store: Store) -> None: + """PUT requests should return 405 Method Not Allowed when the server + is created with the default methods (GET only).""" + app = store_app(store) + client = TestClient(app) + + response = client.put("/some/key", content=b"data") + assert response.status_code == 405 + + def test_put_on_node_validates_key(self, store: Store, group_with_arrays: zarr.Group) -> None: + """PUT requests via node_app should be rejected with 404 when the + target key is not a valid zarr key (metadata or chunk).""" + app = node_app(group_with_arrays, methods={"GET", "PUT"}) + client = TestClient(app) + + response = client.put("/not_a_zarr_key.bin", content=b"data") + assert response.status_code == 404 + + def test_put_to_valid_chunk_key_succeeds(self, group_with_arrays: zarr.Group) -> None: + """PUT requests via node_app to a valid chunk key should succeed + with 204, and the written data should be retrievable via GET.""" + app = node_app(group_with_arrays, methods={"GET", "PUT"}) + client = TestClient(app) + + payload = b"\x00" * 32 + response = client.put("/regular/c/0/0", content=payload) + assert response.status_code == 204 + + # Confirm it round-trips. + get_response = client.get("/regular/c/0/0") + assert get_response.status_code == 200 + assert get_response.content == payload + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestStoreAppEdgeCases: + """Edge cases for store_app.""" + + def test_get_nonexistent_key_returns_404(self, store: Store) -> None: + """GET for a key that does not exist in the store should return 404.""" + app = store_app(store) + client = TestClient(app) + + response = client.get("/no/such/key") + assert response.status_code == 404 + + def test_empty_path_returns_404(self, store: Store) -> None: + """GET to the root path '/' (empty key) should return 404 because + an empty string is not a valid store key.""" + app = store_app(store) + client = TestClient(app) + + response = client.get("/") + assert response.status_code == 404 + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestNodeAppDirectArray: + """Serve a single array directly (not through a group).""" + + def test_serve_nested_array_directly(self, store: Store) -> None: + """When node_app is given a nested array (not a group), requests + should use keys relative to that array's path. Metadata and in-bounds + chunks should return 200, and out-of-bounds chunks should return 404.""" + root = zarr.open_group(store, mode="w") + arr = zarr.create_array( + root.store_path / "sub/nested", + shape=(4,), + chunks=(2,), + dtype="f8", + ) + arr[:] = np.arange(4, dtype="f8") + + # Serve the array directly — its prefix is "sub/nested". + app = node_app(arr) + client = TestClient(app) + + # Metadata should be accessible at the array root. + response = client.get("/zarr.json") + assert response.status_code == 200 + + # Chunk keys are relative to the array. + response = client.get("/c/0") + assert response.status_code == 200 + + response = client.get("/c/1") + assert response.status_code == 200 + + # Out of bounds. + response = client.get("/c/99") + assert response.status_code == 404 + + def test_serve_root_array(self, store: Store) -> None: + """When node_app is given an array stored at the root of a store + (empty prefix), metadata and chunk keys should be accessible at + their natural paths.""" + arr = zarr.create_array( + store, + shape=(6,), + chunks=(3,), + dtype="i4", + ) + arr[:] = np.arange(6, dtype="i4") + + # Root-level array has prefix = "". + app = node_app(arr) + client = TestClient(app) + + response = client.get("/zarr.json") + assert response.status_code == 200 + + response = client.get("/c/0") + assert response.status_code == 200 + + response = client.get("/c/1") + assert response.status_code == 200 + + response = client.get("/c/2") + assert response.status_code == 404 + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestContentType: + """Responses should have the correct Content-Type.""" + + def test_metadata_has_json_content_type(self, group_with_arrays: zarr.Group) -> None: + """Zarr metadata files (zarr.json) should be served with + Content-Type: application/json.""" + app = node_app(group_with_arrays) + client = TestClient(app) + + response = client.get("/zarr.json") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + + def test_chunk_has_octet_stream_content_type(self, group_with_arrays: zarr.Group) -> None: + """Chunk data should be served with Content-Type: application/octet-stream + since it is binary data.""" + arr = group_with_arrays["regular"] + assert isinstance(arr, zarr.Array) + arr[:] = np.ones((4, 4)) + + app = node_app(group_with_arrays) + client = TestClient(app) + + response = client.get("/regular/c/0/0") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/octet-stream" + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestCorsMiddleware: + """CORS middleware should add the expected headers.""" + + def test_cors_headers_present(self, store: Store) -> None: + """When cors_options are provided, responses should include the + Access-Control-Allow-Origin header matching the request origin.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"data") + sync(store.set("key", buf)) + + cors = CorsOptions(allow_origins=["https://example.com"], allow_methods=["GET"]) + app = store_app(store, cors_options=cors) + client = TestClient(app) + + response = client.get("/key", headers={"Origin": "https://example.com"}) + assert response.status_code == 200 + assert response.headers["access-control-allow-origin"] == "https://example.com" + + def test_cors_preflight(self, store: Store) -> None: + """CORS preflight OPTIONS requests should return 200 with the + Access-Control-Allow-Origin header when CORS is configured.""" + cors = CorsOptions(allow_origins=["*"], allow_methods=["GET", "PUT"]) + app = store_app(store, methods={"GET", "PUT"}, cors_options=cors) + client = TestClient(app) + + response = client.options( + "/any/path", + headers={ + "Origin": "https://example.com", + "Access-Control-Request-Method": "PUT", + }, + ) + assert response.status_code == 200 + assert "access-control-allow-origin" in response.headers + + def test_no_cors_headers_without_option(self, store: Store) -> None: + """When no cors_options are provided, responses should not include + any CORS headers, even if the request includes an Origin header.""" + buf = cpu.buffer_prototype.buffer.from_bytes(b"data") + sync(store.set("key", buf)) + + app = store_app(store) + client = TestClient(app) + + response = client.get("/key", headers={"Origin": "https://example.com"}) + assert response.status_code == 200 + assert "access-control-allow-origin" not in response.headers + + +def _metadata_key(zarr_format: ZarrFormat) -> str: + """Return the metadata key for the given zarr format.""" + return "zarr.json" if zarr_format == 3 else ".zarray" + + +def _chunk_key(zarr_format: ZarrFormat, coords: str) -> str: + """Return a chunk key for the given format. + + *coords* is a dot-separated string like ``"0.0"``. For v3 this becomes + ``"c/0/0"``; for v2 it is returned unchanged. + """ + if zarr_format == 3: + return "c/" + coords.replace(".", "/") + return coords + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestNodeAppV2AndV3: + """Test node_app with both v2 and v3 arrays side by side.""" + + def test_metadata_accessible(self, store: Store, zarr_format: ZarrFormat) -> None: + """The format-appropriate metadata key should be served with 200.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + app = node_app(arr) + client = TestClient(app) + + response = client.get(f"/{_metadata_key(zarr_format)}") + assert response.status_code == 200 + + def test_chunk_accessible(self, store: Store, zarr_format: ZarrFormat) -> None: + """An in-bounds chunk key should be served with 200 for both formats.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + arr[:] = np.ones(4) + + app = node_app(arr) + client = TestClient(app) + + response = client.get(f"/{_chunk_key(zarr_format, '0')}") + assert response.status_code == 200 + + def test_out_of_bounds_chunk_returns_404(self, store: Store, zarr_format: ZarrFormat) -> None: + """An out-of-bounds chunk key should return 404 for both formats.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + arr[:] = np.ones(4) + + app = node_app(arr) + client = TestClient(app) + + response = client.get(f"/{_chunk_key(zarr_format, '99')}") + assert response.status_code == 404 + + def test_non_zarr_key_returns_404(self, store: Store, zarr_format: ZarrFormat) -> None: + """A non-zarr key should return 404 regardless of format.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + non_zarr_buf = cpu.buffer_prototype.buffer.from_bytes(b"secret") + sync(store.set("secret.txt", non_zarr_buf)) + + app = node_app(arr) + client = TestClient(app) + + response = client.get("/secret.txt") + assert response.status_code == 404 + + def test_data_roundtrip(self, store: Store, zarr_format: ZarrFormat) -> None: + """Data written to an array should be readable via store_app for + both formats.""" + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8", zarr_format=zarr_format) + arr[:] = np.arange(4, dtype="f8") + + app = store_app(store) + client = TestClient(app) + + # Metadata should be accessible. + response = client.get(f"/{_metadata_key(zarr_format)}") + assert response.status_code == 200 + + # First chunk should be accessible. + response = client.get(f"/{_chunk_key(zarr_format, '0')}") + assert response.status_code == 200 + assert len(response.content) > 0 + + +def _get_free_port() -> int: + """Return an unused TCP port on localhost.""" + import socket + + with socket.socket() as s: + s.bind(("127.0.0.1", 0)) + port: int = s.getsockname()[1] + return port + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +class TestServeBackground: + """Test serve_store and serve_node with background=True.""" + + def test_serve_store_background(self, store: Store) -> None: + """serve_store(background=True) should return a BackgroundServer + that responds to HTTP requests and can be used as a context manager.""" + import httpx + + from zarr.experimental.serve import serve_store + + buf = cpu.buffer_prototype.buffer.from_bytes(b"hello") + sync(store.set("key", buf)) + + port = _get_free_port() + with serve_store(store, host="127.0.0.1", port=port, background=True) as server: + assert server.host == "127.0.0.1" + assert server.port == port + assert server.url == f"http://127.0.0.1:{port}" + + response = httpx.get(f"{server.url}/key") + assert response.status_code == 200 + assert response.content == b"hello" + + def test_serve_node_background(self, store: Store) -> None: + """serve_node(background=True) should return a BackgroundServer + that responds to HTTP requests and can be used as a context manager.""" + import httpx + + from zarr.experimental.serve import serve_node + + arr = zarr.create_array(store, shape=(4,), chunks=(2,), dtype="f8") + arr[:] = np.arange(4, dtype="f8") + + port = _get_free_port() + with serve_node(arr, host="127.0.0.1", port=port, background=True) as server: + response = httpx.get(f"{server.url}/zarr.json") + assert response.status_code == 200