From 5108fdb87bc93e456e0b516fb36f9de2ad17368e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 27 Feb 2026 04:17:31 +0000
Subject: [PATCH 1/4] Initial plan


From aaef2fdedf49da62b377e451c2bddc99b1d14f6c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 27 Feb 2026 04:24:22 +0000
Subject: [PATCH 2/4] Fix umap_embedding edge case: return early when fewer
 than 2 non-constant rows exist

Co-authored-by: lappalainenj <34949352+lappalainenj@users.noreply.github.com>
---
 flyvis/analysis/clustering.py | 22 +++++++++++------
 tests/test_clustering.py      | 46 +++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_clustering.py

diff --git a/flyvis/analysis/clustering.py b/flyvis/analysis/clustering.py
index e0011cc..208c467 100644
--- a/flyvis/analysis/clustering.py
+++ b/flyvis/analysis/clustering.py
@@ -456,9 +456,11 @@ def umap_embedding(
 
     Returns:
         A tuple containing:
-        - embedding: The UMAP embedding.
-        - mask: Boolean mask for valid samples.
-        - reducer: The fitted UMAP object.
+        - embedding: The UMAP embedding (n_samples, n_components). May be NaN
+          if insufficient data.
+        - mask: Boolean mask (length n_samples) showing which rows had nonzero
+          variance and were connected.
+        - reducer: The fitted UMAP object or None if insufficient data.
 
     Raises:
         ValueError: If n_components is too large relative to sample size.
@@ -481,10 +483,16 @@ def umap_embedding(
         X = X.reshape(X.shape[0], -1)
         logging.info("reshaped X from %s to %s", shape, X.shape)
 
-    embedding = np.ones([X.shape[0], n_components]) * np.nan
-    # umap doesn't like contant rows
+    n_samples = X.shape[0]
+    embedding = np.ones([n_samples, n_components]) * np.nan
+    # umap doesn't like constant rows
     mask = ~np.isclose(X.std(axis=1), 0)
-    X = X[mask]
+    X_nonconst = X[mask]
+
+    # If fewer than 2 rows remain, skip UMAP and return embedding of NaNs.
+    if X_nonconst.shape[0] < 2:
+        return embedding, mask, None
+
     reducer = UMAP(
         n_neighbors=n_neighbors,
         min_dist=min_dist,
@@ -495,7 +503,7 @@ def umap_embedding(
         n_epochs=n_epochs,
         **kwargs,
     )
-    _embedding = reducer.fit_transform(X)
+    _embedding = reducer.fit_transform(X_nonconst)
 
     # gaussian mixture doesn't like nans through disconnected vertices in umap
     connected_vertices_mask = ~disconnected_vertices(reducer)
diff --git a/tests/test_clustering.py b/tests/test_clustering.py
new file mode 100644
index 0000000..0b33143
--- /dev/null
+++ b/tests/test_clustering.py
@@ -0,0 +1,46 @@
+import numpy as np
+import pytest
+
+from flyvis.analysis.clustering import umap_embedding
+
+
+def test_umap_embedding_single_nonzero_variance_row():
+    """Test that umap_embedding handles the edge case where only one row has
+    nonzero variance (all others are constant). UMAP should not be fitted and
+    the function should return NaN embedding with None reducer."""
+    rng = np.random.default_rng(0)
+    # One row with variance, four constant rows
+    X = np.zeros((5, 10))
+    X[2] = rng.random(10)
+
+    embedding, mask, reducer = umap_embedding(X)
+
+    assert reducer is None
+    assert np.all(np.isnan(embedding))
+    # Only the one non-constant row should be True in the mask
+    expected_mask = np.array([False, False, True, False, False])
+    np.testing.assert_array_equal(mask, expected_mask)
+
+
+def test_umap_embedding_all_zero_variance_rows():
+    """Test that umap_embedding handles all-constant rows gracefully."""
+    X = np.ones((5, 10))
+
+    embedding, mask, reducer = umap_embedding(X)
+
+    assert reducer is None
+    assert np.all(np.isnan(embedding))
+    assert not np.any(mask)
+
+
+def test_umap_embedding_returns_none_reducer_when_insufficient_data():
+    """Test that reducer is None when fewer than 2 rows have nonzero variance."""
+    X = np.zeros((4, 8))
+    # Only one non-constant row
+    X[0] = np.arange(8, dtype=float)
+
+    embedding, mask, reducer = umap_embedding(X)
+
+    assert reducer is None
+    assert embedding.shape == (4, 2)
+    assert np.all(np.isnan(embedding))

From 0e59f8eac33c138ab80817add4098aa57af8927d Mon Sep 17 00:00:00 2001
From: Janne Lappalainen <34949352+lappalainenj@users.noreply.github.com>
Date: Fri, 27 Feb 2026 12:25:25 -0800
Subject: [PATCH 3/4] Update tests/test_clustering.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 tests/test_clustering.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_clustering.py b/tests/test_clustering.py
index 0b33143..051b6d1 100644
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@@ -1,5 +1,4 @@
 import numpy as np
-import pytest
 
 from flyvis.analysis.clustering import umap_embedding
 

From 512596e59a7b7b75564f2a9e0e4d4a675f7e0e77 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 27 Feb 2026 20:30:16 +0000
Subject: [PATCH 4/4] Fix return type annotation: use Optional[UMAP] and clarify
 mask semantics in docstring

Co-authored-by: lappalainenj <34949352+lappalainenj@users.noreply.github.com>
---
 flyvis/analysis/clustering.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/flyvis/analysis/clustering.py b/flyvis/analysis/clustering.py
index 208c467..c7fdcbe 100644
--- a/flyvis/analysis/clustering.py
+++ b/flyvis/analysis/clustering.py
@@ -439,7 +439,7 @@ def umap_embedding(
     metric: str = "correlation",
     n_epochs: int = 1500,
     **kwargs,
-) -> Tuple[np.ndarray, np.ndarray, UMAP]:
+) -> Tuple[np.ndarray, np.ndarray, Optional[UMAP]]:
     """
     Perform UMAP embedding on input data.
 
@@ -458,9 +458,12 @@ def umap_embedding(
         A tuple containing:
         - embedding: The UMAP embedding (n_samples, n_components). May be NaN
           if insufficient data.
-        - mask: Boolean mask (length n_samples) showing which rows had nonzero
-          variance and were connected.
-        - reducer: The fitted UMAP object or None if insufficient data.
+        - mask: Boolean mask (length n_samples). When reducer is not None,
+          True indicates rows with nonzero variance that were also connected
+          in the UMAP graph. When reducer is None (insufficient data), True
+          indicates only rows with nonzero variance.
+        - reducer: The fitted UMAP object or None if fewer than 2 rows had
+          nonzero variance.
 
     Raises:
         ValueError: If n_components is too large relative to sample size.