diff --git a/mapillary_tools/constants.py b/mapillary_tools/constants.py index bf91e3a6..cab9e9b1 100644 --- a/mapillary_tools/constants.py +++ b/mapillary_tools/constants.py @@ -143,10 +143,10 @@ def _parse_scaled_integers( ) # Zig-zag detection parameters ZIGZAG_WINDOW_SIZE = int(os.getenv(_ENV_PREFIX + "ZIGZAG_WINDOW_SIZE", 5)) -ZIGZAG_BACKTRACK_THRESHOLD = float( - os.getenv(_ENV_PREFIX + "ZIGZAG_BACKTRACK_THRESHOLD", 0.8) +ZIGZAG_DEVIATION_THRESHOLD = float( + os.getenv(_ENV_PREFIX + "ZIGZAG_DEVIATION_THRESHOLD", 0.8) ) -ZIGZAG_MIN_BACKTRACKS = int(os.getenv(_ENV_PREFIX + "ZIGZAG_MIN_BACKTRACKS", 1)) +ZIGZAG_MIN_DEVIATIONS = int(os.getenv(_ENV_PREFIX + "ZIGZAG_MIN_DEVIATIONS", 1)) ZIGZAG_MIN_DISTANCE = float(os.getenv(_ENV_PREFIX + "ZIGZAG_MIN_DISTANCE", 30)) diff --git a/mapillary_tools/process_sequence_properties.py b/mapillary_tools/process_sequence_properties.py index 80510300..acc801c0 100644 --- a/mapillary_tools/process_sequence_properties.py +++ b/mapillary_tools/process_sequence_properties.py @@ -449,25 +449,27 @@ def _check_sequences_duplication( def _check_sequences_zigzag( input_sequences: T.Sequence[PointSequence], window_size: int = 5, - backtrack_threshold: float = 0.8, - min_backtracks: int = 1, + deviation_threshold: float = 0.8, + min_deviations: int = 1, min_distance: float = 50.0, ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]: """ Check for zig-zag GPS patterns where images jump back and forth between locations. - Detects spatial backtracking - when an image returns closer to earlier images + Detects spatial deviations - when an image returns closer to earlier images than the previous image was. This catches zig-zag patterns where the sequence jumps to a different location and then returns. + Only marks deviation points as errors, keeping the main path intact. + Args: input_sequences: List of image sequences to check - window_size: Number of images to look back when checking for backtracking - backtrack_threshold: Ratio threshold - if dist_curr < dist_prev * threshold, - it's considered backtracking - min_backtracks: Minimum number of backtrack events to fail the sequence + window_size: Number of images to look back when checking for deviations + deviation_threshold: Ratio threshold - if dist_curr < dist_prev * threshold, + it's considered a deviation + min_deviations: Minimum number of deviation events to mark errors min_distance: Minimum distance (in meters) between consecutive images (prev to curr) - to consider for backtracking. This filters out small-scale + to consider for deviation detection. This filters out small-scale movements like U-turns. """ output_sequences: list[PointSequence] = [] @@ -479,8 +481,8 @@ def _check_sequences_zigzag( output_sequences.append(sequence) continue - backtrack_count = 0 - backtrack_locations: list[str] = [] + # Track which indices are detected as deviations + deviation_indices: set[int] = set() for i in range(window_size, len(sequence)): curr = sequence[i] @@ -496,36 +498,70 @@ def _check_sequences_zigzag( # Distance from previous image to reference dist_prev = geo.gps_distance((prev.lat, prev.lon), (ref.lat, ref.lon)) - # Backtracking: current is closer to reference than previous was + # Deviation: current is closer to reference than previous was # Only check if the jump between prev and curr is above min_distance if ( dist_prev_curr > min_distance - and dist_curr < dist_prev * backtrack_threshold + and dist_curr < dist_prev * deviation_threshold ): - backtrack_count += 1 - backtrack_locations.append(curr.filename.name) LOG.debug( - f"Potential zigzag at {curr.filename.name}: " - f"dist_curr={dist_curr:.1f}m < dist_prev={dist_prev:.1f}m * {backtrack_threshold}, " + f"Zigzag detected at {prev.filename.name}: " + f"dist_curr={dist_curr:.1f}m < dist_prev={dist_prev:.1f}m * {deviation_threshold}, " f"jump={dist_prev_curr:.1f}m" ) - if backtrack_count >= min_backtracks: - locations_preview = ", ".join(backtrack_locations[:5]) - if len(backtrack_locations) > 5: - locations_preview += "..." + # Mark prev as deviation (it's the point that jumped away) + deviation_indices.add(i - 1) - ex = exceptions.MapillaryZigZagError( - f"GPS zig-zag pattern detected: {backtrack_count} backtrack events " - f"found (at: {locations_preview})" - ) - LOG.error(f"{_sequence_name(sequence)}: {ex}") - for image in sequence: + # Walk backwards from prev to ref+1 to find more deviation points + # We're looking for deviations between prev and ref + # Use the same check as above: compare distance to ref and jump to curr + for j in range(i - 2, i - window_size, -1): # Stop at ref+1 + point_j = sequence[j] + + # Distance from j to ref + dist_j_to_ref = geo.gps_distance( + (point_j.lat, point_j.lon), (ref.lat, ref.lon) + ) + # Distance from j to curr + dist_j_to_curr = geo.gps_distance( + (point_j.lat, point_j.lon), (curr.lat, curr.lon) + ) + + # Same check as original: j is a deviation if it's farther from ref + # than curr is, and the jump from j to curr is significant + if ( + dist_j_to_curr > min_distance + and dist_curr < dist_j_to_ref * deviation_threshold + ): + deviation_indices.add(j) + LOG.debug( + f"Backwards walk: {point_j.filename.name} also marked as deviation" + ) + else: + # j is on the normal path, stop walking backwards + break + + if len(deviation_indices) >= min_deviations: + # Create errors only for deviation points + for idx in sorted(deviation_indices): + image = sequence[idx] + ex = exceptions.MapillaryZigZagError("GPS zig-zag deviation detected") + LOG.error(f"{image.filename.name}: {ex}") output_errors.append( types.describe_error_metadata( exc=ex, filename=image.filename, filetype=types.FileType.IMAGE ) ) + + # Keep non-deviation points in output sequence + non_deviation_points = [ + sequence[idx] + for idx in range(len(sequence)) + if idx not in deviation_indices + ] + if non_deviation_points: + output_sequences.append(non_deviation_points) else: output_sequences.append(sequence) @@ -839,8 +875,8 @@ def process_sequence_properties( sequences, errors = _check_sequences_zigzag( sequences, window_size=constants.ZIGZAG_WINDOW_SIZE, - backtrack_threshold=constants.ZIGZAG_BACKTRACK_THRESHOLD, - min_backtracks=constants.ZIGZAG_MIN_BACKTRACKS, + deviation_threshold=constants.ZIGZAG_DEVIATION_THRESHOLD, + min_deviations=constants.ZIGZAG_MIN_DEVIATIONS, min_distance=constants.ZIGZAG_MIN_DISTANCE, ) error_metadatas.extend(errors) diff --git a/tests/unit/test_sequence_processing.py b/tests/unit/test_sequence_processing.py index fab57130..d84ccf83 100644 --- a/tests/unit/test_sequence_processing.py +++ b/tests/unit/test_sequence_processing.py @@ -864,12 +864,15 @@ def test_zigzag_detection_straight_path(tmpdir: py.path.local): assert len(error_metadatas) == 0 -def test_zigzag_detection_backtracking(tmpdir: py.path.local): - """Test that a zig-zag pattern with backtracking is detected.""" +def test_zigzag_detection_deviation(tmpdir: py.path.local): + """Test that a zig-zag pattern with deviation is detected. + + Only the deviation points should be marked as errors, not the entire sequence. + """ # Create a sequence that moves forward then jumps back - # Images 0-4: moving forward - # Images 5-6: jump to a different location - # Images 7-9: jump back near images 0-4 (backtracking) + # Images 0-4: moving forward (normal path) + # Images 5-6: jump to a different location (deviations) + # Images 7-9: jump back near images 0-4 (normal path continues) sequence = [ # Moving forward on street 1 _make_image_metadata( @@ -887,14 +890,14 @@ def test_zigzag_detection_backtracking(tmpdir: py.path.local): _make_image_metadata( Path(tmpdir) / Path("./img4.jpg"), 1.0, 1.0004, 4, filesize=1 ), - # Jump to parallel street 2 (far away) + # Jump to parallel street 2 (far away) - these are deviations _make_image_metadata( Path(tmpdir) / Path("./img5.jpg"), 1.001, 1.0005, 5, filesize=1 ), _make_image_metadata( Path(tmpdir) / Path("./img6.jpg"), 1.001, 1.0006, 6, filesize=1 ), - # Jump back near street 1 (backtracking) + # Jump back near street 1 (normal path continues) _make_image_metadata( Path(tmpdir) / Path("./img7.jpg"), 1.0, 1.0007, 7, filesize=1 ), @@ -908,11 +911,36 @@ def test_zigzag_detection_backtracking(tmpdir: py.path.local): metadatas = psp.process_sequence_properties(sequence) error_metadatas = [d for d in metadatas if isinstance(d, types.ErrorMetadata)] + image_metadatas = [d for d in metadatas if isinstance(d, types.ImageMetadata)] + + # Zig-zag should be detected - only deviation points should be errors + zigzag_errors = [ + d + for d in error_metadatas + if isinstance(d.error, exceptions.MapillaryZigZagError) + ] + + # Exactly img5 and img6 should be errors (the deviation points) + error_filenames = {d.filename.name for d in zigzag_errors} + expected_errors = {"img5.jpg", "img6.jpg"} + assert error_filenames == expected_errors, ( + f"Expected errors: {expected_errors}, got: {error_filenames}" + ) - # Zig-zag should be detected - assert len(error_metadatas) == 10 # All images in sequence should be rejected - assert all( - isinstance(d.error, exceptions.MapillaryZigZagError) for d in error_metadatas + # Exactly img0-img4 and img7-img9 should be preserved (the normal path) + preserved_filenames = {d.filename.name for d in image_metadatas} + expected_preserved = { + "img0.jpg", + "img1.jpg", + "img2.jpg", + "img3.jpg", + "img4.jpg", + "img7.jpg", + "img8.jpg", + "img9.jpg", + } + assert preserved_filenames == expected_preserved, ( + f"Expected preserved: {expected_preserved}, got: {preserved_filenames}" ) @@ -1000,7 +1028,7 @@ def test_zigzag_detection_uturn_not_triggered(tmpdir: py.path.local): U-turns are legitimate captures where the camera goes down a street, turns around, and comes back. The min_distance threshold (50m) should - filter these out since the backtrack distances are small. + filter these out since the deviation distances are small. At the equator: 0.0001 degrees ≈ 11 meters So the distances in this test are ~11-33m, below the 50m threshold. @@ -1022,7 +1050,7 @@ def test_zigzag_detection_uturn_not_triggered(tmpdir: py.path.local): _make_image_metadata( Path(tmpdir) / Path("./img4.jpg"), 1.0, 1.0004, 4, filesize=1 ), - # U-turn - coming back (each step ~11m, total backtrack ~33m < 50m threshold) + # U-turn - coming back (each step ~11m, total deviation ~33m < 50m threshold) _make_image_metadata( Path(tmpdir) / Path("./img5.jpg"), 1.0, 1.0003, 5, filesize=1 ), @@ -1052,3 +1080,162 @@ def test_zigzag_detection_uturn_not_triggered(tmpdir: py.path.local): # U-turn should NOT trigger zig-zag detection (distances < 50m threshold) assert len(zigzag_errors) == 0 assert len(image_metadatas) > 0 + + +def test_zigzag_backwards_walk_single_deviation(tmpdir: py.path.local): + """Test backwards walk with a single deviation point. + + When only one point deviates and comes back, only that point should be marked. + """ + # 0.001 degrees ≈ 111 meters, well above 50m threshold + # Use 10 second gaps to stay under speed limit (111m / 10s = 11.1 m/s = 40 km/h) + sequence = [ + # Normal path + _make_image_metadata( + Path(tmpdir) / Path("./img0.jpg"), 1.0, 1.0000, 0, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img1.jpg"), 1.0, 1.0010, 10, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img2.jpg"), 1.0, 1.0020, 20, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img3.jpg"), 1.0, 1.0030, 30, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img4.jpg"), 1.0, 1.0040, 40, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img5.jpg"), 1.0, 1.0050, 50, filesize=1 + ), + # Single deviation - jumps far away + _make_image_metadata( + Path(tmpdir) / Path("./img6.jpg"), 1.010, 1.0060, 60, filesize=1 + ), + # Comes back to normal path + _make_image_metadata( + Path(tmpdir) / Path("./img7.jpg"), 1.0, 1.0070, 70, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img8.jpg"), 1.0, 1.0080, 80, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img9.jpg"), 1.0, 1.0090, 90, filesize=1 + ), + ] + + metadatas = psp.process_sequence_properties(sequence) + error_metadatas = [d for d in metadatas if isinstance(d, types.ErrorMetadata)] + image_metadatas = [d for d in metadatas if isinstance(d, types.ImageMetadata)] + + zigzag_errors = [ + d + for d in error_metadatas + if isinstance(d.error, exceptions.MapillaryZigZagError) + ] + + # Exactly img6 should be an error (the single deviation point) + error_filenames = {d.filename.name for d in zigzag_errors} + expected_errors = {"img6.jpg"} + assert error_filenames == expected_errors, ( + f"Expected errors: {expected_errors}, got: {error_filenames}" + ) + + # Exactly img0-img5 and img7-img9 should be preserved (the normal path) + preserved_filenames = {d.filename.name for d in image_metadatas} + expected_preserved = { + "img0.jpg", + "img1.jpg", + "img2.jpg", + "img3.jpg", + "img4.jpg", + "img5.jpg", + "img7.jpg", + "img8.jpg", + "img9.jpg", + } + assert preserved_filenames == expected_preserved, ( + f"Expected preserved: {expected_preserved}, got: {preserved_filenames}" + ) + + +def test_zigzag_backwards_walk_multiple_deviations(tmpdir: py.path.local): + """Test backwards walk with multiple consecutive deviation points. + + When multiple consecutive points deviate, the backwards walk should + identify all of them. + """ + # 0.001 degrees ≈ 111 meters, well above 50m threshold + # Use 10 second gaps to stay under speed limit (111m / 10s = 11.1 m/s = 40 km/h) + sequence = [ + # Normal path + _make_image_metadata( + Path(tmpdir) / Path("./img0.jpg"), 1.0, 1.0000, 0, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img1.jpg"), 1.0, 1.0010, 10, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img2.jpg"), 1.0, 1.0020, 20, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img3.jpg"), 1.0, 1.0030, 30, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img4.jpg"), 1.0, 1.0040, 40, filesize=1 + ), + # Multiple consecutive deviations - jump to different street + _make_image_metadata( + Path(tmpdir) / Path("./img5.jpg"), 1.010, 1.0050, 50, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img6.jpg"), 1.010, 1.0060, 60, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img7.jpg"), 1.010, 1.0070, 70, filesize=1 + ), + # Comes back to normal path + _make_image_metadata( + Path(tmpdir) / Path("./img8.jpg"), 1.0, 1.0080, 80, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img9.jpg"), 1.0, 1.0090, 90, filesize=1 + ), + _make_image_metadata( + Path(tmpdir) / Path("./img10.jpg"), 1.0, 1.0100, 100, filesize=1 + ), + ] + + metadatas = psp.process_sequence_properties(sequence) + error_metadatas = [d for d in metadatas if isinstance(d, types.ErrorMetadata)] + image_metadatas = [d for d in metadatas if isinstance(d, types.ImageMetadata)] + + zigzag_errors = [ + d + for d in error_metadatas + if isinstance(d.error, exceptions.MapillaryZigZagError) + ] + + # Exactly img5, img6, img7 should be errors (the deviation points) + error_filenames = {d.filename.name for d in zigzag_errors} + expected_errors = {"img5.jpg", "img6.jpg", "img7.jpg"} + assert error_filenames == expected_errors, ( + f"Expected errors: {expected_errors}, got: {error_filenames}" + ) + + # Exactly img0-img4 and img8-img10 should be preserved (the normal path) + preserved_filenames = {d.filename.name for d in image_metadatas} + expected_preserved = { + "img0.jpg", + "img1.jpg", + "img2.jpg", + "img3.jpg", + "img4.jpg", + "img8.jpg", + "img9.jpg", + "img10.jpg", + } + assert preserved_filenames == expected_preserved, ( + f"Expected preserved: {expected_preserved}, got: {preserved_filenames}" + )