diff --git a/src/napari_deeplabcut/_tests/core/io/test_write_routing.py b/src/napari_deeplabcut/_tests/core/io/test_write_routing.py index 00f2662a..2d09ba33 100644 --- a/src/napari_deeplabcut/_tests/core/io/test_write_routing.py +++ b/src/napari_deeplabcut/_tests/core/io/test_write_routing.py @@ -7,9 +7,9 @@ import pytest from napari_deeplabcut.config.models import AnnotationKind, DLCHeaderModel +from napari_deeplabcut.core.dataframes import drop_likelihood_columns from napari_deeplabcut.core.errors import AmbiguousSaveError, MissingProvenanceError from napari_deeplabcut.core.io import ( - _drop_likelihood_columns, _drop_likelihood_from_header, resolve_output_path_from_metadata, write_hdf, @@ -133,8 +133,8 @@ def test_drop_likelihood_before_merge_prevents_machine_likelihood_from_leaking() ) # Mimic writer behavior: strip likelihood on both sides before merge - df_old = _drop_likelihood_columns(df_old) - df_new = _drop_likelihood_columns(df_new) + df_old = drop_likelihood_columns(df_old) + df_new = drop_likelihood_columns(df_new) df_out = df_new.combine_first(df_old) @@ -169,8 +169,8 @@ def test_drop_likelihood_cleans_existing_gt_columns_too(): columns=cols_with_likelihood, ) - df_old = _drop_likelihood_columns(df_old) - df_new = _drop_likelihood_columns(df_new) + df_old = drop_likelihood_columns(df_old) + df_new = drop_likelihood_columns(df_new) df_out = df_new.combine_first(df_old) coords = df_out.columns.get_level_values("coords") @@ -188,7 +188,7 @@ def test_drop_likelihood_columns_removes_likelihood_from_empty_dataframe(): ) df = pd.DataFrame([], columns=cols, index=pd.Index([], name="image")) - out = _drop_likelihood_columns(df) + out = drop_likelihood_columns(df) assert out.empty assert "likelihood" not in out.columns.get_level_values("coords") diff --git a/src/napari_deeplabcut/_tests/core/test_conflicts.py b/src/napari_deeplabcut/_tests/core/test_conflicts.py index d8189283..3d04a727 100644 --- a/src/napari_deeplabcut/_tests/core/test_conflicts.py +++ b/src/napari_deeplabcut/_tests/core/test_conflicts.py @@ -7,11 +7,18 @@ import pytest import napari_deeplabcut.core.conflicts as conflicts_mod -import napari_deeplabcut.core.dataframes as dataframes_mod from napari_deeplabcut.config.models import AnnotationKind, DLCProjectContext from napari_deeplabcut.core.errors import AmbiguousSaveError, MissingProvenanceError +class _HeaderStub: + def __init__(self, scorer: str | None = "scorerA"): + self.scorer = scorer + + def with_scorer(self, scorer: str | None): + return _HeaderStub(scorer or self.scorer) + + def _make_points_meta( *, header_scorer: str | None = "scorerA", @@ -19,7 +26,7 @@ def _make_points_meta( io_kind=AnnotationKind.GT, root: str | None = None, ): - header = None if header_scorer is None else SimpleNamespace(scorer=header_scorer) + header = None if header_scorer is None else _HeaderStub(header_scorer) io = None if io_kind is None else SimpleNamespace(kind=io_kind) return SimpleNamespace( header=header, @@ -38,12 +45,14 @@ def _stub_validation_pipeline( props_obj=None, ctx_obj=None, df_new=None, + completed_df=None, ): """ - Stub schema validation + parse_points_metadata + form_df_from_validated. + Stub schema validation + parse_points_metadata + dataframe construction. This keeps tests focused on the routing/conflict logic rather than on - pydantic/schema correctness (which is tested elsewhere). + pydantic/schema correctness or dataframe save-scope expansion, which are + tested elsewhere. """ if attrs_obj is None: attrs_obj = SimpleNamespace( @@ -58,6 +67,8 @@ def _stub_validation_pipeline( ctx_obj = SimpleNamespace(ctx="CTX") if df_new is None: df_new = pd.DataFrame({"dummy": [1]}) + if completed_df is None: + completed_df = df_new monkeypatch.setattr( conflicts_mod.dlc_schemas.PointsLayerAttributesModel, @@ -85,17 +96,31 @@ def _stub_validation_pipeline( lambda payload: ctx_obj, ) monkeypatch.setattr( - dataframes_mod, + conflicts_mod, "form_df_from_validated", lambda ctx: df_new, ) + seen = {} + + def fake_complete_df_for_save(df, *, pts_meta, header): + seen["complete_df_for_save"] = (df, pts_meta, header) + return completed_df + + monkeypatch.setattr( + conflicts_mod, + "complete_df_for_save", + fake_complete_df_for_save, + ) + return SimpleNamespace( attrs_obj=attrs_obj, points_obj=points_obj, props_obj=props_obj, ctx_obj=ctx_obj, df_new=df_new, + completed_df=completed_df, + seen=seen, ) @@ -212,11 +237,19 @@ def test_compute_overwrite_report_returns_report_for_existing_gt_file_with_confl old_df = pd.DataFrame({"old": [1]}) raw_new_df = pd.DataFrame({"new": [1]}) promoted_df = pd.DataFrame({"promoted": [1]}) + completed_df = pd.DataFrame({"completed": [1]}) + key_conflict = object() + deletion_conflict = object() report = SimpleNamespace(has_conflicts=True, marker="REPORT") pts_meta = _make_points_meta(io_kind=AnnotationKind.GT) - _stub_validation_pipeline(monkeypatch, pts_meta=pts_meta, df_new=raw_new_df) + stub = _stub_validation_pipeline( + monkeypatch, + pts_meta=pts_meta, + df_new=raw_new_df, + completed_df=completed_df, + ) monkeypatch.setattr( conflicts_mod, @@ -238,14 +271,19 @@ def fake_keypoint_conflicts(df_old, df_new): seen["keypoint_conflicts"] = (df_old, df_new) return key_conflict - def fake_build_report(conflicts, *, layer_name, destination_path): - seen["build_report"] = (conflicts, layer_name, destination_path) + def fake_keypoint_deletions(df_old, df_new): + seen["keypoint_deletions"] = (df_old, df_new) + return deletion_conflict + + def fake_build_report(conflicts, *, deletion_conflict, layer_name, destination_path): + seen["build_report"] = (conflicts, deletion_conflict, layer_name, destination_path) return report monkeypatch.setattr(conflicts_mod, "set_df_scorer", fake_set_df_scorer) monkeypatch.setattr(pd, "read_hdf", fake_read_hdf) - monkeypatch.setattr(dataframes_mod, "keypoint_conflicts", fake_keypoint_conflicts) - monkeypatch.setattr(dataframes_mod, "build_overwrite_conflict_report", fake_build_report) + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", fake_keypoint_conflicts) + monkeypatch.setattr(conflicts_mod, "keypoint_deletions", fake_keypoint_deletions) + monkeypatch.setattr(conflicts_mod, "build_overwrite_conflict_report", fake_build_report) result = conflicts_mod.compute_overwrite_report_for_points_save( data=[[0, 1, 2]], @@ -254,10 +292,18 @@ def fake_build_report(conflicts, *, layer_name, destination_path): assert result is report assert seen["set_df_scorer"] == (raw_new_df, "target_scorer") + + completed_input_df, completed_pts_meta, completed_header = stub.seen["complete_df_for_save"] + assert completed_input_df is promoted_df + assert completed_pts_meta is pts_meta + assert completed_header.scorer == "target_scorer" + assert seen["read_hdf_calls"] == [(out, "df_with_missing")] - assert seen["keypoint_conflicts"] == (old_df, promoted_df) + assert seen["keypoint_conflicts"] == (old_df, completed_df) + assert seen["keypoint_deletions"] == (old_df, completed_df) assert seen["build_report"] == ( key_conflict, + deletion_conflict, "my-points-layer", str(out), ) @@ -269,10 +315,16 @@ def test_compute_overwrite_report_returns_none_when_report_has_no_conflicts(monk old_df = pd.DataFrame({"old": [1]}) new_df = pd.DataFrame({"new": [1]}) + completed_df = pd.DataFrame({"completed": [1]}) report = SimpleNamespace(has_conflicts=False) pts_meta = _make_points_meta(io_kind=AnnotationKind.GT) - _stub_validation_pipeline(monkeypatch, pts_meta=pts_meta, df_new=new_df) + _stub_validation_pipeline( + monkeypatch, + pts_meta=pts_meta, + df_new=new_df, + completed_df=completed_df, + ) monkeypatch.setattr( conflicts_mod, @@ -280,11 +332,12 @@ def test_compute_overwrite_report_returns_none_when_report_has_no_conflicts(monk lambda attributes: (str(out), None, AnnotationKind.GT), ) monkeypatch.setattr(pd, "read_hdf", lambda path, key=None: old_df) - monkeypatch.setattr(dataframes_mod, "keypoint_conflicts", lambda df_old, df_new: "conflicts") + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", lambda df_old, df_new: "conflicts") + monkeypatch.setattr(conflicts_mod, "keypoint_deletions", lambda df_old, df_new: "deletions") monkeypatch.setattr( - dataframes_mod, + conflicts_mod, "build_overwrite_conflict_report", - lambda conflicts, *, layer_name, destination_path: report, + lambda conflicts, *, deletion_conflict, layer_name, destination_path: report, ) result = conflicts_mod.compute_overwrite_report_for_points_save( @@ -301,10 +354,16 @@ def test_compute_overwrite_report_falls_back_when_keyed_hdf_read_fails(monkeypat old_df = pd.DataFrame({"old": [1]}) new_df = pd.DataFrame({"new": [1]}) + completed_df = pd.DataFrame({"completed": [1]}) report = SimpleNamespace(has_conflicts=True) pts_meta = _make_points_meta(io_kind=AnnotationKind.GT) - _stub_validation_pipeline(monkeypatch, pts_meta=pts_meta, df_new=new_df) + _stub_validation_pipeline( + monkeypatch, + pts_meta=pts_meta, + df_new=new_df, + completed_df=completed_df, + ) monkeypatch.setattr( conflicts_mod, @@ -321,11 +380,12 @@ def fake_read_hdf(path, key=None): return old_df monkeypatch.setattr(pd, "read_hdf", fake_read_hdf) - monkeypatch.setattr(dataframes_mod, "keypoint_conflicts", lambda df_old, df_new: "conflicts") + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", lambda df_old, df_new: "conflicts") + monkeypatch.setattr(conflicts_mod, "keypoint_deletions", lambda df_old, df_new: "deletions") monkeypatch.setattr( - dataframes_mod, + conflicts_mod, "build_overwrite_conflict_report", - lambda conflicts, *, layer_name, destination_path: report, + lambda conflicts, *, deletion_conflict, layer_name, destination_path: report, ) result = conflicts_mod.compute_overwrite_report_for_points_save( @@ -340,6 +400,57 @@ def fake_read_hdf(path, key=None): ] +def test_compute_overwrite_report_passes_deletion_conflicts_to_report_builder(monkeypatch, tmp_path): + out = tmp_path / "CollectedData_target.h5" + out.touch() + + old_df = pd.DataFrame({"old": [1]}) + new_df = pd.DataFrame({"new": [1]}) + completed_df = pd.DataFrame({"completed": [1]}) + + key_conflict = pd.DataFrame([[False]]) + deletion_conflict = pd.DataFrame([[True]]) + report = SimpleNamespace(has_conflicts=True, n_deletions=1) + + pts_meta = _make_points_meta(io_kind=AnnotationKind.GT) + _stub_validation_pipeline( + monkeypatch, + pts_meta=pts_meta, + df_new=new_df, + completed_df=completed_df, + ) + + monkeypatch.setattr( + conflicts_mod, + "resolve_output_path_from_metadata", + lambda attributes: (str(out), None, AnnotationKind.GT), + ) + monkeypatch.setattr(pd, "read_hdf", lambda path, key=None: old_df) + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", lambda df_old, df_new: key_conflict) + monkeypatch.setattr(conflicts_mod, "keypoint_deletions", lambda df_old, df_new: deletion_conflict) + + seen = {} + + def fake_build_report(conflicts, *, deletion_conflict, layer_name, destination_path): + seen["args"] = (conflicts, deletion_conflict, layer_name, destination_path) + return report + + monkeypatch.setattr(conflicts_mod, "build_overwrite_conflict_report", fake_build_report) + + result = conflicts_mod.compute_overwrite_report_for_points_save( + data=[[0, 1, 2]], + attributes={"name": "my-points-layer"}, + ) + + assert result is report + assert seen["args"] == ( + key_conflict, + deletion_conflict, + "my-points-layer", + str(out), + ) + + def test_compute_overwrite_report_raises_when_gt_fallback_has_no_root_and_no_dataset_dir(monkeypatch): pts_meta = _make_points_meta(io_kind=AnnotationKind.GT, root=None) _stub_validation_pipeline(monkeypatch, pts_meta=pts_meta) @@ -360,3 +471,118 @@ def test_compute_overwrite_report_raises_when_gt_fallback_has_no_root_and_no_dat data=[[0, 1, 2]], attributes={"name": "points"}, ) + + +def test_compute_overwrite_report_for_extracted_labels_row_returns_none_when_output_missing(tmp_path): + out = tmp_path / "machinelabels-iter0.h5" + df_new = pd.DataFrame({"new": [1]}) + + result = conflicts_mod.compute_overwrite_report_for_extracted_labels_row(out, df_new) + + assert result is None + + +def test_compute_overwrite_report_for_extracted_labels_row_returns_report(monkeypatch, tmp_path): + out = tmp_path / "machinelabels-iter0.h5" + out.touch() + + old_df = pd.DataFrame({"old": [1]}) + df_new = pd.DataFrame({"new": [1]}) + key_conflict = object() + report = SimpleNamespace(has_conflicts=True) + + seen = {} + + def fake_read_hdf(path, key=None): + seen.setdefault("read_hdf_calls", []).append((Path(path), key)) + return old_df + + def fake_keypoint_conflicts(df_old, df_new_arg): + seen["keypoint_conflicts"] = (df_old, df_new_arg) + return key_conflict + + def fake_build_report(conflicts, *, layer_name, destination_path): + seen["build_report"] = (conflicts, layer_name, destination_path) + return report + + monkeypatch.setattr(pd, "read_hdf", fake_read_hdf) + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", fake_keypoint_conflicts) + monkeypatch.setattr(conflicts_mod, "build_overwrite_conflict_report", fake_build_report) + + result = conflicts_mod.compute_overwrite_report_for_extracted_labels_row( + out, + df_new, + layer_name="extracted-row", + ) + + assert result is report + assert seen["read_hdf_calls"] == [(out, "df_with_missing")] + assert seen["keypoint_conflicts"] == (old_df, df_new) + assert seen["build_report"] == (key_conflict, "extracted-row", str(out)) + + +def test_compute_overwrite_report_for_extracted_labels_row_returns_none_when_report_has_no_conflicts( + monkeypatch, + tmp_path, +): + out = tmp_path / "machinelabels-iter0.h5" + out.touch() + + old_df = pd.DataFrame({"old": [1]}) + df_new = pd.DataFrame({"new": [1]}) + report = SimpleNamespace(has_conflicts=False) + + monkeypatch.setattr(pd, "read_hdf", lambda path, key=None: old_df) + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", lambda df_old, df_new_arg: "conflicts") + monkeypatch.setattr( + conflicts_mod, + "build_overwrite_conflict_report", + lambda conflicts, *, layer_name, destination_path: report, + ) + + result = conflicts_mod.compute_overwrite_report_for_extracted_labels_row( + out, + df_new, + layer_name="extracted-row", + ) + + assert result is None + + +def test_compute_overwrite_report_for_extracted_labels_row_falls_back_when_keyed_hdf_read_fails( + monkeypatch, + tmp_path, +): + out = tmp_path / "machinelabels-iter0.h5" + out.touch() + + old_df = pd.DataFrame({"old": [1]}) + df_new = pd.DataFrame({"new": [1]}) + report = SimpleNamespace(has_conflicts=True) + calls = [] + + def fake_read_hdf(path, key=None): + calls.append((Path(path), key)) + if key == "df_with_missing": + raise ValueError("missing key") + return old_df + + monkeypatch.setattr(pd, "read_hdf", fake_read_hdf) + monkeypatch.setattr(conflicts_mod, "keypoint_conflicts", lambda df_old, df_new_arg: "conflicts") + monkeypatch.setattr( + conflicts_mod, + "build_overwrite_conflict_report", + lambda conflicts, *, layer_name, destination_path: report, + ) + + result = conflicts_mod.compute_overwrite_report_for_extracted_labels_row( + out, + df_new, + layer_name="extracted-row", + ) + + assert result is report + assert calls == [ + (out, "df_with_missing"), + (out, None), + ] diff --git a/src/napari_deeplabcut/_tests/core/test_dataframes.py b/src/napari_deeplabcut/_tests/core/test_dataframes.py index e7aac883..3339b460 100644 --- a/src/napari_deeplabcut/_tests/core/test_dataframes.py +++ b/src/napari_deeplabcut/_tests/core/test_dataframes.py @@ -7,12 +7,17 @@ from napari_deeplabcut.config.models import DLCHeaderModel, PointsMetadata from napari_deeplabcut.core.dataframes import ( align_old_new, + build_overwrite_conflict_report, + complete_df_for_save, + drop_likelihood_columns, form_df_from_validated, guarantee_multiindex_rows, harmonize_keypoint_column_index, harmonize_keypoint_row_index, keypoint_conflicts, + keypoint_deletions, merge_multiple_scorers, + merge_save_df, ) from napari_deeplabcut.core.schemas import PointsWriteInputModel @@ -435,3 +440,591 @@ def test_form_df_from_validated_raises_when_header_reindex_drops_all_finite_poin with pytest.raises(RuntimeError, match="Writer produced no finite coordinates"): _ = form_df_from_validated(ctx) + + +# ----------------------------------------------------------------------------- +# 8) complete_df_for_save: expand sparse napari data to full editable save scope +# ----------------------------------------------------------------------------- + + +def test_complete_df_for_save_expands_all_paths_and_bodyparts_with_nan_for_missing(): + header = DLCHeaderModel( + columns=cols_4level( + scorer="S", + individuals=("animal1",), + bodyparts=("nose", "tail"), + coords=("x", "y"), + ) + ) + pts_meta = PointsMetadata( + header=header, + paths=[ + "labeled-data/test/img000.png", + "labeled-data/test/img001.png", + ], + ) + + # Sparse df contains only img000 / nose. + sparse = pd.DataFrame( + [[10.0, 20.0]], + columns=cols_4level( + scorer="S", + individuals=("animal1",), + bodyparts=("nose",), + coords=("x", "y"), + ), + index=["labeled-data/test/img000.png"], + ) + + out = complete_df_for_save( + sparse, + pts_meta=pts_meta, + header=header, + ) + + assert isinstance(out.index, pd.MultiIndex) + assert out.index.to_list() == [ + ("labeled-data", "test", "img000.png"), + ("labeled-data", "test", "img001.png"), + ] + + assert out.columns.equals( + cols_4level( + scorer="S", + individuals=("animal1",), + bodyparts=("nose", "tail"), + coords=("x", "y"), + ) + ) + + row0 = ("labeled-data", "test", "img000.png") + row1 = ("labeled-data", "test", "img001.png") + + assert out.loc[row0, ("S", "animal1", "nose", "x")] == 10.0 + assert out.loc[row0, ("S", "animal1", "nose", "y")] == 20.0 + + # Missing bodypart in loaded frame becomes explicit NaN. + assert pd.isna(out.loc[row0, ("S", "animal1", "tail", "x")]) + assert pd.isna(out.loc[row0, ("S", "animal1", "tail", "y")]) + + # Entire second frame is in save scope but has no points. + assert pd.isna(out.loc[row1, ("S", "animal1", "nose", "x")]) + assert pd.isna(out.loc[row1, ("S", "animal1", "tail", "y")]) + + +def test_complete_df_for_save_drops_likelihood_columns(): + header = DLCHeaderModel( + columns=cols_4level( + scorer="S", + individuals=("animal1",), + bodyparts=("nose",), + coords=("x", "y", "likelihood"), + ) + ) + pts_meta = PointsMetadata( + header=header, + paths=["labeled-data/test/img000.png"], + ) + + sparse = pd.DataFrame( + [[10.0, 20.0, 0.7]], + columns=cols_4level( + scorer="S", + individuals=("animal1",), + bodyparts=("nose",), + coords=("x", "y", "likelihood"), + ), + index=["labeled-data/test/img000.png"], + ) + + out = complete_df_for_save( + sparse, + pts_meta=pts_meta, + header=header, + ) + + assert "likelihood" not in set(out.columns.get_level_values("coords").astype(str)) + assert out.columns.to_list() == [ + ("S", "animal1", "nose", "x"), + ("S", "animal1", "nose", "y"), + ] + + +def test_complete_df_for_save_without_paths_preserves_existing_rows_and_completes_columns(): + header = DLCHeaderModel( + columns=cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose", "tail"), + coords=("x", "y"), + ) + ) + pts_meta = PointsMetadata(header=header, paths=None) + + sparse = pd.DataFrame( + [[10.0, 20.0]], + columns=cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ), + index=["img000.png"], + ) + + out = complete_df_for_save( + sparse, + pts_meta=pts_meta, + header=header, + ) + + assert out.index.to_list() == [("img000.png",)] + assert ("S", "", "tail", "x") in out.columns + assert pd.isna(out.loc[("img000.png",), ("S", "", "tail", "x")]) + + +# ----------------------------------------------------------------------------- +# 9) merge_save_df: save-scope overlay preserves intentional NaN deletions +# ----------------------------------------------------------------------------- + + +def test_merge_save_df_nan_in_new_clears_old_value(): + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame([[10.0, 20.0]], index=idx, columns=cols) + df_new = pd.DataFrame([[np.nan, np.nan]], index=idx, columns=cols) + + out = merge_save_df(df_old, df_new) + + assert pd.isna(out.loc[idx[0], ("S", "", "nose", "x")]) + assert pd.isna(out.loc[idx[0], ("S", "", "nose", "y")]) + + +def test_merge_save_df_preserves_rows_outside_new_save_scope(): + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + old_idx = pd.MultiIndex.from_tuples( + [ + ("labeled-data", "test", "img000.png"), + ("labeled-data", "test", "img999.png"), + ] + ) + new_idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame( + [ + [10.0, 20.0], + [30.0, 40.0], + ], + index=old_idx, + columns=cols, + ) + df_new = pd.DataFrame( + [[np.nan, np.nan]], + index=new_idx, + columns=cols, + ) + + out = merge_save_df(df_old, df_new) + + assert pd.isna(out.loc[("labeled-data", "test", "img000.png"), ("S", "", "nose", "x")]) + assert out.loc[("labeled-data", "test", "img999.png"), ("S", "", "nose", "x")] == 30.0 + assert out.loc[("labeled-data", "test", "img999.png"), ("S", "", "nose", "y")] == 40.0 + + +def test_merge_save_df_preserves_old_columns_outside_new_columns(): + old_cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose", "tail"), + coords=("x", "y"), + ) + new_cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame([[10.0, 20.0, 30.0, 40.0]], index=idx, columns=old_cols) + df_new = pd.DataFrame([[11.0, 22.0]], index=idx, columns=new_cols) + + out = merge_save_df(df_old, df_new) + + assert out.loc[idx[0], ("S", "", "nose", "x")] == 11.0 + assert out.loc[idx[0], ("S", "", "nose", "y")] == 22.0 + + # Old tail columns were outside df_new's column scope, so they remain. + assert out.loc[idx[0], ("S", "", "tail", "x")] == 30.0 + assert out.loc[idx[0], ("S", "", "tail", "y")] == 40.0 + + +def test_merge_save_df_nan_clears_old_value_after_row_harmonization(): + """ + Deletion semantics depend on row labels matching after harmonization. + + Existing DLC files may use basename-only row keys while newly formed save + dataframes may use deeper DLC path keys. merge_save_df() should harmonize + those representations before assigning df_new into df_old, otherwise NaNs + in df_new would not clear the intended old values. + """ + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + + # Existing on-disk row uses basename representation. + df_old = pd.DataFrame( + [[10.0, 20.0]], + index=["img000.png"], + columns=cols, + ) + guarantee_multiindex_rows(df_old) + + # New save-scope row uses deeper DLC path representation. + df_new = pd.DataFrame( + [[np.nan, np.nan]], + index=["labeled-data/test/img000.png"], + columns=cols, + ) + guarantee_multiindex_rows(df_new) + + out = merge_save_df(df_old, df_new) + + # harmonize_keypoint_row_index() collapses the deep row to basename so the + # assignment hits the existing row and NaN clears the old value. + row = ("img000.png",) + assert row in out.index + assert pd.isna(out.loc[row, ("S", "", "nose", "x")]) + assert pd.isna(out.loc[row, ("S", "", "nose", "y")]) + + +def test_merge_save_df_refuses_no_row_overlap_after_harmonization(): + """ + If row labels do not overlap after harmonization, df_new cannot overwrite or + delete existing values. merge_save_df() refuses this case rather than silently + preserving old labels when deletion/overwrite semantics were expected. + """ + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + + old_idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + new_idx = pd.MultiIndex.from_tuples([("labeled-data", "other", "img999.png")]) + + df_old = pd.DataFrame( + [[10.0, 20.0]], + index=old_idx, + columns=cols, + ) + df_new = pd.DataFrame( + [[np.nan, np.nan]], + index=new_idx, + columns=cols, + ) + + with pytest.raises(ValueError, match="no row-index overlap after harmonization"): + merge_save_df(df_old, df_new) + + +# ----------------------------------------------------------------------------- +# 10) keypoint_deletions and keypoint_conflicts(include_deletions=True) +# ----------------------------------------------------------------------------- + + +def test_keypoint_deletions_detects_old_value_to_new_nan(): + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame([[10.0, 20.0]], index=idx, columns=cols) + df_new = pd.DataFrame([[np.nan, np.nan]], index=idx, columns=cols) + + deletions = keypoint_deletions(df_old, df_new) + + assert deletions.loc[idx[0], ("", "nose")] + + +def test_keypoint_deletions_does_not_flag_rows_outside_new_scope(): + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + old_idx = pd.MultiIndex.from_tuples( + [ + ("labeled-data", "test", "img000.png"), + ("labeled-data", "test", "img999.png"), + ] + ) + new_idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame( + [ + [10.0, 20.0], + [30.0, 40.0], + ], + index=old_idx, + columns=cols, + ) + df_new = pd.DataFrame( + [[np.nan, np.nan]], + index=new_idx, + columns=cols, + ) + + deletions = keypoint_deletions(df_old, df_new) + + assert ("labeled-data", "test", "img000.png") in deletions.index + assert ("labeled-data", "test", "img999.png") not in deletions.index + assert deletions.loc[("labeled-data", "test", "img000.png"), ("", "nose")] + + +def test_keypoint_conflicts_include_deletions_combines_overwrites_and_deletions(): + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose", "tail"), + coords=("x", "y"), + ) + idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame( + [[10.0, 20.0, 30.0, 40.0]], + index=idx, + columns=cols, + ) + df_new = pd.DataFrame( + [[11.0, 20.0, np.nan, np.nan]], + index=idx, + columns=cols, + ) + + overwrites_only = keypoint_conflicts(df_old, df_new) + combined = keypoint_conflicts(df_old, df_new, include_deletions=True) + + assert overwrites_only.loc[idx[0], ("", "nose")] + assert not overwrites_only.loc[idx[0], ("", "tail")] + + assert combined.loc[idx[0], ("", "nose")] + assert combined.loc[idx[0], ("", "tail")] + + +def test_keypoint_deletions_handles_basename_index_vs_deep_path_multiindex_regression(): + """ + Regression test for deletion detection with shallow old index and deep new save scope. + + Old on-disk dataframe may use basename rows: + ("img00001.png",) + + New save-scope dataframe may use DLC relative paths: + ("labeled-data", "test", "img00001.png") + + keypoint_deletions() must harmonize before restricting to the new scope. + """ + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y"), + ) + + df_old = pd.DataFrame( + [[10.0, 20.0]], + columns=cols, + index=["img00001.png"], + ) + guarantee_multiindex_rows(df_old) + + df_new = pd.DataFrame( + [[np.nan, np.nan]], + columns=cols, + index=["labeled-data/test/img00001.png"], + ) + guarantee_multiindex_rows(df_new) + + deletions = keypoint_deletions(df_old, df_new) + + assert isinstance(deletions.index, pd.MultiIndex) + assert deletions.index.nlevels == 1 + assert deletions.index.to_list() == [("img00001.png",)] + assert deletions.loc[("img00001.png",), ("", "nose")] + + +# ----------------------------------------------------------------------------- +# 11) build_overwrite_conflict_report: separate modified vs deleted entries +# ----------------------------------------------------------------------------- + + +def test_build_overwrite_conflict_report_counts_deletions_separately(): + idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + cols = pd.MultiIndex.from_tuples( + [ + ("", "nose"), + ("", "tail"), + ], + names=["individuals", "bodyparts"], + ) + + key_conflict = pd.DataFrame( + [[True, False]], + index=idx, + columns=cols, + ) + deletion_conflict = pd.DataFrame( + [[False, True]], + index=idx, + columns=cols, + ) + + report = build_overwrite_conflict_report( + key_conflict, + deletion_conflict=deletion_conflict, + layer_name="CollectedData_S", + destination_path="/tmp/CollectedData_S.h5", + ) + + assert report.has_conflicts + assert report.n_overwrites == 1 + assert report.n_deletions == 1 + assert report.n_frames == 1 + assert len(report.entries) == 1 + + entry = report.entries[0] + assert entry.frame_label == "labeled-data/test/img000.png" + assert entry.keypoints == ("nose",) + assert entry.deleted_keypoints == ("tail",) + + +def test_build_overwrite_conflict_report_prefers_deleted_when_same_key_is_both_modified_and_deleted(): + idx = pd.MultiIndex.from_tuples([("img000.png",)]) + cols = pd.MultiIndex.from_tuples( + [("", "nose")], + names=["individuals", "bodyparts"], + ) + + key_conflict = pd.DataFrame([[True]], index=idx, columns=cols) + deletion_conflict = pd.DataFrame([[True]], index=idx, columns=cols) + + report = build_overwrite_conflict_report( + key_conflict, + deletion_conflict=deletion_conflict, + ) + + assert report.n_overwrites == 0 + assert report.n_deletions == 1 + assert report.entries[0].keypoints == () + assert report.entries[0].deleted_keypoints == ("nose",) + + +# ----------------------------------------------------------------------------- +# 12) Regression: deleting a point clears it after completing + merging save df +# ----------------------------------------------------------------------------- + + +def test_deleted_keypoint_roundtrip_complete_then_merge_clears_old_value(): + header_cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose", "tail"), + coords=("x", "y"), + ) + header = DLCHeaderModel(columns=header_cols) + + pts_meta = PointsMetadata( + header=header, + paths=["labeled-data/test/img000.png"], + ) + + idx = pd.MultiIndex.from_tuples([("labeled-data", "test", "img000.png")]) + + df_old = pd.DataFrame( + [[10.0, 20.0, 30.0, 40.0]], + index=idx, + columns=header_cols, + ) + + # Current layer contains only tail. Nose was deleted from napari Points. + sparse_current = pd.DataFrame( + [[31.0, 41.0]], + index=idx, + columns=cols_4level( + scorer="S", + individuals=("",), + bodyparts=("tail",), + coords=("x", "y"), + ), + ) + + df_new = complete_df_for_save( + sparse_current, + pts_meta=pts_meta, + header=header, + ) + + out = merge_save_df(df_old, df_new) + + # Deleted nose should be cleared. + assert pd.isna(out.loc[idx[0], ("S", "", "nose", "x")]) + assert pd.isna(out.loc[idx[0], ("S", "", "nose", "y")]) + + # Existing/current tail should be saved. + assert out.loc[idx[0], ("S", "", "tail", "x")] == 31.0 + assert out.loc[idx[0], ("S", "", "tail", "y")] == 41.0 + + +# ----------------------------------------------------------------------------- +# 13) drop_likelihood_columns +# ----------------------------------------------------------------------------- + + +def test_drop_likelihood_columns_removes_multiindex_likelihood_coord(): + cols = cols_4level( + scorer="S", + individuals=("",), + bodyparts=("nose",), + coords=("x", "y", "likelihood"), + ) + df = pd.DataFrame([[1.0, 2.0, 0.9]], columns=cols) + + out = drop_likelihood_columns(df) + + assert out.columns.to_list() == [ + ("S", "", "nose", "x"), + ("S", "", "nose", "y"), + ] + + +def test_drop_likelihood_columns_removes_flat_likelihood_column(): + df = pd.DataFrame( + { + "x": [1.0], + "y": [2.0], + "likelihood": [0.9], + } + ) + + out = drop_likelihood_columns(df) + + assert list(out.columns) == ["x", "y"] diff --git a/src/napari_deeplabcut/_tests/e2e/conftest.py b/src/napari_deeplabcut/_tests/e2e/conftest.py index 8fee2958..fee88c4e 100644 --- a/src/napari_deeplabcut/_tests/e2e/conftest.py +++ b/src/napari_deeplabcut/_tests/e2e/conftest.py @@ -78,23 +78,35 @@ def overwrite_confirm(monkeypatch): state = {"mode": "always_true", "result": True} def _patched_maybe_confirm_overwrite(parent, report): - n_pairs = getattr(report, "n_overwrites", 0) - n_images = getattr(report, "n_frames", None) + n_overwrites = int(getattr(report, "n_overwrites", 0) or 0) + n_deletions = int(getattr(report, "n_deletions", 0) or 0) + n_frames = getattr(report, "n_frames", None) + entries = tuple(getattr(report, "entries", ()) or ()) calls.append( { "parent_type": type(parent).__name__ if parent is not None else None, "layer_name": getattr(report, "layer_name", None), "destination_path": getattr(report, "destination_path", None), - "n_pairs": n_pairs, - "n_images": n_images, + # New canonical names + "n_overwrites": n_overwrites, + "n_deletions": n_deletions, + "n_frames": n_frames, + "entries": entries, "details_text": getattr(report, "details_text", None), + "report": report, + # Backwards-compatible aliases for existing tests + "n_pairs": n_overwrites, + "n_images": n_frames, } ) - # In "forbid" mode: only fail if there is a real overwrite. - if state["mode"] == "forbid" and n_pairs > 0: - raise AssertionError("maybe_confirm_overwrite was called unexpectedly for a real overwrite (n_pairs > 0).") + # In "forbid" mode: fail if confirmation is requested for any destructive change. + if state["mode"] == "forbid" and (n_overwrites > 0 or n_deletions > 0): + raise AssertionError( + "maybe_confirm_overwrite was called unexpectedly for a destructive save " + f"(n_overwrites={n_overwrites}, n_deletions={n_deletions})." + ) return state["result"] diff --git a/src/napari_deeplabcut/_tests/e2e/test_overwrite_and_merge.py b/src/napari_deeplabcut/_tests/e2e/test_overwrite_and_merge.py index d4e4a4ce..4c5b679e 100644 --- a/src/napari_deeplabcut/_tests/e2e/test_overwrite_and_merge.py +++ b/src/napari_deeplabcut/_tests/e2e/test_overwrite_and_merge.py @@ -13,57 +13,131 @@ @pytest.mark.usefixtures("qtbot") -def test_config_regression_no_silent_deletion(viewer, keypoint_controls, qtbot, tmp_path, caplog): +def test_config_layer_then_h5_then_config_again_uses_plugin_save_checks( + viewer, + keypoint_controls, + qtbot, + tmp_path, + overwrite_confirm, + monkeypatch, +): """ - Regression for the original report: - Save the WRONG (placeholder) layer and still preserve previous labels due to merge-on-save. + Regression workflow: + + 1. Load config.yaml and use the config-created Points layer as a DLC + annotation layer once frame/save context exists. + 2. Save annotations through the plugin workflow. + 3. Reload the resulting H5/folder. + 4. Add config.yaml again. + 5. Saving the real DLC annotation layer must go through normal + overwrite/deletion preflight, not generic napari save and not + "Nothing to save". """ - caplog.set_level(logging.DEBUG) + from napari_deeplabcut.core import keypoints - project, config_path, labeled_folder, h5_path = _make_minimal_dlc_project(tmp_path) + def fail_information(*args, **kwargs): + raise AssertionError("Unexpected QMessageBox.information; save should route through plugin DLC workflow.") - pre = pd.read_hdf(h5_path, key="df_with_missing") - assert np.isfinite(_get_coord_from_df(pre, "bodypart1", "x")) - assert np.isnan(_get_coord_from_df(pre, "bodypart2", "x")) + monkeypatch.setattr( + "napari_deeplabcut.ui.ui_dialogs.save.QMessageBox.information", + fail_information, + ) - from napari_deeplabcut.core import keypoints + class FailFileDialog: + def __init__(self, *args, **kwargs): + raise AssertionError( + "Unexpected QFileDialog; DLC config/annotation layers must not use generic napari save." + ) + + monkeypatch.setattr( + "napari_deeplabcut.ui.ui_dialogs.save.QFileDialog", + FailFileDialog, + ) + + project, config_path, labeled_folder, h5_path = _make_minimal_dlc_project(tmp_path) viewer.window.add_dock_widget(keypoint_controls, name="Keypoint controls", area="right") - # Open config first -> placeholder Points layer exists + # ------------------------------------------------------------------ + # Phase 1: config-created layer becomes a real annotation layer. + # ------------------------------------------------------------------ + viewer.open(str(config_path), plugin="napari-deeplabcut") - qtbot.waitUntil(lambda: len([ly for ly in viewer.layers if isinstance(ly, Points)]) >= 1, timeout=5_000) + qtbot.waitUntil( + lambda: len([ly for ly in viewer.layers if isinstance(ly, Points)]) >= 1, + timeout=5_000, + ) - placeholder = next((ly for ly in viewer.layers if isinstance(ly, Points)), None) - assert placeholder is not None - assert placeholder.data is None or len(placeholder.data) == 0 + config_layer = next(ly for ly in viewer.layers if isinstance(ly, Points)) + assert keypoint_controls.layer_manager.is_config_placeholder_points_layer(config_layer) - # Open folder -> images + GT points layer + # Load frames/folder so the config layer can acquire save context. + # Depending on reader behavior this may also load an existing H5 annotation + # layer. The important thing is that the config-created layer should now + # be promotable once it has annotations/save context. viewer.open(str(labeled_folder), plugin="napari-deeplabcut") qtbot.waitUntil(lambda: len(viewer.layers) >= 2, timeout=10_000) - qtbot.wait(200) - - # Placeholder should still be present for this regression to apply - assert placeholder in viewer.layers + qtbot.wait(300) - store = keypoint_controls.get_layer_store(placeholder) + store = keypoint_controls.get_layer_store(config_layer) assert store is not None - # Add a new bodypart2 point to placeholder using (frame, y, x) + # Add concrete annotations to the config-created layer. + store.current_keypoint = keypoints.Keypoint("bodypart1", "") + config_layer.add(np.array([0.0, 10.0, 20.0], dtype=float)) + store.current_keypoint = keypoints.Keypoint("bodypart2", "") - placeholder.add(np.array([0.0, 33.0, 44.0], dtype=float)) + config_layer.add(np.array([0.0, 33.0, 44.0], dtype=float)) - viewer.layers.selection.active = placeholder - viewer.layers.save("__dlc__.h5", selected=True, plugin="napari-deeplabcut") - qtbot.wait(200) + # NOTE: this kind of thing is why the layer identity system is useful + assert not keypoint_controls.layer_manager.is_config_placeholder_points_layer(config_layer) - post = pd.read_hdf(h5_path, key="df_with_missing") - b1x_post = _get_coord_from_df(post, "bodypart1", "x") - b2x_post = _get_coord_from_df(post, "bodypart2", "x") + viewer.layers.selection.clear() + viewer.layers.selection.add(config_layer) + viewer.layers.selection.active = config_layer + + keypoint_controls._save_layers_dialog(selected=True) + qtbot.wait(300) + + assert h5_path.exists() + + # ------------------------------------------------------------------ + # Phase 2: reload saved H5/folder, add config.yaml again, then save + # through overwrite/deletion-aware plugin workflow. + # ------------------------------------------------------------------ + + viewer.layers.clear() + qtbot.wait(300) + + viewer.open(str(labeled_folder), plugin="napari-deeplabcut") + qtbot.waitUntil(lambda: len(viewer.layers) >= 2, timeout=10_000) + qtbot.wait(300) + + points = _get_points_layer_with_data(viewer) + + # Add config.yaml again. This should not create an invalid generic-save path. + viewer.open(str(config_path), plugin="napari-deeplabcut") + qtbot.wait(300) + + store = keypoint_controls.get_layer_store(points) + assert store is not None + + # Create an overwrite conflict to prove the save path runs preflight. + overwrite_confirm.capture().reset_calls() + + _set_or_add_bodypart_xy(points, store, "bodypart1", x=99.0, y=88.0) - assert np.isfinite(b1x_post), "bodypart1 must be preserved (no silent deletion)." - assert np.isfinite(b2x_post), "bodypart2 must be saved." - assert b2x_post == 44.0 + viewer.layers.selection.clear() + viewer.layers.selection.add(points) + viewer.layers.selection.active = points + + keypoint_controls._save_layers_dialog(selected=True) + qtbot.wait(300) + + assert len(overwrite_confirm.calls) == 1 + + post = pd.read_hdf(h5_path, key="df_with_missing") + assert _get_coord_from_df(post, "bodypart1", "x") == 99.0 @pytest.mark.usefixtures("qtbot") @@ -126,7 +200,8 @@ def test_no_overwrite_warning_when_only_filling_nans(viewer, keypoint_controls, _set_or_add_bodypart_xy(points, store, "bodypart2", x=44.0, y=33.0) viewer.layers.selection.active = points - viewer.layers.save("__dlc__.h5", selected=True, plugin="napari-deeplabcut") + # viewer.layers.save("__dlc__.h5", selected=True, plugin="napari-deeplabcut") + keypoint_controls._save_layers_dialog(selected=True) qtbot.wait(200) post = pd.read_hdf(h5_path, key="df_with_missing") @@ -167,6 +242,68 @@ def test_overwrite_warning_triggers_on_conflict(viewer, keypoint_controls, qtbot assert _get_coord_from_df(post, "bodypart1", "x") == 99.0 +@pytest.mark.usefixtures("qtbot") +def test_deletion_warning_triggers_when_existing_keypoint_is_removed( + viewer, + keypoint_controls, + qtbot, + tmp_path, + overwrite_confirm, + monkeypatch, +): + """ + Removing an existing saved keypoint from a real loaded DLC annotation layer + must trigger confirmation before the saved coordinate is cleared. + """ + overwrite_confirm.capture().reset_calls() + monkeypatch.setattr( + "napari_deeplabcut.ui.ui_dialogs.save.QMessageBox.warning", + lambda *args, **kwargs: pytest.fail("Unexpected warning dialog; deletion preflight should run."), + ) + + _, _, labeled_folder, h5_path = _make_minimal_dlc_project(tmp_path) + + pre = pd.read_hdf(h5_path, key="df_with_missing") + assert np.isfinite(_get_coord_from_df(pre, "bodypart1", "x")) + + viewer.window.add_dock_widget(keypoint_controls, name="Keypoint controls", area="right") + + viewer.open(str(labeled_folder), plugin="napari-deeplabcut") + qtbot.waitUntil(lambda: len(viewer.layers) >= 2, timeout=10_000) + qtbot.wait(200) + + points = _get_points_layer_with_data(viewer) + labels = np.asarray(points.properties.get("label")) + mask = labels != "bodypart1" + + points.data = np.asarray(points.data)[mask] + for key, values in list(points.properties.items()): + try: + arr = np.asarray(values) + if len(arr) == len(mask): + points.properties[key] = arr[mask] + except Exception: + pass + + viewer.layers.selection.clear() + viewer.layers.selection.add(points) + viewer.layers.selection.active = points + + keypoint_controls._save_layers_dialog(selected=True) + qtbot.wait(200) + + call = overwrite_confirm.calls[0] + assert call["n_deletions"] >= 1 + + deleted = [] + for entry in call["entries"]: + deleted.extend(getattr(entry, "deleted_keypoints", ()) or ()) + assert "bodypart1" in deleted + + post = pd.read_hdf(h5_path, key="df_with_missing") + assert np.isnan(_get_coord_from_df(post, "bodypart1", "x")) + + @pytest.mark.usefixtures("qtbot") def test_overwrite_warning_cancel_aborts_write(viewer, keypoint_controls, qtbot, tmp_path, overwrite_confirm): """ diff --git a/src/napari_deeplabcut/_tests/e2e/test_routing_and_provenance.py b/src/napari_deeplabcut/_tests/e2e/test_routing_and_provenance.py index 243e1056..2799bd19 100644 --- a/src/napari_deeplabcut/_tests/e2e/test_routing_and_provenance.py +++ b/src/napari_deeplabcut/_tests/e2e/test_routing_and_provenance.py @@ -75,6 +75,65 @@ def _skip(*args, **kwargs): return calls +def _get_single_point_label_at_xy( + layer: Points, + *, + x: float, + y: float, + atol: float = 1e-6, +) -> str: + """ + Return the plugin-visible label for the unique point at x/y. + + napari Points data is [frame, y, x], so: + data[:, 1] == y + data[:, 2] == x + """ + labels = np.asarray(layer.properties.get("label", []), dtype=object) + data = np.asarray(layer.data) + + assert data.ndim == 2 and data.shape[1] >= 3, data + assert len(labels) == len(data), f"Property/data length mismatch: labels={len(labels)} data={len(data)}" + + mask = np.isclose(data[:, 2], x, atol=atol) & np.isclose(data[:, 1], y, atol=atol) + + assert mask.any(), f"Expected a point at x={x}, y={y}. Observed data: {data}; labels: {list(labels)}" + + matching_labels = labels[mask] + assert len(matching_labels) == 1, ( + f"Expected exactly one point at x={x}, y={y}; got labels={list(matching_labels)} data={data[mask]}" + ) + + return str(matching_labels[0]) + + +def _assert_points_layer_has_label_xy( + layer: Points, + label: str, + *, + x: float, + y: float, + atol: float = 1e-6, +): + """ + Assert a plugin-visible Points layer contains `label` at x/y. + + napari Points data is [frame, y, x]. + """ + labels = np.asarray(layer.properties.get("label", []), dtype=object) + data = np.asarray(layer.data) + + assert data.ndim == 2 and data.shape[1] >= 3, data + assert len(labels) == len(data), f"Property/data length mismatch: labels={len(labels)} data={len(data)}" + + mask = labels == label + assert mask.any(), f"Expected at least one point for {label!r}. Observed labels: {list(labels)}" + + xy_match = np.isclose(data[mask, 2], x, atol=atol) & np.isclose(data[mask, 1], y, atol=atol) + + assert xy_match.any(), f"Expected {label!r} at x={x}, y={y}. Observed data for label: {data[mask]}" + + @pytest.mark.usefixtures("qtbot") def test_save_routes_to_correct_gt_when_multiple_gt_exist( viewer, keypoint_controls, qtbot, tmp_path, overwrite_confirm @@ -482,18 +541,28 @@ def test_projectless_folder_save_can_associate_with_config_and_coerce_paths_to_d Contract: a project-less labeled folder can be associated with a chosen DLC project at save time by rewriting safe paths to canonical DLC row keys. - Goals - ----- - - Use current external folder name as the target dataset name. - - Save safe paths as labeled-data//. - - Use the same normalized metadata for overwrite preflight and actual write. + E2E goals + --------- + - Use the current external folder name as the target dataset name. + - Save into the chosen DLC project's labeled-data// folder. + - Use normalized DLC row-key metadata consistently for overwrite preflight + and actual write. - Persist the improved metadata on the live layer after successful save. + - Reopen the saved H5 through the plugin and recover the same plugin-visible + keypoint annotation. Non-goals --------- - - Do NOT require the current files to already be inside the selected project. - - Do NOT coerce nested/multi-folder layouts into DLC row keys. - - Do NOT rewrite unrelated outside paths. + - Do not assert the raw pandas/HDF row-index representation. + Mixed-depth paths may be represented internally as padded MultiIndex rows. + That is a lower-level writer/dataframe contract, not this E2E contract. + - Do not require the current files to already be inside the selected project. + - Do not move external image files on disk. + - Unrelated outside paths are invalid for a successful association save and + are covered by the refusal test. + - Do not test active-keypoint selection mechanics. This test records the + actual plugin-visible label assigned to the added point and verifies that + label survives save/readback. """ overwrite_confirm.forbid() @@ -512,29 +581,43 @@ def test_projectless_folder_save_can_associate_with_config_and_coerce_paths_to_d outside_img = outside_dir / "img999.png" outside_img.write_bytes(b"placeholder") - from napari_deeplabcut.core import keypoints - - # Open config first -> placeholder points layer + # Open config first -> placeholder/config-derived Points layer. viewer.open(str(config_path), plugin="napari-deeplabcut") - qtbot.waitUntil(lambda: any(isinstance(ly, Points) for ly in viewer.layers), timeout=5_000) + qtbot.waitUntil( + lambda: any(isinstance(ly, Points) for ly in viewer.layers), + timeout=5_000, + ) + qtbot.wait(200) points = next(ly for ly in viewer.layers if isinstance(ly, Points)) store = keypoint_controls.get_layer_store(points) assert store is not None - # Simulate project-less folder metadata: + # Simulate a project-less labeled folder association candidate. + # + # This mirrors a layer that has image/path context, but no known DLC project. + # The save workflow should ask the user to associate it with config.yaml. points.metadata = dict(points.metadata or {}) points.metadata["root"] = str(external_folder) points.metadata["paths"] = [ str(inside_abs), # direct child of source_root -> should coerce "img002.png", # basename -> should coerce f"labeled-data/{dataset}/img003.png", # already canonical -> preserve - str(outside_img), # unrelated absolute path -> preserve unchanged ] points.metadata.pop("project", None) - store.current_keypoint = keypoints.Keypoint("bodypart1", "") - points.add(np.array([0.0, 11.0, 22.0], dtype=float)) + # Add one visible annotation using the plugin-facing helper. + # + # The plugin add wrapper owns keypoint assignment. In this E2E test we + # intentionally record the actual plugin-visible label assigned to the + # point, then assert that the same label/coordinate survives save + reopen. + _set_or_add_bodypart_xy(points, store, "bodypart1", x=22.0, y=11.0) + + saved_label = _get_single_point_label_at_xy( + points, + x=22.0, + y=11.0, + ) import napari_deeplabcut.core.conflicts as conflicts from napari_deeplabcut.ui import dialogs as ui_dialogs @@ -546,6 +629,7 @@ def _wrapped_compute(data, attributes): captured["attributes"] = attributes return real_compute(data, attributes) + # Simulate the user choosing the project config.yaml. monkeypatch.setattr( save_workflow_mod, "prompt_for_project_config_for_save", @@ -554,11 +638,15 @@ def _wrapped_compute(data, attributes): config_path=str(config_path), ), ) + + # Simulate accepting the metadata path rewrite confirmation. monkeypatch.setattr( save_workflow_mod, "maybe_confirm_dataset_path_rewrite", lambda *args, **kwargs: True, ) + + # Capture the exact metadata seen by overwrite preflight. monkeypatch.setattr( save_workflow_mod, "compute_overwrite_report_for_points_save", @@ -573,15 +661,15 @@ def _wrapped_compute(data, attributes): qtbot.wait(300) # After project association, save should route into the chosen project's - # labeled-data// folder inferred from the rewritten metadata. + # labeled-data// folder inferred from rewritten metadata. expected_dataset_dir = project / "labeled-data" / dataset expected_h5 = expected_dataset_dir / "CollectedData_John.h5" expected_csv = expected_dataset_dir / "CollectedData_John.csv" - assert expected_h5.exists() - assert expected_csv.exists() + assert expected_h5.exists(), f"Expected H5 to be created: {expected_h5}" + assert expected_csv.exists(), f"Expected CSV to be created: {expected_csv}" - # And it should NOT create a GT file next to the external source folder. + # It must not create GT files next to the external source folder. assert not (external_folder / "CollectedData_John.h5").exists() assert not (external_folder / "CollectedData_John.csv").exists() @@ -589,28 +677,44 @@ def _wrapped_compute(data, attributes): f"labeled-data/{dataset}/{inside_abs.name}", f"labeled-data/{dataset}/img002.png", f"labeled-data/{dataset}/img003.png", - outside_img.as_posix(), ] - # Preflight saw normalized metadata + # Preflight saw normalized metadata. assert captured["attributes"]["metadata"]["project"] == str(project) assert captured["attributes"]["metadata"]["paths"] == expected_paths - # Live layer metadata persisted the successful normalization + # Live layer metadata persisted the successful normalization. assert points.metadata["project"] == str(project) assert points.metadata["paths"] == expected_paths - # H5 row index contains canonical DLC row keys for the safe cases - df = pd.read_hdf(expected_h5, key="df_with_missing") - if isinstance(df.index, pd.MultiIndex): - observed_rows = ["/".join(map(str, idx)) for idx in df.index] - else: - observed_rows = [str(idx).replace("\\", "/") for idx in df.index] + # E2E readback contract: + # the plugin should be able to reopen the saved H5 and recover the same + # plugin-visible annotation. + viewer.layers.clear() + qtbot.wait(200) + + viewer.open(str(expected_h5), plugin="napari-deeplabcut") + qtbot.waitUntil( + lambda: any(isinstance(ly, Points) for ly in viewer.layers), + timeout=10_000, + ) + qtbot.wait(200) + + reopened = next(ly for ly in viewer.layers if isinstance(ly, Points)) + + _assert_points_layer_has_label_xy( + reopened, + saved_label, + x=22.0, + y=11.0, + ) + + reopened_paths = [str(path).replace("\\", "/") for path in (reopened.metadata.get("paths") or [])] - assert f"labeled-data/{dataset}/{inside_abs.name}" in observed_rows - assert f"labeled-data/{dataset}/img002.png" not in observed_rows - assert f"labeled-data/{dataset}/img003.png" not in observed_rows - assert outside_img.as_posix() not in observed_rows + expected_saved_path = f"labeled-data/{dataset}/{inside_abs.name}" + assert expected_saved_path in reopened_paths, ( + f"Expected reopened layer metadata paths to include {expected_saved_path!r}. Observed paths: {reopened_paths}" + ) @pytest.mark.usefixtures("qtbot") @@ -955,3 +1059,117 @@ def _unexpected_save(*args, **kwargs): # No GT file should have been created in the dataset folder assert not (labeled_folder / "CollectedData_John.h5").exists() + + +@pytest.mark.usefixtures("qtbot") +def test_projectless_folder_save_refuses_unresolved_outside_paths( + viewer, + keypoint_controls, + qtbot, + tmp_path, + monkeypatch, + overwrite_confirm, + save_workflow_mod, +): + """ + Contract: when a project-less labeled folder is associated with an + existing DLC project via config.yaml, all image paths must be safely + rewritable to DLC project dataset row keys. + + In this project-associated save mode, DLC annotation H5 row keys must be: + + labeled-data// + + Unrelated absolute paths must never be kept as-is or written into the + destination CollectedData_*.h5. If any path cannot be safely rewritten, + the save must be refused before confirmation/overwrite preflight. + """ + overwrite_confirm.forbid() + + project, config_path, _project_dataset_folder = _make_project_config_and_frames_no_gt(tmp_path) + + external_folder = tmp_path / "session_external" + external_folder.mkdir() + + inside_abs = external_folder / "img001.png" + inside_abs.write_bytes(b"placeholder") + dataset = external_folder.name + + outside_dir = tmp_path / "external-images" + outside_dir.mkdir() + outside_img = outside_dir / "img999.png" + outside_img.write_bytes(b"placeholder") + + viewer.open(str(config_path), plugin="napari-deeplabcut") + qtbot.waitUntil( + lambda: any(isinstance(ly, Points) for ly in viewer.layers), + timeout=5_000, + ) + qtbot.wait(200) + + points = next(ly for ly in viewer.layers if isinstance(ly, Points)) + store = keypoint_controls.get_layer_store(points) + assert store is not None + + points.metadata = dict(points.metadata or {}) + points.metadata["root"] = str(external_folder) + points.metadata["paths"] = [ + str(inside_abs), + str(outside_img), + ] + points.metadata.pop("project", None) + + _set_or_add_bodypart_xy(points, store, "bodypart1", x=22.0, y=11.0) + + from napari_deeplabcut.ui import dialogs as ui_dialogs + + monkeypatch.setattr( + save_workflow_mod, + "prompt_for_project_config_for_save", + lambda *args, **kwargs: ui_dialogs.ProjectConfigPromptResult( + action=ui_dialogs.ProjectConfigPromptAction.ASSOCIATE, + config_path=str(config_path), + ), + ) + + monkeypatch.setattr( + save_workflow_mod, + "maybe_confirm_dataset_path_rewrite", + lambda *args, **kwargs: pytest.fail( + "Dataset path rewrite confirmation must not be shown when unresolved paths exist." + ), + ) + + monkeypatch.setattr( + save_workflow_mod, + "compute_overwrite_report_for_points_save", + lambda *args, **kwargs: pytest.fail("Overwrite preflight must not run when unresolved outside paths exist."), + ) + + warned = {"called": False, "title": "", "text": ""} + + def _warn(parent, title, text, *args, **kwargs): + warned["called"] = True + warned["title"] = title + warned["text"] = text + return save_workflow_mod.QMessageBox.Ok + + monkeypatch.setattr(save_workflow_mod.QMessageBox, "warning", _warn) + + keypoint_controls.viewer.layers.selection.active = points + keypoint_controls.viewer.layers.selection.select_only(points) + + outcome = keypoint_controls._save_workflow.save_layers(selected=True) + qtbot.wait(200) + + assert outcome.saved is False + assert warned["called"] is True + assert "could not be safely rewritten" in warned["text"] + assert outside_img.name in warned["text"] + + expected_dataset_dir = project / "labeled-data" / dataset + assert not (expected_dataset_dir / "CollectedData_John.h5").exists() + assert not (expected_dataset_dir / "CollectedData_John.csv").exists() + + assert not (external_folder / "CollectedData_John.h5").exists() + assert not (external_folder / "CollectedData_John.csv").exists() diff --git a/src/napari_deeplabcut/_tests/test_writer.py b/src/napari_deeplabcut/_tests/test_writer.py index b9228a66..9b183301 100644 --- a/src/napari_deeplabcut/_tests/test_writer.py +++ b/src/napari_deeplabcut/_tests/test_writer.py @@ -389,3 +389,81 @@ def test_write_hdf_promotion_creates_gt_when_missing(tmp_path, fake_keypoints, m assert not (root / "machinelabels-iter0.h5").exists(), ( "Writer should not create/overwrite prediction files during promotion." ) + + +def test_write_hdf_gt_merge_persists_deleted_keypoint(tmp_path): + """ + Regression test for the full writer path. + + If an existing GT file contains a keypoint and the current Points layer no + longer contains that keypoint within the editable save scope, write_hdf() + must persist that deletion by writing NaN into CollectedData_*.h5. + """ + root = tmp_path / "proj" + root.mkdir() + + cols = pd.MultiIndex.from_product( + [["me"], ["nose", "tail"], ["x", "y"]], + names=["scorer", "bodyparts", "coords"], + ) + + header = DLCHeaderModel(columns=cols) + + gt_path = root / "CollectedData_me.h5" + + # Existing GT has both nose and tail. + df_gt = pd.DataFrame( + [[10.0, 20.0, 30.0, 40.0]], + index=["img000.png"], + columns=cols, + ) + guarantee_multiindex_rows(df_gt) + df_gt.to_hdf(gt_path, key="df_with_missing", mode="w") + + metadata = { + "name": "CollectedData_me", + "properties": { + # Current Points layer contains only tail. + # Nose was deleted from napari Points. + "label": ["tail"], + "id": [""], + "likelihood": [1.0], + }, + "metadata": { + "header": header, + "paths": ["img000.png"], + "root": str(root), + }, + } + + # Explicit GT provenance routes the writer back to the existing GT file. + _add_source_io( + metadata, + root=root, + kind=AnnotationKind.GT, + source_name="CollectedData_me.h5", + ) + + # One napari point: [frame, y, x]. + points = np.array( + [ + [0.0, 41.0, 31.0], + ] + ) + + fnames = _writer.write_hdf_napari_dlc("ignored.h5", points, metadata) + + assert Path(fnames[0]) == gt_path + assert gt_path.exists() + + out = pd.read_hdf(gt_path, key="df_with_missing") + + row = ("img000.png",) + + # Deleted nose should be persisted as NaN. + assert pd.isna(out.loc[row, ("me", "nose", "x")]) + assert pd.isna(out.loc[row, ("me", "nose", "y")]) + + # Present tail should be updated from napari [y, x] -> DLC [x, y]. + assert out.loc[row, ("me", "tail", "x")] == 31.0 + assert out.loc[row, ("me", "tail", "y")] == 41.0 diff --git a/src/napari_deeplabcut/_tests/ui/test_dialogs.py b/src/napari_deeplabcut/_tests/ui/test_dialogs.py index 6b7951d1..7f3da3f0 100644 --- a/src/napari_deeplabcut/_tests/ui/test_dialogs.py +++ b/src/napari_deeplabcut/_tests/ui/test_dialogs.py @@ -10,6 +10,7 @@ import napari_deeplabcut.ui.dialogs as ui_dialogs from napari_deeplabcut.config.keybinds import iter_shortcuts +from napari_deeplabcut.config.models import ConflictEntry, OverwriteConflictReport from napari_deeplabcut.ui.dialogs import ( OverwriteConflictsDialog, ProjectConfigPromptAction, @@ -389,13 +390,22 @@ def test_maybe_confirm_overwrite_returns_true_when_confirmation_disabled(monkeyp def test_maybe_confirm_overwrite_delegates_to_confirm(monkeypatch, dialog_parent): - report = SimpleNamespace( - has_conflicts=True, - layer_name="pose-layer", - destination_path="/tmp/labels.h5", + report = OverwriteConflictReport( n_overwrites=3, + n_deletions=0, n_frames=2, - details_text="img001.png -> nose, tail", + entries=( + ConflictEntry( + frame_label="img001.png", + keypoints=("nose", "tail"), + ), + ConflictEntry( + frame_label="img002.png", + keypoints=("paw",), + ), + ), + layer_name="pose-layer", + destination_path="/tmp/labels.h5", ) monkeypatch.setattr( @@ -420,11 +430,15 @@ def fake_confirm(parent, **kwargs): assert result is False assert captured["parent"] is dialog_parent assert captured["kwargs"] == { + "title": "Overwrite warning", "summary": "Saving will overwrite existing keypoints in the destination file.", "layer_text": "pose-layer", "dest_text": "/tmp/labels.h5", "affected_text": "3 keypoint overwrite(s) across 2 frame(s)/image(s).", - "details": "img001.png -> nose, tail", + "details": ("img001.png\n Modified: nose, tail\n\nimg002.png\n Modified: paw"), + "details_label_text": "Conflicts (frame/image → keypoints):", + "confirm_button_text": "Save (overwrite) keypoints", + "dangerous_default_cancel": False, } diff --git a/src/napari_deeplabcut/config/models.py b/src/napari_deeplabcut/config/models.py index cec1e226..90315bc0 100644 --- a/src/napari_deeplabcut/config/models.py +++ b/src/napari_deeplabcut/config/models.py @@ -553,6 +553,7 @@ class PointsMetadata(BaseModel): class ConflictEntry: frame_label: str keypoints: tuple[str, ...] + deleted_keypoints: tuple[str, ...] = () @dataclass(frozen=True) @@ -580,6 +581,7 @@ class OverwriteConflictReport: """ n_overwrites: int + n_deletions: int n_frames: int entries: tuple[ConflictEntry, ...] truncated_entries: int = 0 @@ -588,17 +590,41 @@ class OverwriteConflictReport: @property def has_conflicts(self) -> bool: - return self.n_overwrites > 0 + return self.n_overwrites > 0 or self.n_deletions > 0 @property def details_text(self) -> str: + """ + Build plain-text conflict details for the scrollable details box. + + Expected report entry fields: + - frame_label + - keypoints: modified/overwritten keypoints + - deleted_keypoints: cleared/deleted keypoints + """ if not self.entries: return "No detailed conflicts." - lines = [f"{entry.frame_label} → {', '.join(entry.keypoints)}" for entry in self.entries] - if self.truncated_entries: + + lines: list[str] = [] + + for entry in self.entries: + lines.append(str(entry.frame_label)) + + modified = tuple(getattr(entry, "keypoints", ()) or ()) + deleted = tuple(getattr(entry, "deleted_keypoints", ()) or ()) + + if modified: + lines.append(f" Modified: {', '.join(modified)}") + + if deleted: + lines.append(f" Deleted / cleared: {', '.join(deleted)}") + lines.append("") + + if self.truncated_entries: lines.append(f"… and {self.truncated_entries} more frame/image entries.") - return "\n".join(lines) + + return "\n".join(lines).rstrip() # ----------------------------------------------------------------------------- diff --git a/src/napari_deeplabcut/core/conflicts.py b/src/napari_deeplabcut/core/conflicts.py index 597f9b0f..ed8b53aa 100644 --- a/src/napari_deeplabcut/core/conflicts.py +++ b/src/napari_deeplabcut/core/conflicts.py @@ -7,7 +7,14 @@ from napari_deeplabcut.config.models import AnnotationKind, OverwriteConflictReport, PointsMetadata from napari_deeplabcut.core import schemas as dlc_schemas -from napari_deeplabcut.core.dataframes import set_df_scorer +from napari_deeplabcut.core.dataframes import ( + build_overwrite_conflict_report, + complete_df_for_save, + form_df_from_validated, + keypoint_conflicts, + keypoint_deletions, + set_df_scorer, +) from napari_deeplabcut.core.errors import AmbiguousSaveError, MissingProvenanceError from napari_deeplabcut.core.io import DLC_CANONICAL_H5_KEY from napari_deeplabcut.core.metadata import parse_points_metadata @@ -60,11 +67,6 @@ def compute_overwrite_report_for_points_save( # Local imports keep core.conflicts free of import cycles: # - core.dataframes imports ConflictEntry / OverwriteConflictReport # - core.io imports dataframe helpers and metadata parsing - from napari_deeplabcut.core.dataframes import ( - build_overwrite_conflict_report, - form_df_from_validated, - keypoint_conflicts, - ) attrs = dlc_schemas.PointsLayerAttributesModel.model_validate(attributes or {}) pts_meta: PointsMetadata = parse_points_metadata(attrs.metadata, drop_header=False) @@ -84,16 +86,21 @@ def compute_overwrite_report_for_points_save( } ) - # Build the outgoing dataframe exactly like the writer df_new = form_df_from_validated(ctx) - # Resolve output path using the same provenance-first routing as write_hdf(...) out_path, target_scorer, source_kind = resolve_output_path_from_metadata(attributes) - # Promotion to GT may rewrite scorer level if target_scorer: df_new = set_df_scorer(df_new, target_scorer) + header_for_save = pts_meta.header.with_scorer(target_scorer) if target_scorer else pts_meta.header + + df_new = complete_df_for_save( + df_new, + pts_meta=pts_meta, + header=header_for_save, + ) + # Never write back to machine sources without an explicit promotion target if not out_path and source_kind == AnnotationKind.MACHINE: raise MissingProvenanceError("Cannot resolve provenance output path for MACHINE source.") @@ -149,9 +156,11 @@ def compute_overwrite_report_for_points_save( df_old = pd.read_hdf(out) key_conflict = keypoint_conflicts(df_old, df_new) + deletion_conflict = keypoint_deletions(df_old, df_new) report = build_overwrite_conflict_report( key_conflict, + deletion_conflict=deletion_conflict, layer_name=attributes.get("name"), destination_path=str(out), ) @@ -184,10 +193,6 @@ def compute_overwrite_report_for_extracted_labels_row( OverwriteConflictReport | None Report if overwrites would occur, otherwise None. """ - from napari_deeplabcut.core.dataframes import ( - build_overwrite_conflict_report, - keypoint_conflicts, - ) out = Path(destination_path) if not out.exists(): diff --git a/src/napari_deeplabcut/core/dataframes.py b/src/napari_deeplabcut/core/dataframes.py index 564a6034..a3d43d7a 100644 --- a/src/napari_deeplabcut/core/dataframes.py +++ b/src/napari_deeplabcut/core/dataframes.py @@ -7,12 +7,28 @@ import numpy as np import pandas as pd -from napari_deeplabcut.config.models import ConflictEntry, OverwriteConflictReport +from napari_deeplabcut.config.models import ConflictEntry, OverwriteConflictReport, PointsMetadata from napari_deeplabcut.core.schemas import DLCHeaderModel, PointsWriteInputModel logger = logging.getLogger(__name__) +def drop_likelihood_columns(df: pd.DataFrame) -> pd.DataFrame: + """Remove DLC likelihood columns from a dataframe if present.""" + # DLC-style wide dataframe: MultiIndex columns with a coords level + if isinstance(df.columns, pd.MultiIndex): + col_names = list(df.columns.names) + if "coords" in col_names: + mask = df.columns.get_level_values("coords").astype(str) != "likelihood" + return df.loc[:, mask] + + # Fallback for already-stacked / flat dataframes + if "likelihood" in df.columns: + return df.drop(columns="likelihood") + + return df + + def restore_dlc_on_disk_header_shape( df: pd.DataFrame, header: DLCHeaderModel, *, is_ma_project: bool | None = None ) -> pd.DataFrame: @@ -232,30 +248,20 @@ def form_df_from_validated(ctx: PointsWriteInputModel) -> pd.DataFrame: df = df.unstack(["scorer", "individuals", "bodyparts", "coords"]) df.index.name = None - hdr_cols = ctx.meta.header.as_multiindex() # pandas-only helper; raises if pandas missing - logger.debug("Before reindex: cols nlevels %s, names %s", df.columns.nlevels, df.columns.names) - logger.debug("header cols nlevels %s, names %s", hdr_cols.nlevels, hdr_cols.names) - # If df columns dropped individuals, drop it from header too (if present) - # if df.columns.nlevels == 3 and isinstance(hdr_cols, pd.MultiIndex) and hdr_cols.nlevels == 4: - # if "individuals" in hdr_cols.names: - # hdr_cols = hdr_cols.droplevel("individuals") + df = harmonize_keypoint_column_index(df) + hdr_cols = canonical_keypoint_columns_from_header(ctx.meta.header) - # If df columns kept individuals but header doesn't have it, add it (single-animal) - if df.columns.nlevels == 4 and isinstance(hdr_cols, pd.MultiIndex) and hdr_cols.nlevels == 3: - # Insert empty individuals level into header tuples - frame = hdr_cols.to_frame(index=False) - frame.insert(1, "individuals", "") - hdr_cols = pd.MultiIndex.from_frame(frame, names=["scorer", "individuals", "bodyparts", "coords"]) + logger.debug("header cols nlevels %s, names %s", hdr_cols.nlevels, hdr_cols.names) df = df.reindex(hdr_cols, axis=1) logger.debug("After reindex: cols nlevels %s, names %s", df.columns.nlevels, df.columns.names) logger.debug( "header cols nlevels %s, names %s", - ctx.meta.header.as_multiindex().nlevels, - ctx.meta.header.as_multiindex().names, + hdr_cols.nlevels, + hdr_cols.names, ) # Replace integer frame index with path keys if available @@ -401,54 +407,259 @@ def align_old_new(df_old: pd.DataFrame, df_new: pd.DataFrame) -> tuple[pd.DataFr ) -def keypoint_conflicts(df_old: pd.DataFrame, df_new: pd.DataFrame) -> pd.DataFrame: +def canonical_keypoint_columns_from_header(header: DLCHeaderModel) -> pd.MultiIndex: """ - Return a boolean DataFrame indexed by image, with columns as keypoints, - True when any coord (x/y[/likelihood]) would overwrite an existing value. + Return the canonical internal keypoint column index for save/merge/conflict logic. - Columns in output are MultiIndex levels subset: (individuals?, bodyparts) - or just (bodyparts) for single animal. + Internally we normalize DLC columns to 4 levels: + scorer / individuals / bodyparts / coords + even for single-animal projects, where individuals is represented as "". + Final on-disk shape is restored later by restore_dlc_on_disk_header_shape(). """ - old, new = align_old_new(df_old, df_new) + cols = header.as_multiindex() - old_has = old.notna() - new_has = new.notna() + if not isinstance(cols, pd.MultiIndex): + raise TypeError("DLC header columns must be a pandas MultiIndex.") - # cell-level conflicts: both have values and differ - cell_conflict = (old != new) & old_has & new_has + # Single-animal DLC header: scorer / bodyparts / coords + if cols.nlevels == 3: + frame = cols.to_frame(index=False) + + # Be defensive about unnamed levels. We assume DLC order: + # scorer, bodyparts, coords. + frame.columns = ["scorer", "bodyparts", "coords"] + frame.insert(1, "individuals", "") + + return pd.MultiIndex.from_frame( + frame, + names=["scorer", "individuals", "bodyparts", "coords"], + ) + + # Multi-animal DLC header: scorer / individuals / bodyparts / coords + if cols.nlevels == 4: + return cols.set_names(["scorer", "individuals", "bodyparts", "coords"]) + + raise ValueError(f"Unsupported DLC header column depth. Expected 3 or 4 levels, got {cols.nlevels}.") + + +def save_index_from_points_metadata(pts_meta: PointsMetadata) -> pd.Index | pd.MultiIndex | None: + """ + Build the editable row index from PointsMetadata for saving. + + If pts_meta.paths is present, those paths define the set of frames/images + currently represented by the layer. Missing keypoints within this row scope + should be saved as NaN, which is how deleted napari points clear old labels. + + Returns None if no explicit path scope is available. + """ + paths = list(pts_meta.paths or []) + if not paths: + return None + + idx = pd.Index(paths) + dummy = pd.DataFrame(index=idx) + guarantee_multiindex_rows(dummy) + return dummy.index + + +def complete_df_for_save( + df: pd.DataFrame, + *, + pts_meta: PointsMetadata, + header: DLCHeaderModel, +) -> pd.DataFrame: + """ + Napari Points.data contains only present/finite keypoints. If a user deletes + a point, that point disappears from Points.data and layer.properties. For + save semantics, absence inside the current editable scope must become NaN, + otherwise merge-on-save will preserve the old on-disk value. + + This reindexes the dataframe to: + + - all editable rows from pts_meta.paths, when available + - all expected keypoint columns from the DLC header + + Goal: + + - present keypoint -> finite x/y + - deleted/missing keypoint inside save scope -> NaN + - rows outside save scope are not included and can be preserved separately + during merge. + """ + df_copy = df.copy() + + guarantee_multiindex_rows(df_copy) + df_copy = harmonize_keypoint_column_index(df_copy) + df_copy = drop_likelihood_columns(df_copy) + + target_cols = canonical_keypoint_columns_from_header(header) + + # Always drop likelihood + coords = target_cols.get_level_values("coords").astype(str) + target_cols = target_cols[coords != "likelihood"] + + target_index = save_index_from_points_metadata(pts_meta) + + # If we have explicit paths, they define the editable frame/image scope. + # Otherwise, preserve the current dataframe rows and only complete columns. + if target_index is not None: + df_copy = df_copy.reindex(index=target_index, columns=target_cols) + else: + df_copy = df_copy.reindex(columns=target_cols) + + return df_copy + + +def merge_save_df( + df_old: pd.DataFrame, + df_new: pd.DataFrame, +) -> pd.DataFrame: + """ + Merge an existing DLC dataframe with a new save dataframe. + + Semantics: + - rows/columns outside df_new scope are preserved from df_old + - rows/columns inside df_new scope replace df_old, including NaN + - NaN in df_new therefore clears/deletes an old saved keypoint + """ + df_new2, df_old2 = harmonize_keypoint_row_index(df_new, df_old) + df_new2 = harmonize_keypoint_column_index(df_new2) + df_old2 = harmonize_keypoint_column_index(df_old2) + + if len(df_old2.index) and len(df_new2.index): + overlap = df_old2.index.intersection(df_new2.index) + if overlap.empty: + raise ValueError( + "Cannot merge save dataframe: no row-index overlap after harmonization. " + "Existing labels would be preserved instead of overwritten/deleted." + ) + + idx = df_old2.index.union(df_new2.index) + cols = df_old2.columns.union(df_new2.columns) + + df_out = df_old2.reindex(index=idx, columns=cols) - # Identify which levels exist - col_names = list(old.columns.names) + # Critical: assign df_new values directly, including NaN. + df_out.loc[df_new2.index, df_new2.columns] = df_new2 + + return df_out + + +def _keypoint_group_levels(columns: pd.Index | pd.MultiIndex) -> list[str] | None: + """ + For internal DLC comparisons, columns are expected to be normalized to: + scorer / individuals / bodyparts / coords + Conflict reporting ignores scorer and coords, grouping by: + individuals / bodyparts + or just: + bodyparts + for single-animal data. + """ + if not isinstance(columns, pd.MultiIndex): + return None + + col_names = list(columns.names or []) has_inds = "individuals" in col_names has_body = "bodyparts" in col_names has_coords = "coords" in col_names if not (has_body and has_coords): - # Unexpected format; fall back to cell-level summary - return cell_conflict.any(axis=1).to_frame(name="conflict") + return None - # Drop scorer level if present (not meaningful for end-user warning) - # We want to aggregate per (individual, bodypart) across coords. - # Build the grouping levels that define a "keypoint". - key_levels = [] + key_levels: list[str] = [] if has_inds: key_levels.append("individuals") key_levels.append("bodyparts") - # Reduce across coords first -> any conflict for that coord-set - # This yields a DataFrame with columns still multi-level including scorer and coords. - # We then group by key_levels. - # Ensure we can group by dropping coords by grouping over it using "any". - # We'll group over all columns that share the same (individual/bodypart), ignoring coords. - # To do that cleanly: swap coords to last, then groupby on key_levels. - conflict_cols = cell_conflict.copy() + return key_levels + + +def keypoint_deletions(df_old: pd.DataFrame, df_new: pd.DataFrame) -> pd.DataFrame: + """ + Return a keypoint-level boolean table for destructive deletions. + + True means: + old keypoint has at least one stored value, + new save-scope keypoint has no stored value. + """ + scoped_new = harmonize_keypoint_column_index(df_new.copy()) + guarantee_multiindex_rows(scoped_new) + + new_scope, old_scope = harmonize_keypoint_row_index(scoped_new, df_old) + + new_scope = harmonize_keypoint_column_index(new_scope) + old_scope = harmonize_keypoint_column_index(old_scope) - # Group columns by key_levels and reduce with any() across remaining levels (coords + scorer) - # pandas no longer allows groupby on axis=1 by level names - # we use .T to swap axes, groupby on rows, then .T back to original orientation instead - key_conflict = conflict_cols.T.groupby(level=key_levels).any().T + scope_index = new_scope.index + scope_columns = new_scope.columns - return key_conflict + old_scoped = old_scope.reindex(index=scope_index, columns=scope_columns) + new_scoped = new_scope.reindex(index=scope_index, columns=scope_columns) + + key_levels = _keypoint_group_levels(old_scoped.columns) + + if key_levels is None: + return (old_scoped.notna().any(axis=1) & ~new_scoped.notna().any(axis=1)).to_frame(name="deleted") + + old_key_has = old_scoped.notna().T.groupby(level=key_levels).any().T + new_key_has = new_scoped.notna().T.groupby(level=key_levels).any().T + + return old_key_has & ~new_key_has + + +def keypoint_conflicts( + df_old: pd.DataFrame, + df_new: pd.DataFrame, + *, + include_deletions: bool = False, +) -> pd.DataFrame: + """ + Return a boolean DataFrame indexed by image, with columns as keypoints. + + True means saving df_new would destructively affect an existing value in df_old. + + By default this reports overwrites only: + + old finite, new finite, values differ + + If include_deletions=True, this also reports keypoint deletions: + + old keypoint has any stored value, + new keypoint has no stored value + + Deletion detection only applies within df_new's own save scope. + Callers should pass a df_new that has already been expanded with + complete_df_for_save(). + """ + old, new = align_old_new(df_old, df_new) + + old_has = old.notna() + new_has = new.notna() + + # Cell-level overwrite conflicts: + # old and new both have values, but they differ. + cell_conflict = (old != new) & old_has & new_has + + key_levels = _keypoint_group_levels(old.columns) + + if key_levels is None: + overwrite_conflict = cell_conflict.any(axis=1).to_frame(name="conflict") + else: + overwrite_conflict = cell_conflict.T.groupby(level=key_levels).any().T + + if not include_deletions: + return overwrite_conflict + + deletion_conflict = keypoint_deletions(df_old, df_new) + + # Align both key-level tables and combine for legacy/simple callers. + idx = overwrite_conflict.index.union(deletion_conflict.index) + cols = overwrite_conflict.columns.union(deletion_conflict.columns) + + overwrite_conflict = overwrite_conflict.reindex(index=idx, columns=cols, fill_value=False) + deletion_conflict = deletion_conflict.reindex(index=idx, columns=cols, fill_value=False) + + return overwrite_conflict | deletion_conflict def _format_image_id(img_id) -> str: @@ -476,45 +687,74 @@ def _format_keypoint_id(kp) -> str: def build_overwrite_conflict_report( key_conflict: pd.DataFrame, *, + deletion_conflict: pd.DataFrame | None = None, max_entries: int = 50, layer_name: str | None = None, destination_path: str | None = None, ) -> OverwriteConflictReport: """ - Convert a pandas key-conflict table into a UI-facing overwrite report. + Convert pandas key-conflict tables into a UI-facing overwrite report. Parameters ---------- key_conflict: - Boolean-like DataFrame indexed by frame/image identifier, with columns - representing keypoints. Truthy cells indicate a keypoint overwrite conflict. + Boolean DataFrame indexed by frame/image identifier, with columns + representing keypoints. + Truthy cells indicate an existing keypoint value + will be overwritten by another finite value. + + deletion_conflict: + Optional boolean DataFrame with the same shape convention. + Truthy cells indicate an existing keypoint value will be cleared/deleted, + i.e. written as missing values. Returns ------- OverwriteConflictReport - Plain-Python UI contract describing overwrite counts and detailed entries. - - Notes - ----- - This function is the pandas boundary for overwrite reporting. The UI should - depend only on OverwriteConflictReport, not on DataFrame structure. """ + if deletion_conflict is None: + deletion_conflict = pd.DataFrame( + False, + index=key_conflict.index, + columns=key_conflict.columns, + ) + + idx = key_conflict.index.union(deletion_conflict.index) + cols = key_conflict.columns.union(deletion_conflict.columns) + + key_conflict = key_conflict.reindex(index=idx, columns=cols, fill_value=False) + deletion_conflict = deletion_conflict.reindex(index=idx, columns=cols, fill_value=False) + + # Prefer showing a keypoint as deleted if both flags somehow appear. + # This avoids duplicated UI entries like "Modified" and "Deleted" for the same keypoint. + key_conflict = key_conflict & ~deletion_conflict + n_overwrites = int(key_conflict.to_numpy().sum()) - n_frames = int(key_conflict.any(axis=1).to_numpy().sum()) + n_deletions = int(deletion_conflict.to_numpy().sum()) + + affected = key_conflict | deletion_conflict + n_frames = int(affected.any(axis=1).to_numpy().sum()) entries: list[ConflictEntry] = [] - for img, row in key_conflict.iterrows(): - conflicted: list[str] = [] - for kp, flag in row.items(): + for img in idx: + modified: list[str] = [] + deleted: list[str] = [] + + for kp, flag in key_conflict.loc[img].items(): + if bool(flag): + modified.append(_format_keypoint_id(kp)) + + for kp, flag in deletion_conflict.loc[img].items(): if bool(flag): - conflicted.append(_format_keypoint_id(kp)) + deleted.append(_format_keypoint_id(kp)) - if conflicted: + if modified or deleted: entries.append( ConflictEntry( frame_label=_format_image_id(img), - keypoints=tuple(conflicted), + keypoints=tuple(modified), + deleted_keypoints=tuple(deleted), ) ) @@ -523,6 +763,7 @@ def build_overwrite_conflict_report( return OverwriteConflictReport( n_overwrites=n_overwrites, + n_deletions=n_deletions, n_frames=n_frames, entries=shown, truncated_entries=truncated, diff --git a/src/napari_deeplabcut/core/io.py b/src/napari_deeplabcut/core/io.py index 18091dce..4cf4dd1e 100644 --- a/src/napari_deeplabcut/core/io.py +++ b/src/napari_deeplabcut/core/io.py @@ -53,11 +53,12 @@ from napari_deeplabcut.config.supported_files import SUPPORTED_IMAGES, SUPPORTED_VIDEOS from napari_deeplabcut.core import schemas as dlc_schemas from napari_deeplabcut.core.dataframes import ( + complete_df_for_save, + drop_likelihood_columns, form_df_from_validated, guarantee_multiindex_rows, - harmonize_keypoint_column_index, - harmonize_keypoint_row_index, merge_multiple_scorers, + merge_save_df, restore_dlc_on_disk_header_shape, set_df_scorer, ) @@ -340,22 +341,6 @@ def form_df( return df -def _drop_likelihood_columns(df: pd.DataFrame) -> pd.DataFrame: - """Remove DLC likelihood columns from a dataframe if present.""" - # DLC-style wide dataframe: MultiIndex columns with a coords level - if isinstance(df.columns, pd.MultiIndex): - col_names = list(df.columns.names) - if "coords" in col_names: - mask = df.columns.get_level_values("coords").astype(str) != "likelihood" - return df.loc[:, mask] - - # Fallback for already-stacked / flat dataframes - if "likelihood" in df.columns: - return df.drop(columns="likelihood") - - return df - - def _drop_likelihood_from_header(header: DLCHeaderModel) -> DLCHeaderModel: """ Return, "names": names}) Return a header model with likelihood removed from the coords level, @@ -465,7 +450,7 @@ def writer(path: str, data: Any, attributes: dict) -> List[str] logger.debug("WRITE header bodyparts=%s", ctx.meta.header.bodyparts) logger.debug("WRITE props labels unique=%s", list(dict.fromkeys(map(str, attrs.properties.get("label", []))))[:30]) df_new = form_df_from_validated(ctx) - df_new = _drop_likelihood_columns(df_new) + df_new = drop_likelihood_columns(df_new) logger.debug("DF_NEW columns nlevels: %s", df_new.columns.nlevels) logger.debug("DF_NEW columns names: %s", df_new.columns.names) @@ -486,6 +471,8 @@ def writer(path: str, data: Any, attributes: dict) -> List[str] header_for_write = pts_meta.header.with_scorer(target_scorer) if target_scorer else pts_meta.header header_for_write = _drop_likelihood_from_header(header_for_write) + df_new = complete_df_for_save(df_new, pts_meta=pts_meta, header=header_for_write) + # Never write back to machine sources without an explicit promotion target if not out_path and source_kind == AnnotationKind.MACHINE: raise MissingProvenanceError("Cannot resolve provenance output path for MACHINE source.") @@ -540,7 +527,7 @@ def writer(path: str, data: Any, attributes: dict) -> List[str] # Merge-on-save for GT if destination_kind == AnnotationKind.GT and out.exists(): - df_old = _drop_likelihood_columns(_read_hdf_any_key(out)) + df_old = drop_likelihood_columns(_read_hdf_any_key(out)) # Harmonize indices and merge try: @@ -554,10 +541,7 @@ def writer(path: str, data: Any, attributes: dict) -> List[str] ) pass - df_new, df_old = harmonize_keypoint_row_index(df_new, df_old) - df_new = harmonize_keypoint_column_index(df_new) - df_old = harmonize_keypoint_column_index(df_old) - df_out = df_new.combine_first(df_old) + df_out = merge_save_df(df_old, df_new) else: df_out = df_new @@ -574,7 +558,7 @@ def writer(path: str, data: Any, attributes: dict) -> List[str] pts_meta=pts_meta, ) df_out = restore_dlc_on_disk_header_shape(df_out, header_for_write, is_ma_project=is_ma_project) - df_out = _drop_likelihood_columns(df_out) + df_out = drop_likelihood_columns(df_out) logger.debug("FINAL WRITE first columns=%s", list(df_out.columns[:10])) logger.debug( diff --git a/src/napari_deeplabcut/ui/dialogs.py b/src/napari_deeplabcut/ui/dialogs.py index 37094f78..4e733fa1 100644 --- a/src/napari_deeplabcut/ui/dialogs.py +++ b/src/napari_deeplabcut/ui/dialogs.py @@ -14,6 +14,7 @@ from napari.layers import Points from qtpy.QtCore import QPoint, Qt +from qtpy.QtGui import QPalette from qtpy.QtWidgets import ( QDialog, QFileDialog, @@ -33,8 +34,8 @@ import napari_deeplabcut.core.io as io from napari_deeplabcut.config.keybinds import iter_shortcuts +from napari_deeplabcut.config.models import OverwriteConflictReport from napari_deeplabcut.config.settings import get_overwrite_confirmation_enabled -from napari_deeplabcut.core.conflicts import OverwriteConflictReport # from napari_deeplabcut.core.dataframes import summarize_keypoint_conflicts @@ -516,6 +517,16 @@ def update_nav_buttons(self): # -------------------------------------------------------------------------------- # Headless labeled data resolved to existing config.yaml project dialog # -------------------------------------------------------------------------------- +def _inverted_parent_bg_hex(widget: QWidget) -> str: + """Return inverse of the parent widget background color as #RRGGBB.""" + ref = widget.parentWidget() or widget + bg = ref.palette().color(QPalette.Window) + inv_r = 255 - bg.red() + inv_g = 255 - bg.green() + inv_b = 255 - bg.blue() + return f"#{inv_r:02x}{inv_g:02x}{inv_b:02x}" + + class ProjectConfigPromptAction(str, Enum): ASSOCIATE = "associate" SKIP = "skip" @@ -744,9 +755,43 @@ def warn_existing_dataset_folder_conflict( # -------------------------------------------------------------------------------------- # Conflict resolution dialog for overwriting existing keypoints when saving annotations. # -------------------------------------------------------------------------------------- +def _conflict_summary_text(report: OverwriteConflictReport) -> str: + n_deletions = int(getattr(report, "n_deletions", 0) or 0) + n_overwrites = int(getattr(report, "n_overwrites", 0) or 0) + + if n_deletions and n_overwrites: + return ( + "Saving will modify existing annotations in the destination file.

" + "Deletion warning: Some keypoints will be deleted." + ) + + if n_deletions: + return "Saving will delete existing keypoints from the destination file.

" + + return "Saving will overwrite existing keypoints in the destination file." + + +def _conflict_affected_text(report: OverwriteConflictReport) -> str: + n_deletions = int(getattr(report, "n_deletions", 0) or 0) + n_overwrites = int(getattr(report, "n_overwrites", 0) or 0) + + parts: list[str] = [] + + if n_overwrites: + parts.append(f"{n_overwrites} keypoint overwrite(s)") + + if n_deletions: + parts.append(f"{n_deletions} keypoint deletion(s)") + + if not parts: + parts.append("No keypoint changes") + + return f"{' and '.join(parts)} across {report.n_frames} frame(s)/image(s)." + + class OverwriteConflictsDialog(QDialog): """ - Warning dialog listing keypoints that will be overwritten. + Warning dialog listing keypoints that will be modified or deleted. Design goals: - compact initial size @@ -765,6 +810,9 @@ def __init__( dest_text: str, affected_text: str, details: str, + details_label_text: str = "Conflicts (frame/image → keypoints):", + confirm_button_text: str = "Overwrite", + dangerous_default_cancel: bool = False, ): super().__init__(parent) self.setWindowTitle(title) @@ -775,14 +823,13 @@ def __init__( layout.setContentsMargins(12, 12, 12, 12) layout.setSpacing(8) - # Summary summary_label = QLabel(summary) summary_label.setWordWrap(True) + summary_label.setTextFormat(Qt.RichText) summary_label.setTextInteractionFlags(Qt.TextSelectableByMouse) summary_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) layout.addWidget(summary_label) - # Context block layer_label = QLabel(f"Layer: {layer_text}") layer_label.setWordWrap(True) layer_label.setTextInteractionFlags(Qt.TextSelectableByMouse) @@ -792,23 +839,29 @@ def __init__( dest_label = QLabel(f"Destination: {dest_text}") dest_label.setWordWrap(True) dest_label.setTextInteractionFlags(Qt.TextSelectableByMouse) - dest_label.setToolTip(dest_text) # helpful for long paths + dest_label.setToolTip(dest_text) dest_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) layout.addWidget(dest_label) affected_label = QLabel(f"Affected: {affected_text}") affected_label.setWordWrap(True) + affected_label.setTextFormat(Qt.RichText) affected_label.setTextInteractionFlags(Qt.TextSelectableByMouse) affected_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) layout.addWidget(affected_label) - # Detail section label - details_label = QLabel("Conflicts (frame/image → keypoints):") + details_label = QLabel(details_label_text) details_label.setWordWrap(True) details_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) layout.addWidget(details_label) - # Scrollable conflict list + separator = QFrame(self) + separator.setFrameShape(QFrame.NoFrame) + separator.setFixedHeight(1) + separator.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed) + separator.setStyleSheet(f"background-color: {_inverted_parent_bg_hex(self)}; border: none;") + layout.addWidget(separator) + text = QPlainTextEdit(self) text.setReadOnly(True) text.setPlainText(details) @@ -819,7 +872,6 @@ def __init__( text.setCenterOnScroll(False) self.text = text - # Keep default height compact, but allow the user to resize larger fm = text.fontMetrics() line_h = fm.lineSpacing() text.setMinimumHeight(line_h * 5 + 16) @@ -827,29 +879,29 @@ def __init__( layout.addWidget(text) - # Buttons btn_row = QHBoxLayout() btn_row.addStretch(1) self.cancel_btn = QPushButton("Cancel") self.cancel_btn.clicked.connect(self.reject) - self.overwrite_btn = QPushButton("Overwrite") - self.overwrite_btn.setDefault(True) - self.overwrite_btn.setAutoDefault(True) + self.overwrite_btn = QPushButton(confirm_button_text) self.overwrite_btn.clicked.connect(self.accept) + if dangerous_default_cancel: + self.cancel_btn.setDefault(True) + self.cancel_btn.setAutoDefault(True) + self.overwrite_btn.setDefault(False) + self.overwrite_btn.setAutoDefault(False) + else: + self.overwrite_btn.setDefault(True) + self.overwrite_btn.setAutoDefault(True) + btn_row.addWidget(self.cancel_btn) btn_row.addWidget(self.overwrite_btn) layout.addLayout(btn_row) - # Let content drive the initial size instead of hardcoding a large minimum self.adjustSize() - self.sizeHint() - # self.resize( - # min(max(hint.width(), 480), 820), - # min(max(hint.height(), 240), 520), - # ) @staticmethod def confirm( @@ -861,6 +913,9 @@ def confirm( affected_text: str, details: str, title: str = "Overwrite warning", + details_label_text: str = "Conflicts (frame/image → keypoints):", + confirm_button_text: str = "Overwrite", + dangerous_default_cancel: bool = False, ) -> bool: dlg = OverwriteConflictsDialog( parent, @@ -870,6 +925,9 @@ def confirm( dest_text=dest_text, affected_text=affected_text, details=details, + details_label_text=details_label_text, + confirm_button_text=confirm_button_text, + dangerous_default_cancel=dangerous_default_cancel, ) return dlg.exec_() == QDialog.Accepted @@ -884,11 +942,28 @@ def maybe_confirm_overwrite( if not get_overwrite_confirmation_enabled(): return True + n_deletions = int(getattr(report, "n_deletions", 0) or 0) + has_deletions = n_deletions > 0 + + title = "Confirm keypoint deletions" if has_deletions else "Overwrite warning" + + confirm_button_text = "Save keypoints" if has_deletions else "Save (overwrite) keypoints" + + details_label_text = ( + "Conflicts (frame/image → modified or deleted keypoints):" + if has_deletions + else "Conflicts (frame/image → keypoints):" + ) + return OverwriteConflictsDialog.confirm( parent, - summary="Saving will overwrite existing keypoints in the destination file.", - layer_text=report.layer_name or "Unknown layer", - dest_text=report.destination_path or "Unknown destination", - affected_text=f"{report.n_overwrites} keypoint overwrite(s) across {report.n_frames} frame(s)/image(s).", + title=title, + summary=_conflict_summary_text(report), + layer_text=html.escape(report.layer_name or "Unknown layer"), + dest_text=html.escape(report.destination_path or "Unknown destination"), + affected_text=_conflict_affected_text(report), details=report.details_text, + details_label_text=details_label_text, + confirm_button_text=confirm_button_text, + dangerous_default_cancel=has_deletions, ) diff --git a/src/napari_deeplabcut/ui/ui_dialogs/save.py b/src/napari_deeplabcut/ui/ui_dialogs/save.py index 467e8f1e..ecd4c5ed 100644 --- a/src/napari_deeplabcut/ui/ui_dialogs/save.py +++ b/src/napari_deeplabcut/ui/ui_dialogs/save.py @@ -746,6 +746,39 @@ def _maybe_prepare_project_path_override_metadata(self, layer: Points) -> tuple[ dataset_name=dataset_name, ) + if unresolved: + + def _unresolved_path_label(item) -> str: + # coerce_paths_to_dlc_row_keys may return unresolved path indexes. + # Convert those back to the original path for user-facing diagnostics. + if isinstance(item, int) and not isinstance(item, bool) and 0 <= item < len(paths): + return str(paths[item]) + return str(item) + + unresolved_labels = [_unresolved_path_label(p) for p in unresolved] + unresolved_preview = "\n".join(f" {p}" for p in unresolved_labels[:10]) + more = "" + if len(unresolved_labels) > 10: + more = f"\n … and {len(unresolved_labels) - 10} more path(s)" + + QMessageBox.warning( + self.parent, + "Cannot associate folder with DLC project", + ( + "Some image paths could not be safely rewritten to DLC dataset row keys.\n\n" + "The save operation has been cancelled to avoid writing unrelated or " + "ambiguous image paths into the destination CollectedData file.\n\n" + "Please report the issue and share the debug log if this is unexpected.\n\n" + f"Dataset: {dataset_name}\n" + f"Source folder: {source_root_path}\n\n" + "Unresolved path(s):\n" + f"{unresolved_preview}" + f"{more}" + ), + QMessageBox.Ok, + ) + return None, True + if not maybe_confirm_dataset_path_rewrite( self.parent, project_root=project_root,