Skip to content

Commit bf9fdcf

Browse files
committed
add the "skip duplicates" options to the "add to library" and
"copy&import" dialogs.
1 parent 4837fac commit bf9fdcf

1 file changed

Lines changed: 186 additions & 99 deletions

File tree

src/libs/import.c

Lines changed: 186 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ static void _do_select_none(dt_lib_module_t* self);
6363
static uint32_t _do_select_new(dt_lib_module_t* self);
6464
static void _update_places_list(dt_lib_module_t* self);
6565
static gboolean _update_files_list(dt_lib_module_t* self);
66+
static void _import_resolve_duplicates(dt_lib_module_t *self);
6667
static void _update_folders_list(dt_lib_module_t* self);
6768
static void _update_images_number(dt_lib_module_t* self,
6869
const guint nb_sel);
@@ -84,6 +85,7 @@ typedef enum dt_import_cols_t
8485
DT_IMPORT_DATETIME, // file datetime
8586
DT_IMPORT_UI_DUPLICATE, // whether the image is a duplicate
8687
DT_IMPORT_TOOLTIP, // tooltip text
88+
DT_IMPORT_FILESIZE, // file size in bytes
8789
DT_IMPORT_NUM_COLS
8890
} dt_import_cols_t;
8991

@@ -369,75 +371,6 @@ static guint _import_from_camera_set_file_list(dt_lib_module_t *self)
369371
int nb = 0;
370372
const gboolean include_nonraws = !dt_conf_get_bool("ui_last/import_ignore_nonraws");
371373

372-
// Metadata duplicate pre-pass: check files by size and timestamp.
373-
// Group files with the same size and timestamp using a hash table.
374-
// When a duplicate pair is found, flag the one with the lexicographically
375-
// higher filename as a possible duplicate, referencing the primary file.
376-
GHashTable *metadata_hash = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
377-
for(GList *img = imgs; img; img = g_list_next(img))
378-
{
379-
dt_camera_files_t *file = img->data;
380-
const char *ext = g_strrstr(file->filename, ".");
381-
if(include_nonraws ||
382-
(ext && ((dt_imageio_is_raw_by_extension(ext)) ||
383-
!g_ascii_strncasecmp(ext, ".dng", sizeof(".dng")))))
384-
{
385-
// Only perform duplicate checking if valid metadata (size and timestamp) is present.
386-
// If either is missing (0), we cannot reliably identify duplicates.
387-
if(file->size > 0 && file->timestamp > 0)
388-
{
389-
char *key = g_strdup_printf("%" PRIu64 ":%" PRIu64, (uint64_t)file->size, (uint64_t)file->timestamp);
390-
dt_camera_files_t *existing = g_hash_table_lookup(metadata_hash, key);
391-
if(existing == NULL)
392-
{
393-
g_hash_table_insert(metadata_hash, key, file);
394-
file->possible_duplicate = FALSE;
395-
}
396-
else
397-
{
398-
if(strcmp(file->filename, existing->filename) > 0)
399-
{
400-
file->possible_duplicate = TRUE;
401-
g_free(file->duplicate_of);
402-
file->duplicate_of = g_strdup(existing->filename);
403-
g_free(key);
404-
}
405-
else
406-
{
407-
existing->possible_duplicate = TRUE;
408-
g_free(existing->duplicate_of);
409-
existing->duplicate_of = g_strdup(file->filename);
410-
g_hash_table_insert(metadata_hash, key, file);
411-
}
412-
}
413-
}
414-
else
415-
{
416-
file->possible_duplicate = FALSE;
417-
}
418-
}
419-
}
420-
421-
// Resolve duplicate chains so every duplicate points to the ultimate primary file.
422-
for(GList *img = imgs; img; img = g_list_next(img))
423-
{
424-
dt_camera_files_t *file = img->data;
425-
if(file->possible_duplicate && file->size > 0 && file->timestamp > 0)
426-
{
427-
char *key = g_strdup_printf("%" PRIu64 ":%" PRIu64, (uint64_t)file->size, (uint64_t)file->timestamp);
428-
dt_camera_files_t *primary = g_hash_table_lookup(metadata_hash, key);
429-
if(primary && primary != file)
430-
{
431-
g_free(file->duplicate_of);
432-
file->duplicate_of = g_strdup(primary->filename);
433-
}
434-
g_free(key);
435-
}
436-
}
437-
g_hash_table_destroy(metadata_hash);
438-
439-
const gboolean hide_duplicates = dt_conf_get_bool("ui_last/import_hide_duplicates");
440-
441374
for(GList *img = imgs; img; img = g_list_next(img))
442375
{
443376
dt_camera_files_t *file = img->data;
@@ -448,11 +381,6 @@ static guint _import_from_camera_set_file_list(dt_lib_module_t *self)
448381
(ext && ((dt_imageio_is_raw_by_extension(ext)) ||
449382
!g_ascii_strncasecmp(ext, ".dng", sizeof(".dng")))))
450383
{
451-
// If configured to hide duplicate camera images, skip adding them to the UI list store
452-
if(hide_duplicates && file->possible_duplicate)
453-
{
454-
continue;
455-
}
456384
const time_t datetime = file->timestamp;
457385
GDateTime *dt_datetime = g_date_time_new_from_unix_local(datetime);
458386
gchar *dt_txt = g_date_time_format(dt_datetime, "%x %X");
@@ -462,25 +390,15 @@ static guint _import_from_camera_set_file_list(dt_lib_module_t *self)
462390
const gboolean already_imported = dt_metadata_already_imported(basename, dtid);
463391
g_free(basename);
464392

465-
// Generate a tooltip indicating the primary file of which this is a duplicate
466-
gchar *tooltip = NULL;
467-
if(file->possible_duplicate && file->duplicate_of)
468-
{
469-
tooltip = g_strdup_printf(_("possible duplicate of %s"), file->duplicate_of);
470-
}
471-
472393
gtk_list_store_insert_with_values(d->from.store, NULL, -1,
473394
DT_IMPORT_UI_EXISTS, already_imported ? "✔" : " ",
474395
DT_IMPORT_UI_FILENAME, file->filename,
475396
DT_IMPORT_FILENAME, file->filename,
476397
DT_IMPORT_UI_DATETIME, dt_txt,
477398
DT_IMPORT_DATETIME, datetime,
478399
DT_IMPORT_THUMB, d->from.eye,
479-
// Columns showing duplicate indicator and mouseover tooltip
480-
DT_IMPORT_UI_DUPLICATE, file->possible_duplicate ? "⧉" : " ",
481-
DT_IMPORT_TOOLTIP, tooltip,
400+
DT_IMPORT_FILESIZE, file->size,
482401
-1);
483-
g_free(tooltip);
484402
nb++;
485403
g_free(dt_txt);
486404
g_date_time_unref(dt_datetime);
@@ -835,6 +753,9 @@ static void _import_add_file_callback(GObject *direnum,
835753
}
836754
else
837755
{
756+
// Scanning is complete; resolve duplicates in the store.
757+
_import_resolve_duplicates(self);
758+
838759
// Nothing more to parse, do select the images
839760
// according to the preference.
840761
uint32_t count_sel = 0;
@@ -896,6 +817,8 @@ static void _import_add_file_callback(GObject *direnum,
896817
const GFileType filetype = g_file_info_get_file_type(info);
897818
const time_t datetime =
898819
g_file_info_get_attribute_uint64(info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
820+
const uint64_t size =
821+
g_file_info_get_attribute_uint64(info, G_FILE_ATTRIBUTE_STANDARD_SIZE);
899822

900823
/* g_file_info_get_is_hidden() always returns 0 on macOS, so we
901824
check if the filename starts with a '.' */
@@ -955,6 +878,7 @@ static void _import_add_file_callback(GObject *direnum,
955878
DT_IMPORT_UI_DATETIME, dt_txt,
956879
DT_IMPORT_DATETIME, datetime,
957880
DT_IMPORT_THUMB, d->from.eye,
881+
DT_IMPORT_FILESIZE, size,
958882
-1);
959883
d->from.nb++;
960884
g_free(dt_txt);
@@ -1014,7 +938,8 @@ static void _import_set_file_list(const gchar *folder,
1014938
G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME ","
1015939
G_FILE_ATTRIBUTE_TIME_MODIFIED ","
1016940
G_FILE_ATTRIBUTE_STANDARD_IS_HIDDEN ","
1017-
G_FILE_ATTRIBUTE_STANDARD_TYPE,
941+
G_FILE_ATTRIBUTE_STANDARD_TYPE ","
942+
G_FILE_ATTRIBUTE_STANDARD_SIZE,
1018943
G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
1019944
G_PRIORITY_LOW,
1020945
d->cancel_iter,
@@ -1093,6 +1018,170 @@ static void _usefn_toggled(GtkWidget *widget,
10931018
_update_layout(self);
10941019
}
10951020

1021+
/* Unified post-pass duplicate detector.
1022+
* Iterates through all rows currently in d->from.store, identifies groups of
1023+
* files with identical sizes and modification timestamps, and marks them.
1024+
* The lexicographically smallest filename in each group is treated as the primary file,
1025+
* and all others are flagged as duplicates. If "hide duplicates" is enabled, duplicate
1026+
* rows are removed from the store; otherwise, they are marked in the UI with a ⧉ symbol
1027+
* and a tooltip pointing to their primary file.
1028+
*/
1029+
static void _import_resolve_duplicates(dt_lib_module_t *self)
1030+
{
1031+
dt_lib_import_t *d = self->data;
1032+
const gboolean hide_duplicates = dt_conf_get_bool("ui_last/import_hide_duplicates");
1033+
1034+
// Hash table to group files by size and timestamp.
1035+
// Key: "size:timestamp", Value: primary_info_t struct of the current primary.
1036+
GHashTable *metadata_hash = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
1037+
1038+
typedef struct {
1039+
char *filename;
1040+
GtkTreeRowReference *row_ref;
1041+
} primary_info_t;
1042+
1043+
GtkTreeModel *model = GTK_TREE_MODEL(d->from.store);
1044+
GtkTreeIter iter;
1045+
gboolean valid = gtk_tree_model_get_iter_first(model, &iter);
1046+
1047+
// Keep a list of duplicate row references so we can update or remove them in a separate pass.
1048+
GList *duplicate_rows = NULL;
1049+
1050+
// Pass 1: Identify all duplicates and track the lexicographical minimum filename for each size/timestamp.
1051+
while(valid)
1052+
{
1053+
char *filename = NULL;
1054+
uint64_t timestamp = 0;
1055+
uint64_t size = 0;
1056+
1057+
gtk_tree_model_get(model, &iter,
1058+
DT_IMPORT_FILENAME, &filename,
1059+
DT_IMPORT_DATETIME, &timestamp,
1060+
DT_IMPORT_FILESIZE, &size,
1061+
-1);
1062+
1063+
if(size > 0 && timestamp > 0 && filename)
1064+
{
1065+
char *key = g_strdup_printf("%" PRIu64 ":%" PRIu64, size, timestamp);
1066+
primary_info_t *primary = g_hash_table_lookup(metadata_hash, key);
1067+
1068+
GtkTreePath *path = gtk_tree_model_get_path(model, &iter);
1069+
GtkTreeRowReference *row_ref = gtk_tree_row_reference_new(model, path);
1070+
gtk_tree_path_free(path);
1071+
1072+
if(primary == NULL)
1073+
{
1074+
// First time seeing this size/timestamp. Mark this file as the primary.
1075+
primary = g_malloc0(sizeof(primary_info_t));
1076+
primary->filename = g_strdup(filename);
1077+
primary->row_ref = row_ref;
1078+
g_hash_table_insert(metadata_hash, key, primary);
1079+
}
1080+
else
1081+
{
1082+
// Existing primary found. Compare filenames to find the lexicographically smaller one.
1083+
if(strcmp(filename, primary->filename) > 0)
1084+
{
1085+
// New file has a larger name; it is a duplicate of the existing primary.
1086+
duplicate_rows = g_list_append(duplicate_rows, row_ref);
1087+
g_free(key);
1088+
}
1089+
else
1090+
{
1091+
// New file has a smaller name; it becomes the new primary, and the old primary is demoted to a duplicate.
1092+
duplicate_rows = g_list_append(duplicate_rows, primary->row_ref);
1093+
1094+
g_free(primary->filename);
1095+
primary->filename = g_strdup(filename);
1096+
primary->row_ref = row_ref;
1097+
g_hash_table_insert(metadata_hash, key, primary);
1098+
}
1099+
}
1100+
}
1101+
g_free(filename);
1102+
valid = gtk_tree_model_iter_next(model, &iter);
1103+
}
1104+
1105+
// Pass 2: Update UI details or remove rows for duplicates.
1106+
for(GList *node = duplicate_rows; node; node = g_list_next(node))
1107+
{
1108+
GtkTreeRowReference *ref = node->data;
1109+
if(gtk_tree_row_reference_valid(ref))
1110+
{
1111+
GtkTreePath *path = gtk_tree_row_reference_get_path(ref);
1112+
gtk_tree_model_get_iter(model, &iter, path);
1113+
gtk_tree_path_free(path);
1114+
1115+
char *filename = NULL;
1116+
uint64_t timestamp = 0;
1117+
uint64_t size = 0;
1118+
gtk_tree_model_get(model, &iter,
1119+
DT_IMPORT_FILENAME, &filename,
1120+
DT_IMPORT_DATETIME, &timestamp,
1121+
DT_IMPORT_FILESIZE, &size,
1122+
-1);
1123+
1124+
if(size > 0 && timestamp > 0)
1125+
{
1126+
char *key = g_strdup_printf("%" PRIu64 ":%" PRIu64, size, timestamp);
1127+
primary_info_t *primary = g_hash_table_lookup(metadata_hash, key);
1128+
1129+
// Ensure every duplicate links directly to the resolved primary (resolves duplicate chains).
1130+
if(primary && strcmp(filename, primary->filename) != 0)
1131+
{
1132+
if(!hide_duplicates)
1133+
{
1134+
// Update the duplicate indicator and mouseover tooltip.
1135+
char *tooltip = g_strdup_printf(_("possible duplicate of %s"), primary->filename);
1136+
gtk_list_store_set(d->from.store, &iter,
1137+
DT_IMPORT_UI_DUPLICATE, "⧉",
1138+
DT_IMPORT_TOOLTIP, tooltip,
1139+
-1);
1140+
g_free(tooltip);
1141+
}
1142+
}
1143+
g_free(key);
1144+
}
1145+
g_free(filename);
1146+
}
1147+
}
1148+
1149+
// Pass 3: If configured to hide duplicates, remove them from the list store.
1150+
if(hide_duplicates)
1151+
{
1152+
for(GList *node = duplicate_rows; node; node = g_list_next(node))
1153+
{
1154+
GtkTreeRowReference *ref = node->data;
1155+
if(gtk_tree_row_reference_valid(ref))
1156+
{
1157+
GtkTreePath *path = gtk_tree_row_reference_get_path(ref);
1158+
gtk_tree_model_get_iter(model, &iter, path);
1159+
gtk_tree_path_free(path);
1160+
gtk_list_store_remove(d->from.store, &iter);
1161+
}
1162+
}
1163+
}
1164+
1165+
// Cleanup duplicate row references.
1166+
g_list_free_full(duplicate_rows, (GDestroyNotify)gtk_tree_row_reference_free);
1167+
1168+
// Cleanup hash table keys and custom structures.
1169+
GHashTableIter hash_iter;
1170+
gpointer key, value;
1171+
g_hash_table_iter_init(&hash_iter, metadata_hash);
1172+
while(g_hash_table_iter_next(&hash_iter, &key, &value))
1173+
{
1174+
primary_info_t *p = value;
1175+
g_free(p->filename);
1176+
gtk_tree_row_reference_free(p->row_ref);
1177+
g_free(p);
1178+
}
1179+
g_hash_table_destroy(metadata_hash);
1180+
1181+
// Sync d->from.nb with the final populated row count in the store.
1182+
d->from.nb = gtk_tree_model_iter_n_children(model, NULL);
1183+
}
1184+
10961185
static gboolean _update_files_list(dt_lib_module_t *self)
10971186
{
10981187
dt_lib_import_t *d = self->data;
@@ -1117,6 +1206,7 @@ static gboolean _update_files_list(dt_lib_module_t *self)
11171206
{
11181207
d->from.nb = _import_from_camera_set_file_list(self);
11191208
gtk_widget_hide(d->from.info);
1209+
_import_resolve_duplicates(self);
11201210
// Restore the user-selected sort column and sort order if available,
11211211
// otherwise fallback to filename sorting.
11221212
if(has_sort && sort_column_id != GTK_TREE_SORTABLE_UNSORTED_SORT_COLUMN_ID)
@@ -1956,7 +2046,7 @@ static void _set_files_list(GtkWidget *rbox, dt_lib_module_t* self)
19562046
G_TYPE_STRING, G_TYPE_STRING,
19572047
G_TYPE_STRING, G_TYPE_STRING,
19582048
G_TYPE_UINT64, G_TYPE_STRING,
1959-
G_TYPE_STRING);
2049+
G_TYPE_STRING, G_TYPE_UINT64);
19602050
d->from.eye = dt_draw_paint_to_pixbuf(GTK_WIDGET(d->from.dialog), 13, 0,
19612051
dtgtk_cairo_paint_eye);
19622052

@@ -1977,15 +2067,15 @@ static void _set_files_list(GtkWidget *rbox, dt_lib_module_t* self)
19772067
GtkWidget *header = gtk_tree_view_column_get_button(column);
19782068
gtk_widget_set_tooltip_text(header, _("mark already imported images"));
19792069

1980-
// Add a skinny 25px duplicate column next to the exists column (only visible for camera imports)
2070+
// Add a skinny 25px duplicate column next to the exists column
19812071
renderer = gtk_cell_renderer_text_new();
19822072
column = gtk_tree_view_column_new_with_attributes("⧉", renderer, "text",
19832073
DT_IMPORT_UI_DUPLICATE, NULL);
19842074
g_object_set(renderer, "xalign", 0.5, NULL);
19852075
gtk_tree_view_append_column(d->from.treeview, column);
19862076
gtk_tree_view_column_set_alignment(column, 0.5);
19872077
gtk_tree_view_column_set_min_width(column, DT_PIXEL_APPLY_DPI(25));
1988-
gtk_tree_view_column_set_visible(column, d->import_case == DT_IMPORT_CAMERA);
2078+
gtk_tree_view_column_set_visible(column, TRUE);
19892079
header = gtk_tree_view_column_get_button(column);
19902080
if(header)
19912081
gtk_widget_set_tooltip_text(header, _("mark possible duplicate images"));
@@ -2216,16 +2306,13 @@ static void _import_from_dialog_new(dt_lib_module_t* self)
22162306
g_signal_connect(G_OBJECT(ignore_nonraws), "toggled",
22172307
G_CALLBACK(_ignore_nonraws_toggled), self);
22182308

2219-
// For camera imports, add a checkbox to hide duplicate images and connect its toggled signal
2220-
if(d->import_case == DT_IMPORT_CAMERA)
2221-
{
2222-
col = 0;
2223-
GtkWidget *hide_duplicates =
2224-
dt_gui_preferences_bool(grid, "ui_last/import_hide_duplicates", col++, line, TRUE);
2225-
gtk_widget_set_hexpand(gtk_grid_get_child_at(grid, col++, line++), TRUE);
2226-
g_signal_connect(G_OBJECT(hide_duplicates), "toggled",
2227-
G_CALLBACK(_hide_duplicates_toggled), self);
2228-
}
2309+
// Add a checkbox to hide duplicate images and connect its toggled signal
2310+
col = 0;
2311+
GtkWidget *hide_duplicates =
2312+
dt_gui_preferences_bool(grid, "ui_last/import_hide_duplicates", col++, line, TRUE);
2313+
gtk_widget_set_hexpand(gtk_grid_get_child_at(grid, col++, line++), TRUE);
2314+
g_signal_connect(G_OBJECT(hide_duplicates), "toggled",
2315+
G_CALLBACK(_hide_duplicates_toggled), self);
22292316

22302317
gtk_box_pack_start(GTK_BOX(rbox), GTK_WIDGET(grid), FALSE, FALSE, 8);
22312318

0 commit comments

Comments
 (0)