@@ -63,6 +63,7 @@ static void _do_select_none(dt_lib_module_t* self);
6363static uint32_t _do_select_new (dt_lib_module_t * self );
6464static void _update_places_list (dt_lib_module_t * self );
6565static gboolean _update_files_list (dt_lib_module_t * self );
66+ static void _import_resolve_duplicates (dt_lib_module_t * self );
6667static void _update_folders_list (dt_lib_module_t * self );
6768static void _update_images_number (dt_lib_module_t * self ,
6869 const guint nb_sel );
@@ -84,6 +85,7 @@ typedef enum dt_import_cols_t
8485 DT_IMPORT_DATETIME , // file datetime
8586 DT_IMPORT_UI_DUPLICATE , // whether the image is a duplicate
8687 DT_IMPORT_TOOLTIP , // tooltip text
88+ DT_IMPORT_FILESIZE , // file size in bytes
8789 DT_IMPORT_NUM_COLS
8890} dt_import_cols_t ;
8991
@@ -369,75 +371,6 @@ static guint _import_from_camera_set_file_list(dt_lib_module_t *self)
369371 int nb = 0 ;
370372 const gboolean include_nonraws = !dt_conf_get_bool ("ui_last/import_ignore_nonraws" );
371373
372- // Metadata duplicate pre-pass: check files by size and timestamp.
373- // Group files with the same size and timestamp using a hash table.
374- // When a duplicate pair is found, flag the one with the lexicographically
375- // higher filename as a possible duplicate, referencing the primary file.
376- GHashTable * metadata_hash = g_hash_table_new_full (g_str_hash , g_str_equal , g_free , NULL );
377- for (GList * img = imgs ; img ; img = g_list_next (img ))
378- {
379- dt_camera_files_t * file = img -> data ;
380- const char * ext = g_strrstr (file -> filename , "." );
381- if (include_nonraws ||
382- (ext && ((dt_imageio_is_raw_by_extension (ext )) ||
383- !g_ascii_strncasecmp (ext , ".dng" , sizeof (".dng" )))))
384- {
385- // Only perform duplicate checking if valid metadata (size and timestamp) is present.
386- // If either is missing (0), we cannot reliably identify duplicates.
387- if (file -> size > 0 && file -> timestamp > 0 )
388- {
389- char * key = g_strdup_printf ("%" PRIu64 ":%" PRIu64 , (uint64_t )file -> size , (uint64_t )file -> timestamp );
390- dt_camera_files_t * existing = g_hash_table_lookup (metadata_hash , key );
391- if (existing == NULL )
392- {
393- g_hash_table_insert (metadata_hash , key , file );
394- file -> possible_duplicate = FALSE;
395- }
396- else
397- {
398- if (strcmp (file -> filename , existing -> filename ) > 0 )
399- {
400- file -> possible_duplicate = TRUE;
401- g_free (file -> duplicate_of );
402- file -> duplicate_of = g_strdup (existing -> filename );
403- g_free (key );
404- }
405- else
406- {
407- existing -> possible_duplicate = TRUE;
408- g_free (existing -> duplicate_of );
409- existing -> duplicate_of = g_strdup (file -> filename );
410- g_hash_table_insert (metadata_hash , key , file );
411- }
412- }
413- }
414- else
415- {
416- file -> possible_duplicate = FALSE;
417- }
418- }
419- }
420-
421- // Resolve duplicate chains so every duplicate points to the ultimate primary file.
422- for (GList * img = imgs ; img ; img = g_list_next (img ))
423- {
424- dt_camera_files_t * file = img -> data ;
425- if (file -> possible_duplicate && file -> size > 0 && file -> timestamp > 0 )
426- {
427- char * key = g_strdup_printf ("%" PRIu64 ":%" PRIu64 , (uint64_t )file -> size , (uint64_t )file -> timestamp );
428- dt_camera_files_t * primary = g_hash_table_lookup (metadata_hash , key );
429- if (primary && primary != file )
430- {
431- g_free (file -> duplicate_of );
432- file -> duplicate_of = g_strdup (primary -> filename );
433- }
434- g_free (key );
435- }
436- }
437- g_hash_table_destroy (metadata_hash );
438-
439- const gboolean hide_duplicates = dt_conf_get_bool ("ui_last/import_hide_duplicates" );
440-
441374 for (GList * img = imgs ; img ; img = g_list_next (img ))
442375 {
443376 dt_camera_files_t * file = img -> data ;
@@ -448,11 +381,6 @@ static guint _import_from_camera_set_file_list(dt_lib_module_t *self)
448381 (ext && ((dt_imageio_is_raw_by_extension (ext )) ||
449382 !g_ascii_strncasecmp (ext , ".dng" , sizeof (".dng" )))))
450383 {
451- // If configured to hide duplicate camera images, skip adding them to the UI list store
452- if (hide_duplicates && file -> possible_duplicate )
453- {
454- continue ;
455- }
456384 const time_t datetime = file -> timestamp ;
457385 GDateTime * dt_datetime = g_date_time_new_from_unix_local (datetime );
458386 gchar * dt_txt = g_date_time_format (dt_datetime , "%x %X" );
@@ -462,25 +390,15 @@ static guint _import_from_camera_set_file_list(dt_lib_module_t *self)
462390 const gboolean already_imported = dt_metadata_already_imported (basename , dtid );
463391 g_free (basename );
464392
465- // Generate a tooltip indicating the primary file of which this is a duplicate
466- gchar * tooltip = NULL ;
467- if (file -> possible_duplicate && file -> duplicate_of )
468- {
469- tooltip = g_strdup_printf (_ ("possible duplicate of %s" ), file -> duplicate_of );
470- }
471-
472393 gtk_list_store_insert_with_values (d -> from .store , NULL , -1 ,
473394 DT_IMPORT_UI_EXISTS , already_imported ? "✔" : " " ,
474395 DT_IMPORT_UI_FILENAME , file -> filename ,
475396 DT_IMPORT_FILENAME , file -> filename ,
476397 DT_IMPORT_UI_DATETIME , dt_txt ,
477398 DT_IMPORT_DATETIME , datetime ,
478399 DT_IMPORT_THUMB , d -> from .eye ,
479- // Columns showing duplicate indicator and mouseover tooltip
480- DT_IMPORT_UI_DUPLICATE , file -> possible_duplicate ? "⧉" : " " ,
481- DT_IMPORT_TOOLTIP , tooltip ,
400+ DT_IMPORT_FILESIZE , file -> size ,
482401 -1 );
483- g_free (tooltip );
484402 nb ++ ;
485403 g_free (dt_txt );
486404 g_date_time_unref (dt_datetime );
@@ -835,6 +753,9 @@ static void _import_add_file_callback(GObject *direnum,
835753 }
836754 else
837755 {
756+ // Scanning is complete; resolve duplicates in the store.
757+ _import_resolve_duplicates (self );
758+
838759 // Nothing more to parse, do select the images
839760 // according to the preference.
840761 uint32_t count_sel = 0 ;
@@ -896,6 +817,8 @@ static void _import_add_file_callback(GObject *direnum,
896817 const GFileType filetype = g_file_info_get_file_type (info );
897818 const time_t datetime =
898819 g_file_info_get_attribute_uint64 (info , G_FILE_ATTRIBUTE_TIME_MODIFIED );
820+ const uint64_t size =
821+ g_file_info_get_attribute_uint64 (info , G_FILE_ATTRIBUTE_STANDARD_SIZE );
899822
900823 /* g_file_info_get_is_hidden() always returns 0 on macOS, so we
901824 check if the filename starts with a '.' */
@@ -955,6 +878,7 @@ static void _import_add_file_callback(GObject *direnum,
955878 DT_IMPORT_UI_DATETIME , dt_txt ,
956879 DT_IMPORT_DATETIME , datetime ,
957880 DT_IMPORT_THUMB , d -> from .eye ,
881+ DT_IMPORT_FILESIZE , size ,
958882 -1 );
959883 d -> from .nb ++ ;
960884 g_free (dt_txt );
@@ -1014,7 +938,8 @@ static void _import_set_file_list(const gchar *folder,
1014938 G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME ","
1015939 G_FILE_ATTRIBUTE_TIME_MODIFIED ","
1016940 G_FILE_ATTRIBUTE_STANDARD_IS_HIDDEN ","
1017- G_FILE_ATTRIBUTE_STANDARD_TYPE ,
941+ G_FILE_ATTRIBUTE_STANDARD_TYPE ","
942+ G_FILE_ATTRIBUTE_STANDARD_SIZE ,
1018943 G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS ,
1019944 G_PRIORITY_LOW ,
1020945 d -> cancel_iter ,
@@ -1093,6 +1018,170 @@ static void _usefn_toggled(GtkWidget *widget,
10931018 _update_layout (self );
10941019}
10951020
1021+ /* Unified post-pass duplicate detector.
1022+ * Iterates through all rows currently in d->from.store, identifies groups of
1023+ * files with identical sizes and modification timestamps, and marks them.
1024+ * The lexicographically smallest filename in each group is treated as the primary file,
1025+ * and all others are flagged as duplicates. If "hide duplicates" is enabled, duplicate
1026+ * rows are removed from the store; otherwise, they are marked in the UI with a ⧉ symbol
1027+ * and a tooltip pointing to their primary file.
1028+ */
1029+ static void _import_resolve_duplicates (dt_lib_module_t * self )
1030+ {
1031+ dt_lib_import_t * d = self -> data ;
1032+ const gboolean hide_duplicates = dt_conf_get_bool ("ui_last/import_hide_duplicates" );
1033+
1034+ // Hash table to group files by size and timestamp.
1035+ // Key: "size:timestamp", Value: primary_info_t struct of the current primary.
1036+ GHashTable * metadata_hash = g_hash_table_new_full (g_str_hash , g_str_equal , g_free , NULL );
1037+
1038+ typedef struct {
1039+ char * filename ;
1040+ GtkTreeRowReference * row_ref ;
1041+ } primary_info_t ;
1042+
1043+ GtkTreeModel * model = GTK_TREE_MODEL (d -> from .store );
1044+ GtkTreeIter iter ;
1045+ gboolean valid = gtk_tree_model_get_iter_first (model , & iter );
1046+
1047+ // Keep a list of duplicate row references so we can update or remove them in a separate pass.
1048+ GList * duplicate_rows = NULL ;
1049+
1050+ // Pass 1: Identify all duplicates and track the lexicographical minimum filename for each size/timestamp.
1051+ while (valid )
1052+ {
1053+ char * filename = NULL ;
1054+ uint64_t timestamp = 0 ;
1055+ uint64_t size = 0 ;
1056+
1057+ gtk_tree_model_get (model , & iter ,
1058+ DT_IMPORT_FILENAME , & filename ,
1059+ DT_IMPORT_DATETIME , & timestamp ,
1060+ DT_IMPORT_FILESIZE , & size ,
1061+ -1 );
1062+
1063+ if (size > 0 && timestamp > 0 && filename )
1064+ {
1065+ char * key = g_strdup_printf ("%" PRIu64 ":%" PRIu64 , size , timestamp );
1066+ primary_info_t * primary = g_hash_table_lookup (metadata_hash , key );
1067+
1068+ GtkTreePath * path = gtk_tree_model_get_path (model , & iter );
1069+ GtkTreeRowReference * row_ref = gtk_tree_row_reference_new (model , path );
1070+ gtk_tree_path_free (path );
1071+
1072+ if (primary == NULL )
1073+ {
1074+ // First time seeing this size/timestamp. Mark this file as the primary.
1075+ primary = g_malloc0 (sizeof (primary_info_t ));
1076+ primary -> filename = g_strdup (filename );
1077+ primary -> row_ref = row_ref ;
1078+ g_hash_table_insert (metadata_hash , key , primary );
1079+ }
1080+ else
1081+ {
1082+ // Existing primary found. Compare filenames to find the lexicographically smaller one.
1083+ if (strcmp (filename , primary -> filename ) > 0 )
1084+ {
1085+ // New file has a larger name; it is a duplicate of the existing primary.
1086+ duplicate_rows = g_list_append (duplicate_rows , row_ref );
1087+ g_free (key );
1088+ }
1089+ else
1090+ {
1091+ // New file has a smaller name; it becomes the new primary, and the old primary is demoted to a duplicate.
1092+ duplicate_rows = g_list_append (duplicate_rows , primary -> row_ref );
1093+
1094+ g_free (primary -> filename );
1095+ primary -> filename = g_strdup (filename );
1096+ primary -> row_ref = row_ref ;
1097+ g_hash_table_insert (metadata_hash , key , primary );
1098+ }
1099+ }
1100+ }
1101+ g_free (filename );
1102+ valid = gtk_tree_model_iter_next (model , & iter );
1103+ }
1104+
1105+ // Pass 2: Update UI details or remove rows for duplicates.
1106+ for (GList * node = duplicate_rows ; node ; node = g_list_next (node ))
1107+ {
1108+ GtkTreeRowReference * ref = node -> data ;
1109+ if (gtk_tree_row_reference_valid (ref ))
1110+ {
1111+ GtkTreePath * path = gtk_tree_row_reference_get_path (ref );
1112+ gtk_tree_model_get_iter (model , & iter , path );
1113+ gtk_tree_path_free (path );
1114+
1115+ char * filename = NULL ;
1116+ uint64_t timestamp = 0 ;
1117+ uint64_t size = 0 ;
1118+ gtk_tree_model_get (model , & iter ,
1119+ DT_IMPORT_FILENAME , & filename ,
1120+ DT_IMPORT_DATETIME , & timestamp ,
1121+ DT_IMPORT_FILESIZE , & size ,
1122+ -1 );
1123+
1124+ if (size > 0 && timestamp > 0 )
1125+ {
1126+ char * key = g_strdup_printf ("%" PRIu64 ":%" PRIu64 , size , timestamp );
1127+ primary_info_t * primary = g_hash_table_lookup (metadata_hash , key );
1128+
1129+ // Ensure every duplicate links directly to the resolved primary (resolves duplicate chains).
1130+ if (primary && strcmp (filename , primary -> filename ) != 0 )
1131+ {
1132+ if (!hide_duplicates )
1133+ {
1134+ // Update the duplicate indicator and mouseover tooltip.
1135+ char * tooltip = g_strdup_printf (_ ("possible duplicate of %s" ), primary -> filename );
1136+ gtk_list_store_set (d -> from .store , & iter ,
1137+ DT_IMPORT_UI_DUPLICATE , "⧉" ,
1138+ DT_IMPORT_TOOLTIP , tooltip ,
1139+ -1 );
1140+ g_free (tooltip );
1141+ }
1142+ }
1143+ g_free (key );
1144+ }
1145+ g_free (filename );
1146+ }
1147+ }
1148+
1149+ // Pass 3: If configured to hide duplicates, remove them from the list store.
1150+ if (hide_duplicates )
1151+ {
1152+ for (GList * node = duplicate_rows ; node ; node = g_list_next (node ))
1153+ {
1154+ GtkTreeRowReference * ref = node -> data ;
1155+ if (gtk_tree_row_reference_valid (ref ))
1156+ {
1157+ GtkTreePath * path = gtk_tree_row_reference_get_path (ref );
1158+ gtk_tree_model_get_iter (model , & iter , path );
1159+ gtk_tree_path_free (path );
1160+ gtk_list_store_remove (d -> from .store , & iter );
1161+ }
1162+ }
1163+ }
1164+
1165+ // Cleanup duplicate row references.
1166+ g_list_free_full (duplicate_rows , (GDestroyNotify )gtk_tree_row_reference_free );
1167+
1168+ // Cleanup hash table keys and custom structures.
1169+ GHashTableIter hash_iter ;
1170+ gpointer key , value ;
1171+ g_hash_table_iter_init (& hash_iter , metadata_hash );
1172+ while (g_hash_table_iter_next (& hash_iter , & key , & value ))
1173+ {
1174+ primary_info_t * p = value ;
1175+ g_free (p -> filename );
1176+ gtk_tree_row_reference_free (p -> row_ref );
1177+ g_free (p );
1178+ }
1179+ g_hash_table_destroy (metadata_hash );
1180+
1181+ // Sync d->from.nb with the final populated row count in the store.
1182+ d -> from .nb = gtk_tree_model_iter_n_children (model , NULL );
1183+ }
1184+
10961185static gboolean _update_files_list (dt_lib_module_t * self )
10971186{
10981187 dt_lib_import_t * d = self -> data ;
@@ -1117,6 +1206,7 @@ static gboolean _update_files_list(dt_lib_module_t *self)
11171206 {
11181207 d -> from .nb = _import_from_camera_set_file_list (self );
11191208 gtk_widget_hide (d -> from .info );
1209+ _import_resolve_duplicates (self );
11201210 // Restore the user-selected sort column and sort order if available,
11211211 // otherwise fallback to filename sorting.
11221212 if (has_sort && sort_column_id != GTK_TREE_SORTABLE_UNSORTED_SORT_COLUMN_ID )
@@ -1956,7 +2046,7 @@ static void _set_files_list(GtkWidget *rbox, dt_lib_module_t* self)
19562046 G_TYPE_STRING , G_TYPE_STRING ,
19572047 G_TYPE_STRING , G_TYPE_STRING ,
19582048 G_TYPE_UINT64 , G_TYPE_STRING ,
1959- G_TYPE_STRING );
2049+ G_TYPE_STRING , G_TYPE_UINT64 );
19602050 d -> from .eye = dt_draw_paint_to_pixbuf (GTK_WIDGET (d -> from .dialog ), 13 , 0 ,
19612051 dtgtk_cairo_paint_eye );
19622052
@@ -1977,15 +2067,15 @@ static void _set_files_list(GtkWidget *rbox, dt_lib_module_t* self)
19772067 GtkWidget * header = gtk_tree_view_column_get_button (column );
19782068 gtk_widget_set_tooltip_text (header , _ ("mark already imported images" ));
19792069
1980- // Add a skinny 25px duplicate column next to the exists column (only visible for camera imports)
2070+ // Add a skinny 25px duplicate column next to the exists column
19812071 renderer = gtk_cell_renderer_text_new ();
19822072 column = gtk_tree_view_column_new_with_attributes ("⧉" , renderer , "text" ,
19832073 DT_IMPORT_UI_DUPLICATE , NULL );
19842074 g_object_set (renderer , "xalign" , 0.5 , NULL );
19852075 gtk_tree_view_append_column (d -> from .treeview , column );
19862076 gtk_tree_view_column_set_alignment (column , 0.5 );
19872077 gtk_tree_view_column_set_min_width (column , DT_PIXEL_APPLY_DPI (25 ));
1988- gtk_tree_view_column_set_visible (column , d -> import_case == DT_IMPORT_CAMERA );
2078+ gtk_tree_view_column_set_visible (column , TRUE );
19892079 header = gtk_tree_view_column_get_button (column );
19902080 if (header )
19912081 gtk_widget_set_tooltip_text (header , _ ("mark possible duplicate images" ));
@@ -2216,16 +2306,13 @@ static void _import_from_dialog_new(dt_lib_module_t* self)
22162306 g_signal_connect (G_OBJECT (ignore_nonraws ), "toggled" ,
22172307 G_CALLBACK (_ignore_nonraws_toggled ), self );
22182308
2219- // For camera imports, add a checkbox to hide duplicate images and connect its toggled signal
2220- if (d -> import_case == DT_IMPORT_CAMERA )
2221- {
2222- col = 0 ;
2223- GtkWidget * hide_duplicates =
2224- dt_gui_preferences_bool (grid , "ui_last/import_hide_duplicates" , col ++ , line , TRUE);
2225- gtk_widget_set_hexpand (gtk_grid_get_child_at (grid , col ++ , line ++ ), TRUE);
2226- g_signal_connect (G_OBJECT (hide_duplicates ), "toggled" ,
2227- G_CALLBACK (_hide_duplicates_toggled ), self );
2228- }
2309+ // Add a checkbox to hide duplicate images and connect its toggled signal
2310+ col = 0 ;
2311+ GtkWidget * hide_duplicates =
2312+ dt_gui_preferences_bool (grid , "ui_last/import_hide_duplicates" , col ++ , line , TRUE);
2313+ gtk_widget_set_hexpand (gtk_grid_get_child_at (grid , col ++ , line ++ ), TRUE);
2314+ g_signal_connect (G_OBJECT (hide_duplicates ), "toggled" ,
2315+ G_CALLBACK (_hide_duplicates_toggled ), self );
22292316
22302317 gtk_box_pack_start (GTK_BOX (rbox ), GTK_WIDGET (grid ), FALSE, FALSE, 8 );
22312318
0 commit comments