Skip to content

Commit cd7eb24

Browse files
Copilot and swissspidy committed
Add --skip-duplicates flag to wp media import command
Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com>
1 parent 4f75073 commit cd7eb24

2 files changed

Lines changed: 126 additions & 2 deletions

File tree

features/media-import.feature

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,69 @@ Feature: Manage WordPress attachments
318318
"""
319319
/foo/large-image.jpg
320320
"""
321+
322+
Scenario: Skip importing a local file that was already imported
323+
Given download:
324+
| path | url |
325+
| {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg |
326+
327+
When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain`
328+
Then save STDOUT as {ATTACHMENT_ID}
329+
And STDOUT should not be empty
330+
331+
When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates`
332+
Then STDOUT should contain:
333+
"""
334+
Skipped importing file
335+
"""
336+
And STDOUT should contain:
337+
"""
338+
already exists as attachment ID {ATTACHMENT_ID}
339+
"""
340+
And STDOUT should contain:
341+
"""
342+
Success: Imported 0 of 1 items (1 skipped).
343+
"""
344+
And the return code should be 0
345+
346+
Scenario: Skip importing a remote file that was already imported
347+
When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --porcelain`
348+
Then save STDOUT as {ATTACHMENT_ID}
349+
And STDOUT should not be empty
350+
351+
When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates`
352+
Then STDOUT should contain:
353+
"""
354+
Skipped importing file
355+
"""
356+
And STDOUT should contain:
357+
"""
358+
already exists as attachment ID {ATTACHMENT_ID}
359+
"""
360+
And STDOUT should contain:
361+
"""
362+
Success: Imported 0 of 1 items (1 skipped).
363+
"""
364+
And the return code should be 0
365+
366+
Scenario: Import new file while skipping duplicates from a batch
367+
Given download:
368+
| path | url |
369+
| {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg |
370+
371+
When I run `wp media import {CACHE_DIR}/large-image.jpg`
372+
Then STDOUT should contain:
373+
"""
374+
Success: Imported 1 of 1 items.
375+
"""
376+
377+
When I run `wp media import {CACHE_DIR}/large-image.jpg 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates`
378+
Then STDOUT should contain:
379+
"""
380+
Skipped importing file
381+
"""
382+
And STDOUT should contain:
383+
"""
384+
Success: Imported 1 of 2 items (1 skipped).
385+
"""
386+
And the return code should be 0

src/Media_Command.php

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ public function regenerate( $args, $assoc_args = array() ) {
270270
* [--featured_image]
271271
* : If set, set the imported image as the Featured Image of the post it is attached to.
272272
*
273+
* [--skip-duplicates]
274+
* : If set, media files that have already been imported will be skipped.
275+
*
273276
* [--porcelain[=<field>]]
274277
* : Output a single field for each imported image. Defaults to attachment ID when used as flag.
275278
* ---
@@ -308,7 +311,7 @@ public function regenerate( $args, $assoc_args = array() ) {
308311
* http://wordpress-develop.dev/wp-header-logo/
309312
*
310313
* @param string[] $args Positional arguments.
311-
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
314+
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, 'skip-duplicates'?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
312315
* @return void
313316
*/
314317
public function import( $args, $assoc_args = array() ) {
@@ -361,6 +364,7 @@ public function import( $args, $assoc_args = array() ) {
361364
$number = 0;
362365
$successes = 0;
363366
$errors = 0;
367+
$skips = 0;
364368
foreach ( $args as $file ) {
365369
++$number;
366370
if ( 0 === $number % self::WP_CLEAR_OBJECT_CACHE_INTERVAL ) {
@@ -379,6 +383,16 @@ public function import( $args, $assoc_args = array() ) {
379383
++$errors;
380384
continue;
381385
}
386+
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
387+
$existing = $this->find_duplicate_attachment( Utils\basename( $file ), false );
388+
if ( false !== $existing ) {
389+
if ( ! $porcelain ) {
390+
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
391+
}
392+
++$skips;
393+
continue;
394+
}
395+
}
382396
if ( Utils\get_flag_value( $assoc_args, 'skip-copy' ) ) {
383397
$tempfile = $file;
384398
} else {
@@ -390,6 +404,16 @@ public function import( $args, $assoc_args = array() ) {
390404
$file_time = @filemtime( $file );
391405
}
392406
} else {
407+
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
408+
$existing = $this->find_duplicate_attachment( $file, true );
409+
if ( false !== $existing ) {
410+
if ( ! $porcelain ) {
411+
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
412+
}
413+
++$skips;
414+
continue;
415+
}
416+
}
393417
$tempfile = download_url( $file );
394418
if ( is_wp_error( $tempfile ) ) {
395419
WP_CLI::warning(
@@ -542,7 +566,7 @@ public function import( $args, $assoc_args = array() ) {
542566

543567
// Report the result of the operation
544568
if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) {
545-
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors );
569+
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, $skips );
546570
} elseif ( $errors ) {
547571
WP_CLI::halt( 1 );
548572
}
@@ -692,6 +716,40 @@ private function make_copy( $path ) {
692716
return $filename;
693717
}
694718

719+
/**
 * Looks up an attachment that appears to have been imported from the given file already.
 *
 * Remote lookups compare the full URL against the `_source_url` post meta value.
 * Local lookups compare the given name against the `_wp_attached_file` post meta value,
 * accepting either an exact match or a value whose final path segment equals the name.
 * Both queries use `LIMIT 1` without an explicit ordering, so when several rows match
 * (for example the same basename in different upload subdirectories) only one of them
 * is reported.
 *
 * @param string $file_or_name Basename of the local file, or the full URL for remote files.
 * @param bool   $is_remote    True to search by source URL, false to search by filename.
 * @return int|false ID taken from the matching post meta row, or false when nothing matches.
 */
private function find_duplicate_attachment( $file_or_name, $is_remote ) {
	global $wpdb;

	if ( $is_remote ) {
		$query = $wpdb->prepare(
			"SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_source_url' AND meta_value = %s LIMIT 1",
			$file_or_name
		);
	} else {
		// Escape LIKE wildcards in the name so that only the leading "%/" acts as a pattern.
		$suffix_pattern = '%/' . $wpdb->esc_like( $file_or_name );

		$query = $wpdb->prepare(
			"SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR meta_value LIKE %s) LIMIT 1",
			$file_or_name,
			$suffix_pattern
		);
	}

	$attachment_id = $wpdb->get_var( $query );

	// Any falsy result (no row, empty value, "0") is reported as "not found".
	if ( ! $attachment_id ) {
		return false;
	}

	return (int) $attachment_id;
}
752+
695753
/**
696754
* Returns a human-readable description for one or more image size names.
697755
*

0 commit comments

Comments
 (0)