Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions features/media-import.feature
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,69 @@ Feature: Manage WordPress attachments
"""
/foo/large-image.jpg
"""

# Verifies that --skip-duplicates skips a local file whose name matches an
# attachment created by an earlier import, and reports it as skipped.
Scenario: Skip importing a local file that was already imported
Given download:
| path | url |
| {CACHE_DIR}/large-image.jpg | http://wp-cli.github.io/behat-data/large-image.jpg |

# First import: --porcelain output is captured as the attachment ID so the
# skip message below can be checked against it.
When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain`
Then save STDOUT as {ATTACHMENT_ID}
And STDOUT should not be empty

# Second import of the same file: nothing is imported, one item is skipped,
# and the command still exits successfully.
When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
already exists as attachment ID {ATTACHMENT_ID}
"""
And STDOUT should contain:
"""
Success: Imported 0 of 1 items (1 skipped).
"""
And the return code should be 0

# Same check as the local-file case, but the file is given as a URL on both runs.
Scenario: Skip importing a remote file that was already imported
# First import: capture the attachment ID printed by --porcelain.
When I run `wp media import 'http://wp-cli.github.io/behat-data/codeispoetry.png' --porcelain`
Then save STDOUT as {ATTACHMENT_ID}
And STDOUT should not be empty

# Second import of the same URL: expected to be skipped and to reference the
# attachment ID saved above.
When I run `wp media import 'http://wp-cli.github.io/behat-data/codeispoetry.png' --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
already exists as attachment ID {ATTACHMENT_ID}
"""
And STDOUT should contain:
"""
Success: Imported 0 of 1 items (1 skipped).
"""
And the return code should be 0

# Verifies that in a mixed batch only the already-imported file is skipped
# while the new file is still imported.
Scenario: Import new file while skipping duplicates from a batch
Given download:
| path | url |
| {CACHE_DIR}/large-image.jpg | http://wp-cli.github.io/behat-data/large-image.jpg |

# Seed the library with the local file (no --skip-duplicates on this run).
When I run `wp media import {CACHE_DIR}/large-image.jpg`
Then STDOUT should contain:
"""
Success: Imported 1 of 1 items.
"""

# Batch of two: the local file imported above plus a remote file that has not
# been imported in this scenario. One is skipped, one is imported.
When I run `wp media import {CACHE_DIR}/large-image.jpg 'http://wp-cli.github.io/behat-data/codeispoetry.png' --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
Success: Imported 1 of 2 items (1 skipped).
"""
And the return code should be 0
91 changes: 88 additions & 3 deletions src/Media_Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ public function prune( $args, $assoc_args = array() ) {
* [--featured_image]
* : If set, set the imported image as the Featured Image of the post it is attached to.
*
* [--skip-duplicates]
* : If set, media files that have already been imported will be skipped.
*
* [--porcelain[=<field>]]
* : Output a single field for each imported image. Defaults to attachment ID when used as flag.
* ---
Expand Down Expand Up @@ -465,7 +468,7 @@ public function prune( $args, $assoc_args = array() ) {
* Success: Imported 1 of 1 items.
*
* @param string[] $args Positional arguments.
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, 'skip-duplicates'?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
* @return void
*/
public function import( $args, $assoc_args = array() ) {
Expand Down Expand Up @@ -518,6 +521,7 @@ public function import( $args, $assoc_args = array() ) {
$number = 0;
$successes = 0;
$errors = 0;
$skips = 0;
foreach ( $args as $file ) {
++$number;
if ( 0 === $number % self::WP_CLEAR_OBJECT_CACHE_INTERVAL ) {
Expand Down Expand Up @@ -603,6 +607,16 @@ public function import( $args, $assoc_args = array() ) {
++$errors;
continue;
}
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
$existing = $this->find_duplicate_attachment( Path::basename( $file ) );
if ( false !== $existing ) {
Comment thread
swissspidy marked this conversation as resolved.
if ( ! $porcelain ) {
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
}
++$skips;
continue;
}
}
if ( Utils\get_flag_value( $assoc_args, 'skip-copy' ) ) {
$tempfile = $file;
} else {
Expand All @@ -614,6 +628,16 @@ public function import( $args, $assoc_args = array() ) {
$file_time = @filemtime( $file );
}
} else {
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
$existing = $this->find_duplicate_attachment( (string) explode( '?', Path::basename( $file ), 2 )[0] );
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The expression (string) explode( '?', Path::basename( $file ), 2 )[0] is calculated twice for each remote file (here and again on line 653). For better performance in bulk import operations and improved maintainability, consider extracting this to a variable (e.g., $basename) before the duplicate check and reusing it.

if ( false !== $existing ) {
if ( ! $porcelain ) {
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
}
++$skips;
continue;
}
}
$tempfile = download_url( $file );
if ( is_wp_error( $tempfile ) ) {
WP_CLI::warning(
Expand All @@ -626,7 +650,7 @@ public function import( $args, $assoc_args = array() ) {
++$errors;
continue;
}
$name = (string) strtok( Path::basename( $file ), '?' );
$name = (string) explode( '?', Path::basename( $file ), 2 )[0];
}
}

Expand Down Expand Up @@ -769,7 +793,7 @@ public function import( $args, $assoc_args = array() ) {

// Report the result of the operation
if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) {
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors );
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ? $skips : null );
} elseif ( $errors ) {
WP_CLI::halt( 1 );
}
Expand Down Expand Up @@ -1081,6 +1105,67 @@ private function make_copy( $path ) {
return $filename;
}

/**
 * Finds an existing attachment whose basename matches the given filename.
 *
 * Searches the `_wp_attached_file` post meta, which stores the path relative to
 * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Also
 * checks for the WP 5.3+ big-image scaled variant (e.g. 'image-scaled.jpg') so
 * that re-importing a large file that was scaled on first import is correctly
 * detected as a duplicate.
 *
 * Trashed attachments are ignored. When several attachments share the same
 * basename across different upload subdirectories, a single match is returned;
 * the query has no ORDER BY, so which one is returned is up to the database.
 *
 * An empty basename never matches: it cannot identify a file, and would
 * otherwise match any attachment whose stored path is empty or ends in '/'.
 *
 * @param string $basename Filename basename to search for (e.g. 'image.jpg').
 * @return int|false Attachment ID if found, false otherwise.
 */
private function find_duplicate_attachment( $basename ) {
	global $wpdb;

	// Guard against e.g. remote URLs like 'http://example.com/dir/?x=1', whose
	// basename is empty once the query string has been stripped.
	if ( '' === $basename ) {
		return false;
	}

	// WP 5.3+ big-image scaling renames 'image.jpg' → 'image-scaled.jpg' and
	// stores the scaled name in _wp_attached_file, so search for both variants.
	$ext  = pathinfo( $basename, PATHINFO_EXTENSION );
	$name = pathinfo( $basename, PATHINFO_FILENAME );

	// Compare against '' explicitly: a plain truthiness test would drop a
	// literal '0' extension.
	$scaled_basename = $name . '-scaled' . ( '' !== $ext ? '.' . $ext : '' );

	// The leading slash anchors the suffix match to a full path segment, so
	// 'image.jpg' does not match 'my-image.jpg'.
	$slash_basename        = '/' . $basename;
	$slash_scaled_basename = '/' . $scaled_basename;

	// MySQL's RIGHT() takes a character count, so prefer the multibyte-aware
	// length; strlen() is only a fallback when mbstring is unavailable.
	if ( function_exists( 'mb_strlen' ) ) {
		$slash_basename_length        = mb_strlen( $slash_basename, 'UTF-8' );
		$slash_scaled_basename_length = mb_strlen( $slash_scaled_basename, 'UTF-8' );
	} else {
		$slash_basename_length        = strlen( $slash_basename );
		$slash_scaled_basename_length = strlen( $slash_scaled_basename );
	}

	// Each variant is matched either exactly (file stored at the uploads root)
	// or as the trailing '/<basename>' segment of a longer relative path.
	$result = $wpdb->get_var(
		$wpdb->prepare(
			"SELECT p.ID
			FROM {$wpdb->posts} p
			INNER JOIN {$wpdb->postmeta} pm
				ON p.ID = pm.post_id
			WHERE p.post_type = 'attachment'
				AND p.post_status != 'trash'
				AND pm.meta_key = '_wp_attached_file'
				AND (
					pm.meta_value = %s
					OR RIGHT(pm.meta_value, %d) = %s
					OR pm.meta_value = %s
					OR RIGHT(pm.meta_value, %d) = %s
				)
			LIMIT 1",
			$basename,
			$slash_basename_length,
			$slash_basename,
			$scaled_basename,
			$slash_scaled_basename_length,
			$slash_scaled_basename
		)
	);

	return $result ? (int) $result : false;
}

/**
* Returns a human-readable description for one or more image size names.
*
Expand Down
Loading