Skip to content

Commit d8d3532

Browse files
authored
Merge pull request #18744 from craftcms/feature/alt-exif-extract
Extract alt text from image metadata on upload
2 parents b3ff124 + 85bf82c commit d8d3532

2 files changed

Lines changed: 142 additions & 0 deletions

File tree

CHANGELOG-WIP.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
- Element indexes now automatically refresh after duplicating elements and the queue is completed, if there’s an active search term. ([#18636](https://github.com/craftcms/cms/issues/18636))
2323
- Timestamps in the control panel now include their time zone abbreviation. ([#18639](https://github.com/craftcms/cms/pull/18639))
2424
- Generated field values are no longer truncated within element cards. ([#18646](https://github.com/craftcms/cms/discussions/18646))
25+
- Assets’ Alternative Text values are now automatically set on upload, based on descriptive text data found in the uploaded file’s metadata. ([#18744](https://github.com/craftcms/cms/pull/18744))
2526

2627
### Administration
2728
- Sections now have a “Min Authors” setting. ([#18662](https://github.com/craftcms/cms/pull/18662))

src/elements/Asset.php

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@
7777
use DateTime;
7878
use GraphQL\Type\Definition\Type;
7979
use Illuminate\Support\Collection;
80+
use Imagick;
81+
use Throwable;
8082
use Twig\Markup;
8183
use yii\base\Exception;
8284
use yii\base\InvalidArgumentException;
@@ -3313,6 +3315,15 @@ private function _setKind(): void
33133315
public function afterSave(bool $isNew): void
33143316
{
33153317
if (!$this->propagating) {
3318+
// Auto-populate alt text from IPTC/XMP metadata on upload, before any cleaning strips it
3319+
if (
3320+
$this->alt === null &&
3321+
isset($this->tempFilePath) &&
3322+
in_array($this->getScenario(), [self::SCENARIO_CREATE, self::SCENARIO_REPLACE], true)
3323+
) {
3324+
$this->alt = $this->_getAltFromXmpMetadata($this->tempFilePath) ?? $this->_getAltFromIptcMetadata($this->tempFilePath);
3325+
}
3326+
33163327
// Are we uploading an image that needs to be sanitized?
33173328
if (
33183329
isset($this->tempFilePath) &&
@@ -3807,4 +3818,134 @@ private function allowTransforms(): bool
38073818
default => true,
38083819
};
38093820
}
3821+
3822+
/**
 * Attempts to extract alt text from XMP metadata embedded in an image file.
 *
 * The XMP packet is read through Imagick when that is the active image driver. If Imagick
 * is not in use, cannot read the file, or reports no XMP profile, the first 128KB of the raw
 * file are scanned for an `<x:xmpmeta>` packet instead.
 *
 * Properties are checked in this order, and the first non-empty (trimmed) value wins:
 * AltTextAccessibility in the Iptc4xmpCore and Iptc4xmpExt namespaces (as an rdf:Alt list,
 * then as a plain value), followed by dc:description (as an rdf:Alt list, then as a plain value).
 *
 * @param string $filePath
 * @return string|null The trimmed alt text, or null if nothing usable was found or an error occurred
 */
private function _getAltFromXmpMetadata(string $filePath): ?string
{
    try {
        $xmp = null;

        if (Craft::$app->getImages()->getIsImagick() && class_exists(Imagick::class)) {
            $imagick = null;
            try {
                $imagick = new Imagick($filePath);
                $xmp = $imagick->getImageProfile('xmp') ?: null;
            } catch (Throwable) {
                // Imagick throws when it can't read the file or when the image has no XMP profile.
                // Neither case is fatal here: leave $xmp null so the raw scan below still runs.
                $xmp = null;
            } finally {
                // Always release the handle, even if reading the profile failed
                $imagick?->clear();
            }
        }

        if ($xmp === null) {
            // Fall back to scanning the raw file for the XMP packet
            $handle = fopen($filePath, 'rb');
            if ($handle === false) {
                return null;
            }
            $chunk = fread($handle, 131072); // 128KB covers the XMP packet in most images
            fclose($handle);

            if ($chunk !== false) {
                $closingTag = '</x:xmpmeta>';
                $xmpStart = strpos($chunk, '<x:xmpmeta');
                if ($xmpStart !== false) {
                    $xmpEnd = strpos($chunk, $closingTag, $xmpStart);
                    if ($xmpEnd !== false) {
                        $xmp = substr($chunk, $xmpStart, $xmpEnd - $xmpStart + strlen($closingTag));
                    }
                }
            }
        }

        if (empty($xmp)) {
            return null;
        }

        // The packet comes from an uploaded file, so treat it as untrusted: forbid network access
        // while parsing, and silence parser diagnostics via flags rather than the @ operator.
        $dom = new \DOMDocument();
        if (!$dom->loadXML($xmp, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
            return null;
        }

        $xpath = new \DOMXPath($dom);
        $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
        $xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/');
        $xpath->registerNamespace('Iptc4xmpCore', 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/');
        $xpath->registerNamespace('Iptc4xmpExt', 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/');

        // Ordered by precedence: the dedicated accessibility property first, then the general description.
        // Each property is tried as a LangAlt (rdf:Alt/rdf:li) structure before a plain string value.
        $queries = [
            '//Iptc4xmpCore:AltTextAccessibility/rdf:Alt/rdf:li',
            '//Iptc4xmpExt:AltTextAccessibility/rdf:Alt/rdf:li',
            '//Iptc4xmpCore:AltTextAccessibility[not(rdf:Alt)]',
            '//Iptc4xmpExt:AltTextAccessibility[not(rdf:Alt)]',
            '//dc:description/rdf:Alt/rdf:li',
            '//dc:description[not(rdf:Alt)]',
        ];

        foreach ($queries as $query) {
            $nodes = $xpath->query($query);
            if ($nodes === false) {
                continue;
            }

            foreach ($nodes as $node) {
                $value = trim($node->textContent);
                if ($value !== '') {
                    return $value;
                }
            }
        }
    } catch (Throwable) {
        // Alt text extraction is best-effort: ignore errors so the caller can fall through to IPTC
    }

    return null;
}
3920+
3921+
/**
 * Attempts to extract alt text from IPTC Caption/Abstract (Iptc.Application2.Caption, field 2:120).
 *
 * The IPTC block is read from the APP13 segment reported by getimagesize(). Multiple caption
 * entries are joined with a single space. The result is always valid UTF-8: if the stored bytes
 * are not valid UTF-8 they are treated as ISO-8859-1 and converted.
 *
 * @param string $filePath
 * @return string|null The trimmed caption, or null if none was found or an error occurred
 */
private function _getAltFromIptcMetadata(string $filePath): ?string
{
    try {
        $imageInfo = [];
        @getimagesize($filePath, $imageInfo);

        if (!isset($imageInfo['APP13'])) {
            return null;
        }

        $iptc = iptcparse($imageInfo['APP13']);

        // iptcparse() returns false when the segment holds no parsable IPTC data
        if ($iptc === false || empty($iptc['2#120'])) {
            return null;
        }

        $value = trim(implode(' ', $iptc['2#120']));
        if ($value === '') {
            return null;
        }

        // IPTC-IIM text is raw bytes with no guaranteed encoding; many writers store Latin-1.
        // Normalize anything that isn't already valid UTF-8 so broken byte sequences never reach
        // the alt attribute.
        if (!mb_check_encoding($value, 'UTF-8')) {
            $value = mb_convert_encoding($value, 'UTF-8', 'ISO-8859-1');
        }

        return $value;
    } catch (Throwable) {
        // Alt text extraction is best-effort: ignore errors
    }

    return null;
}
38103951
}

0 commit comments

Comments
 (0)