|
77 | 77 | use DateTime; |
78 | 78 | use GraphQL\Type\Definition\Type; |
79 | 79 | use Illuminate\Support\Collection; |
| 80 | +use Imagick; |
| 81 | +use Throwable; |
80 | 82 | use Twig\Markup; |
81 | 83 | use yii\base\Exception; |
82 | 84 | use yii\base\InvalidArgumentException; |
@@ -3313,6 +3315,15 @@ private function _setKind(): void |
3313 | 3315 | public function afterSave(bool $isNew): void |
3314 | 3316 | { |
3315 | 3317 | if (!$this->propagating) { |
| 3318 | + // Auto-populate alt text from IPTC/XMP metadata on upload, before any cleaning strips it |
| 3319 | + if ( |
| 3320 | + $this->alt === null && |
| 3321 | + isset($this->tempFilePath) && |
| 3322 | + in_array($this->getScenario(), [self::SCENARIO_CREATE, self::SCENARIO_REPLACE], true) |
| 3323 | + ) { |
| 3324 | + $this->alt = $this->_getAltFromXmpMetadata($this->tempFilePath) ?? $this->_getAltFromIptcMetadata($this->tempFilePath); |
| 3325 | + } |
| 3326 | + |
3316 | 3327 | // Are we uploading an image that needs to be sanitized? |
3317 | 3328 | if ( |
3318 | 3329 | isset($this->tempFilePath) && |
@@ -3807,4 +3818,134 @@ private function allowTransforms(): bool |
3807 | 3818 | default => true, |
3808 | 3819 | }; |
3809 | 3820 | } |
| 3821 | + |
/**
 * Attempts to extract alt text from XMP metadata embedded in an image file.
 *
 * AltTextAccessibility (looked up in both the Iptc4xmpCore and Iptc4xmpExt namespaces) is
 * checked first, then dc:description. Each property is accepted either as a language-alternative
 * array (rdf:Alt/rdf:li) or as a plain string value. The first non-empty, trimmed value wins.
 *
 * This is best-effort: any failure is logged and results in `null`, so the caller can fall
 * back to another metadata source.
 *
 * @param string $filePath Path to the image file to inspect
 * @return string|null The alt text, or null if none could be found
 */
private function _getAltFromXmpMetadata(string $filePath): ?string
{
    try {
        $xmp = $this->_readXmpPacket($filePath);
        if ($xmp === null) {
            return null;
        }

        // Collect libxml errors internally instead of emitting warnings for malformed packets,
        // and restore the previous setting afterwards since it is process-wide state.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $dom = new \DOMDocument();
            // The packet comes from an uploaded file, so never allow network access while parsing it
            $loaded = $dom->loadXML($xmp, LIBXML_NONET);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        if (!$loaded) {
            return null;
        }

        $xpath = new \DOMXPath($dom);
        $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
        $xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/');
        $xpath->registerNamespace('Iptc4xmpCore', 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/');
        $xpath->registerNamespace('Iptc4xmpExt', 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/');

        // Ordered by priority: dedicated alt text before the general description,
        // and the rdf:Alt/rdf:li structure before a plain string value.
        $queries = [
            '//Iptc4xmpCore:AltTextAccessibility/rdf:Alt/rdf:li',
            '//Iptc4xmpExt:AltTextAccessibility/rdf:Alt/rdf:li',
            '//Iptc4xmpCore:AltTextAccessibility[not(rdf:Alt)]',
            '//Iptc4xmpExt:AltTextAccessibility[not(rdf:Alt)]',
            '//dc:description/rdf:Alt/rdf:li',
            '//dc:description[not(rdf:Alt)]',
        ];

        foreach ($queries as $query) {
            $nodes = $xpath->query($query);
            if ($nodes === false) {
                continue;
            }

            foreach ($nodes as $node) {
                $value = trim($node->textContent);
                if ($value !== '') {
                    return $value;
                }
            }
        }
    } catch (Throwable $e) {
        // Metadata extraction must never break an upload; log it and let the caller fall through to IPTC
        Craft::warning("Unable to read XMP alt text from $filePath: " . $e->getMessage(), __METHOD__);
    }

    return null;
}

/**
 * Returns the raw XMP packet for an image file, or null if none could be located.
 *
 * Imagick is tried first when it is the active image driver. If that yields nothing (or fails),
 * the first 128KB of the file are scanned for an `<x:xmpmeta>` element.
 *
 * @param string $filePath Path to the image file to inspect
 * @return string|null The XMP packet XML, or null if none was found
 */
private function _readXmpPacket(string $filePath): ?string
{
    if (Craft::$app->getImages()->getIsImagick() && class_exists(Imagick::class)) {
        $imagick = null;
        try {
            $imagick = new Imagick($filePath);
            $profile = $imagick->getImageProfile('xmp');
            if (is_string($profile) && $profile !== '') {
                return $profile;
            }
        } catch (Throwable) {
            // Imagick throws when it can't read the file (and presumably when the profile is
            // missing — confirm against the extension); either way, try the raw scan below
            // rather than giving up.
        } finally {
            // Always release the image, even if reading the profile failed
            $imagick?->clear();
        }
    }

    // Fall back to scanning the raw file for the XMP packet
    $handle = fopen($filePath, 'rb');
    if ($handle === false) {
        return null;
    }

    try {
        $chunk = fread($handle, 131072); // 128KB covers the XMP packet in most images
    } finally {
        fclose($handle);
    }

    if ($chunk === false) {
        return null;
    }

    $start = strpos($chunk, '<x:xmpmeta');
    if ($start === false) {
        return null;
    }

    $closingTag = '</x:xmpmeta>';
    $end = strpos($chunk, $closingTag, $start);
    if ($end === false) {
        return null;
    }

    return substr($chunk, $start, $end - $start + strlen($closingTag));
}
| 3920 | + |
/**
 * Attempts to extract alt text from IPTC Caption/Abstract (Iptc.Application2.Caption, field 2:120).
 *
 * The caption is read from the APP13 segment reported by getimagesize(). Multiple caption
 * entries are joined with a space. The result is always valid UTF-8: bytes that are not
 * already valid UTF-8 are converted before being returned.
 *
 * This is best-effort: any failure is logged and results in `null`.
 *
 * @param string $filePath Path to the image file to inspect
 * @return string|null The trimmed caption, or null if none could be found
 */
private function _getAltFromIptcMetadata(string $filePath): ?string
{
    try {
        $imageInfo = [];
        // getimagesize() raises a warning for unreadable or non-image files; since this lookup
        // is optional, the warning is suppressed and the missing APP13 key handles the failure.
        @getimagesize($filePath, $imageInfo);

        if (!isset($imageInfo['APP13'])) {
            return null;
        }

        $iptc = iptcparse($imageInfo['APP13']);

        // iptcparse() returns false when the segment can't be parsed
        if ($iptc === false || empty($iptc['2#120'])) {
            return null;
        }

        $value = implode(' ', $iptc['2#120']);

        // IPTC text is only UTF-8 when record 1:90 (coded character set) says so via the
        // ESC % G sequence. Anything that isn't valid UTF-8 must be converted, otherwise
        // invalid byte sequences would end up in the alt text.
        if (!mb_check_encoding($value, 'UTF-8')) {
            $declaredUtf8 = ($iptc['1#090'][0] ?? null) === "\x1B%G";
            // Windows-1252 is an assumption for undeclared legacy data — the file itself
            // doesn't tell us. When UTF-8 was declared, this just scrubs the invalid bytes.
            $value = mb_convert_encoding($value, 'UTF-8', $declaredUtf8 ? 'UTF-8' : 'Windows-1252');
        }

        $value = trim($value);
        if ($value !== '') {
            return $value;
        }
    } catch (Throwable $e) {
        // Metadata extraction must never break an upload; log it and report "no alt text"
        Craft::warning("Unable to read IPTC alt text from $filePath: " . $e->getMessage(), __METHOD__);
    }

    return null;
}
3810 | 3951 | } |
0 commit comments