Skip to content

Commit c5a25ba

Browse files
committed
Ignore data: URIs
1 parent 1553159 commit c5a25ba

3 files changed

Lines changed: 19 additions & 2 deletions

File tree

CHANGELOG

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
Unreleased
2+
3+
* Ignore `data:` URIs.
4+
15
2.4.0 (2025-09-28)
26

37
* Add index to Link (David Venhoff, #202)

linkcheck/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def handle_starttag(self, tag, attrs):
3232
self.text += f' [image:{src[0]}] '
3333

3434
def handle_endtag(self, tag):
35-
if tag == 'a' and self.in_a:
35+
if tag == 'a' and self.in_a and not self.url.startswith("data:"):
3636
self.urls.append((self.text[:256], self.url))
3737
self.in_a = False
3838
self.text = ''
@@ -48,7 +48,7 @@ class ImageLister(Lister):
4848
def handle_starttag(self, tag, attrs):
4949
if tag == 'img':
5050
src = [v for k, v in attrs if k == 'src']
51-
if src:
51+
if src and not src[0].startswith("data:"):
5252
self.urls.append(('', src[0]))
5353

5454

linkcheck/tests/test_linkcheck.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -943,6 +943,19 @@ def test_urls_exceeding_max_length(self):
943943
)
944944
self.assertEqual(Url.objects.all().count(), 1)
945945

946+
def test_data_urls_ignored(self):
947+
self.assertEqual(Url.objects.all().count(), 0)
948+
Book.objects.create(
949+
title="My Title",
950+
description=(
951+
'This is a normal link: <a href="https://www.example.org">Example</a>, '
952+
'This is a data link: <a href="data:image/jpeg;base64,/9j/4AAQSkZJRgABAgAAZABkAAD">Example 2</a>, '
953+
'This is a data img: <img src="data:image/jpeg;base64,/9j/4AAQGffsbRR4AAZABkAAD">'
954+
)
955+
)
956+
# Only the normal link is extracted
957+
self.assertEqual(Url.objects.all().count(), 1)
958+
946959
def test_empty_url_field(self):
947960
"""
948961
Test that URLField empty content is excluded depending on ignore_empty list.

0 commit comments

Comments
 (0)