Skip to content

Commit 02daebc

Browse files
committed
Added lazy image byte loading and EagerlyLoadImageBytes option
1 parent b05f86a commit 02daebc

7 files changed

Lines changed: 80 additions & 29 deletions

File tree

src/UglyToad.PdfPig.Tests/TestPdfImage.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ public class TestPdfImage : IPdfImage
3838

3939
public Memory<byte> DecodedBytes { get; set; }
4040

41+
public bool HasLoadedBytes => !RawMemory.IsEmpty;
42+
4143
public IPdfImage? MaskImage { get; }
4244

4345
public bool TryGetBytesAsMemory(out Memory<byte> bytes)

src/UglyToad.PdfPig/Content/IPdfImage.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,23 @@ public interface IPdfImage: IBoundingBox
3838

3939
/// <summary>
4040
/// The encoded memory of the image with all filters still applied.
41+
/// Accessing this property may trigger loading of the image bytes from the PDF stream.
4142
/// </summary>
4243
Memory<byte> RawMemory { get; }
4344

4445
/// <summary>
4546
/// The encoded memory span of the image with all filters still applied.
47+
/// Accessing this property may trigger loading of the image bytes from the PDF stream.
4648
/// </summary>
4749
Span<byte> RawBytes { get; }
4850

51+
/// <summary>
52+
/// Whether the image byte data is available. Returns <see langword="false"/> when
53+
/// <see cref="ParsingOptions.EagerlyLoadImageBytes"/> is <see langword="false"/>.
54+
/// Image metadata (dimensions, color space, bounding box) is available regardless.
55+
/// </summary>
56+
bool HasLoadedBytes { get; }
57+
4958
/// <summary>
5059
/// The color rendering intent to be used when rendering the image.
5160
/// </summary>

src/UglyToad.PdfPig/Content/InlineImage.cs

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
public class InlineImage : IPdfImage
1818
{
1919
private readonly Lazy<Memory<byte>>? memoryFactory;
20+
private readonly Memory<byte> rawMemory;
2021

2122
/// <inheritdoc />
2223
public PdfRectangle BoundingBox { get; }
@@ -53,11 +54,14 @@ public class InlineImage : IPdfImage
5354
public bool Interpolate { get; }
5455

5556
/// <inheritdoc />
56-
public Memory<byte> RawMemory { get; }
57+
public Memory<byte> RawMemory => rawMemory;
5758

5859
/// <inheritdoc />
5960
public Span<byte> RawBytes => RawMemory.Span;
6061

62+
/// <inheritdoc />
63+
public bool HasLoadedBytes => !rawMemory.IsEmpty;
64+
6165
/// <inheritdoc />
6266
public ColorSpaceDetails ColorSpaceDetails { get; }
6367

@@ -92,32 +96,35 @@ internal InlineImage(PdfRectangle bounds,
9296
RenderingIntent = renderingIntent;
9397
Interpolate = interpolate;
9498
ImageDictionary = streamDictionary;
95-
RawMemory = rawMemory;
99+
this.rawMemory = rawMemory;
96100
ColorSpaceDetails = colorSpaceDetails;
97101

98-
var filters = filterProvider.GetNamedFilters(filterNames);
99-
100-
var supportsFilters = true;
101-
foreach (var filter in filters)
102+
if (!rawMemory.IsEmpty)
102103
{
103-
if (!filter.IsSupported)
104-
{
105-
supportsFilters = false;
106-
break;
107-
}
108-
}
104+
var filters = filterProvider.GetNamedFilters(filterNames);
109105

110-
memoryFactory = supportsFilters ? new Lazy<Memory<byte>>(() =>
111-
{
112-
var b = RawMemory;
113-
for (var i = 0; i < filters.Count; i++)
106+
var supportsFilters = true;
107+
foreach (var filter in filters)
114108
{
115-
var filter = filters[i];
116-
b = filter.Decode(b, streamDictionary, filterProvider, i);
109+
if (!filter.IsSupported)
110+
{
111+
supportsFilters = false;
112+
break;
113+
}
117114
}
118115

119-
return b;
120-
}) : null;
116+
memoryFactory = supportsFilters ? new Lazy<Memory<byte>>(() =>
117+
{
118+
var b = RawMemory;
119+
for (var i = 0; i < filters.Count; i++)
120+
{
121+
var filter = filters[i];
122+
b = filter.Decode(b, streamDictionary, filterProvider, i);
123+
}
124+
125+
return b;
126+
}) : null;
127+
}
121128

122129
MaskImage = softMaskImage;
123130
}

src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,16 @@ public virtual void ApplyXObject(NameToken xObjectName)
435435
}
436436
else if (subType.Equals(NameToken.Image))
437437
{
438+
var imageStream = xObjectStream;
439+
if (!ParsingOptions.EagerlyLoadImageBytes)
440+
{
441+
// Replace the stream data with empty bytes so we don't hold image data in memory.
442+
// The dictionary is kept for metadata (width, height, etc).
443+
imageStream = new StreamToken(xObjectStream.StreamDictionary, Memory<byte>.Empty);
444+
}
445+
438446
var contentRecord = new XObjectContentRecord(XObjectType.Image,
439-
xObjectStream,
447+
imageStream,
440448
matrix,
441449
state.RenderingIntent,
442450
state.ColorSpaceContext?.CurrentStrokingColorSpace ?? DeviceRgbColorSpaceDetails.Instance);
@@ -887,7 +895,7 @@ public virtual void EndInlineImage(Memory<byte> bytes)
887895
return;
888896
}
889897

890-
InlineImageBuilder.Bytes = bytes;
898+
InlineImageBuilder.Bytes = ParsingOptions.EagerlyLoadImageBytes ? bytes : Memory<byte>.Empty;
891899

892900
var image = InlineImageBuilder.CreateInlineImage(CurrentTransformationMatrix,
893901
FilterProvider,

src/UglyToad.PdfPig/ParsingOptions.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,15 @@ public sealed class ParsingOptions
6363
/// Filter provider to use while parsing the document. The <see cref="DefaultFilterProvider"/> will be used if set to <c>null</c>.
6464
/// </summary>
6565
public IFilterProvider? FilterProvider { get; set; } = null;
66+
67+
/// <summary>
68+
/// Whether to load image byte data when enumerating images on a page.
69+
/// When <see langword="true"/> (the default), image bytes are available via <see cref="Content.IPdfImage.RawMemory"/>
70+
/// and <see cref="Content.IPdfImage.TryGetBytesAsMemory(out System.Memory{byte})"/>.
71+
/// When <see langword="false"/>, image metadata (width, height, bounding box, color space, etc.)
72+
/// is still available but image byte data is not retained, reducing memory usage
73+
/// for documents with large or numerous images.
74+
/// </summary>
75+
public bool EagerlyLoadImageBytes { get; set; } = true;
6676
}
6777
}

src/UglyToad.PdfPig/XObjects/XObjectFactory.cs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,18 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject,
109109
if (dictionary.TryGet(NameToken.BitsPerComponent, out NumericToken? bitsPerComponentToken))
110110
{
111111
bitsPerComponent = bitsPerComponentToken.Int;
112-
System.Diagnostics.Debug.Assert(bitsPerComponent == Jpeg2000Helper.GetBitsPerComponent(xObject.Stream.Data.Span));
112+
System.Diagnostics.Debug.Assert(xObject.Stream.Data.IsEmpty || bitsPerComponent == Jpeg2000Helper.GetBitsPerComponent(xObject.Stream.Data.Span));
113113
}
114-
else
114+
else if (!xObject.Stream.Data.IsEmpty)
115115
{
116116
bitsPerComponent = Jpeg2000Helper.GetBitsPerComponent(xObject.Stream.Data.Span);
117117
System.Diagnostics.Debug.Assert(new int[] { 1, 2, 4, 8, 16 }.Contains(bitsPerComponent));
118118
}
119+
else
120+
{
121+
// Image bytes not available, default to 8 bpc for JPX
122+
bitsPerComponent = 8;
123+
}
119124
}
120125
else
121126
{
@@ -148,8 +153,10 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject,
148153
}
149154
}
150155

156+
var rawBytes = new Lazy<Memory<byte>>(() => xObject.Stream.Data);
151157
var streamToken = new StreamToken(dictionary, xObject.Stream.Data); // Needed as Resolve(pdfScanner) was called on the dictionary
152-
var decodedBytes = supportsFilters ? new Lazy<Memory<byte>>(() => streamToken.Decode(filterProvider, pdfScanner))
158+
var decodedBytes = supportsFilters && !xObject.Stream.Data.IsEmpty
159+
? new Lazy<Memory<byte>>(() => streamToken.Decode(filterProvider, pdfScanner))
153160
: null;
154161

155162
var decode = Array.Empty<double>();
@@ -193,7 +200,8 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject,
193200
interpolate,
194201
decode,
195202
dictionary,
196-
xObject.Stream.Data,
203+
rawBytes,
204+
!xObject.Stream.Data.IsEmpty,
197205
decodedBytes,
198206
details,
199207
softMaskImage);

src/UglyToad.PdfPig/XObjects/XObjectImage.cs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
public class XObjectImage : IPdfImage
1818
{
1919
private readonly Lazy<Memory<byte>>? memoryFactory;
20+
private readonly Lazy<Memory<byte>> rawMemoryFactory;
21+
private readonly bool hasBytesAvailable;
2022

2123
/// <inheritdoc />
2224
public PdfRectangle BoundingBox { get; }
@@ -61,11 +63,14 @@ public class XObjectImage : IPdfImage
6163
public DictionaryToken ImageDictionary { get; }
6264

6365
/// <inheritdoc />
64-
public Memory<byte> RawMemory { get; }
66+
public Memory<byte> RawMemory => rawMemoryFactory.Value;
6567

6668
/// <inheritdoc />
6769
public Span<byte> RawBytes => RawMemory.Span;
6870

71+
/// <inheritdoc />
72+
public bool HasLoadedBytes => hasBytesAvailable;
73+
6974
/// <inheritdoc />
7075
public ColorSpaceDetails? ColorSpaceDetails { get; }
7176

@@ -85,7 +90,8 @@ internal XObjectImage(PdfRectangle bounds,
8590
bool interpolate,
8691
IReadOnlyList<double> decode,
8792
DictionaryToken imageDictionary,
88-
Memory<byte> rawMemory,
93+
Lazy<Memory<byte>> rawMemory,
94+
bool hasBytesAvailable,
8995
Lazy<Memory<byte>>? bytes,
9096
ColorSpaceDetails? colorSpaceDetails,
9197
IPdfImage? softMaskImage)
@@ -100,7 +106,8 @@ internal XObjectImage(PdfRectangle bounds,
100106
Interpolate = interpolate;
101107
Decode = decode;
102108
ImageDictionary = imageDictionary ?? throw new ArgumentNullException(nameof(imageDictionary));
103-
RawMemory = rawMemory;
109+
rawMemoryFactory = rawMemory ?? throw new ArgumentNullException(nameof(rawMemory));
110+
this.hasBytesAvailable = hasBytesAvailable;
104111
ColorSpaceDetails = colorSpaceDetails;
105112
memoryFactory = bytes;
106113
MaskImage = softMaskImage;

0 commit comments

Comments
 (0)