|
14 | 14 | // limitations under the License. |
15 | 15 | //===----------------------------------------------------------------------===// |
16 | 16 |
|
| 17 | +import Compression |
17 | 18 | import ContainerizationError |
18 | 19 | import Crypto |
19 | 20 | import Foundation |
20 | 21 | import NIOCore |
| 22 | +import zlib |
21 | 23 |
|
22 | 24 | /// Provides a context to write data into a directory. |
23 | 25 | public class ContentWriter { |
@@ -134,4 +136,168 @@ public class ContentWriter { |
134 | 136 | let data = try self.encoder.encode(content) |
135 | 137 | return try self.write(data) |
136 | 138 | } |
| 139 | + |
/// Computes the SHA256 digest of the uncompressed content of a gzip file.
///
/// Per the OCI Image Specification, a DiffID is the SHA256 digest of the
/// uncompressed layer content. This method streams the compressed file in
/// 64 KiB chunks, decompresses through Apple's Compression framework, and
/// feeds each decompressed chunk into an incremental SHA256 hasher. Neither
/// the full compressed nor the full decompressed data is held in memory.
///
/// The file is handled as a single gzip member: the header is parsed from
/// the first 512 bytes, the CRC32/ISIZE trailer is taken from the last
/// 8 bytes of the file, and both trailer fields are verified against the
/// decompressed output before the digest is returned.
///
/// - Parameter url: The URL of the gzip-compressed file.
/// - Returns: The SHA256 digest of the uncompressed content.
/// - Throws: `ContainerizationError` (`.internalError`) when the header is
///   not a valid gzip header, decompression fails or stalls, or the trailer
///   does not match the decompressed data; any error thrown while opening,
///   seeking, or reading the file is propagated unchanged.
public static func diffID(of url: URL) throws -> SHA256.Digest {
    let fileHandle = try FileHandle(forReadingFrom: url)
    defer { try? fileHandle.close() }

    // Parse the variable-length gzip header from a bounded prefix of the file.
    let headerReadSize = 512
    guard let headerData = Self.readExactly(fileHandle: fileHandle, count: headerReadSize) else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }
    let headerSize = try Self.gzipHeaderSize(headerData)

    // The trailer (CRC32 then ISIZE, both little-endian) is the last 8 bytes.
    let fileSize = try fileHandle.seekToEnd()
    guard fileSize >= 8 else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }
    try fileHandle.seek(toOffset: fileSize - 8)
    guard let trailerData = Self.readExactly(fileHandle: fileHandle, count: 8),
        trailerData.count == 8
    else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }

    /// Decodes a little-endian UInt32 starting `offset` bytes into the trailer.
    func littleEndianUInt32(at offset: Int) -> UInt32 {
        var value: UInt32 = 0
        for i in 0..<4 {
            value |= UInt32(trailerData[trailerData.startIndex + offset + i]) << UInt32(8 * i)
        }
        return value
    }
    let expectedCRC = littleEndianUInt32(at: 0)
    let expectedSize = littleEndianUInt32(at: 4)

    // Position at the start of the raw deflate payload, which sits between
    // the header and the 8-byte trailer.
    try fileHandle.seek(toOffset: UInt64(headerSize))
    var compressedBytesRemaining = Int(fileSize) - headerSize - 8
    guard compressedBytesRemaining >= 0 else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    let chunkSize = 65_536
    let sourceBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    let destinationBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    defer {
        sourceBuffer.deallocate()
        destinationBuffer.deallocate()
    }

    let stream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
    defer { stream.deallocate() }

    var status = compression_stream_init(stream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
    guard status != COMPRESSION_STATUS_ERROR else {
        throw ContainerizationError(.internalError, message: "gzip decompression failed")
    }
    defer { compression_stream_destroy(stream) }

    stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
    stream.pointee.src_size = 0
    stream.pointee.dst_ptr = destinationBuffer
    stream.pointee.dst_size = chunkSize

    var hasher = SHA256()
    var runningCRC: uLong = crc32(0, nil, 0)
    var totalDecompressedSize: UInt64 = 0
    var inputExhausted = false

    while status != COMPRESSION_STATUS_END {
        // Refill the source buffer only once the decoder has consumed it.
        if stream.pointee.src_size == 0 && !inputExhausted {
            let toRead = min(chunkSize, compressedBytesRemaining)
            if toRead > 0,
                let chunk = try fileHandle.read(upToCount: toRead),
                !chunk.isEmpty
            {
                compressedBytesRemaining -= chunk.count
                chunk.copyBytes(to: sourceBuffer, count: chunk.count)
                stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
                stream.pointee.src_size = chunk.count
            } else {
                inputExhausted = true
            }
        }

        // Hand the decoder a fresh, empty destination buffer on every pass.
        stream.pointee.dst_ptr = destinationBuffer
        stream.pointee.dst_size = chunkSize

        let flags: Int32 = inputExhausted ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
        status = compression_stream_process(stream, flags)

        switch status {
        case COMPRESSION_STATUS_OK, COMPRESSION_STATUS_END:
            let produced = chunkSize - stream.pointee.dst_size
            if produced > 0 {
                let buf = UnsafeBufferPointer(start: destinationBuffer, count: produced)
                hasher.update(bufferPointer: UnsafeRawBufferPointer(buf))
                runningCRC = crc32(runningCRC, destinationBuffer, uInt(produced))
                totalDecompressedSize += UInt64(produced)
            } else if status == COMPRESSION_STATUS_OK && inputExhausted {
                // No input left and no output produced, yet the stream has not
                // ended: the payload is truncated. Fail instead of looping forever.
                throw ContainerizationError(.internalError, message: "gzip decompression failed")
            }
        default:
            throw ContainerizationError(.internalError, message: "gzip decompression failed")
        }
    }

    // ISIZE stores the uncompressed length modulo 2^32, hence the truncation.
    let actualCRC = UInt32(truncatingIfNeeded: runningCRC)
    let actualSize = UInt32(truncatingIfNeeded: totalDecompressedSize)

    guard expectedCRC == actualCRC, expectedSize == actualSize else {
        throw ContainerizationError(.internalError, message: "gzip trailer mismatch")
    }

    return hasher.finalize()
}
| 262 | + |
/// Reads up to `count` bytes from the file handle's current offset.
///
/// Despite the name, the result may be shorter than `count` when the file
/// ends early; callers that need an exact length must check the returned
/// data's `count` themselves.
///
/// - Parameters:
///   - fileHandle: The handle to read from.
///   - count: The maximum number of bytes to read.
/// - Returns: The bytes read, or `nil` when no bytes could be read (end of
///   file or a read failure).
private static func readExactly(fileHandle: FileHandle, count: Int) -> Data? {
    // `read(upToCount:)` reports I/O failures as Swift errors, whereas the
    // legacy `readData(ofLength:)` raises an Objective-C exception that
    // Swift code cannot catch. A failure is folded into the `nil` result.
    guard let data = try? fileHandle.read(upToCount: count), !data.isEmpty else {
        return nil
    }
    return data
}
| 267 | + |
/// Returns the length in bytes of the gzip member header at the start of `data`.
///
/// The header is the fixed 10-byte prefix (magic `1f 8b`, compression
/// method `08`, flags, and six further bytes that are skipped), followed by
/// the optional fields selected by the flag byte, in this order: FEXTRA
/// (0x04), FNAME (0x08), FCOMMENT (0x10), FHCRC (0x02).
///
/// `data` may be just a prefix of the file. A header that does not end
/// strictly inside `data` is rejected, so at least one payload byte must
/// follow the header within `data`.
///
/// - Parameter data: The leading bytes of a gzip file.
/// - Returns: The offset, relative to the start of `data`, of the first
///   byte after the header.
/// - Throws: `ContainerizationError` (`.internalError`) when the magic or
///   compression method is wrong, reserved flag bits are set, or the header
///   runs past the end of `data`.
private static func gzipHeaderSize(_ data: Data) throws -> Int {
    guard data.count >= 10,
        data[data.startIndex] == 0x1f,
        data[data.startIndex + 1] == 0x8b,
        data[data.startIndex + 2] == 0x08
    else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    let start = data.startIndex
    let flags = data[start + 3]

    // RFC 1952 requires rejecting reserved flag bits: they may announce
    // header fields this parser does not know, which would shift the payload.
    guard flags & 0xE0 == 0 else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }

    var offset = 10

    /// Advances `offset` past a NUL-terminated field, terminator included.
    func skipZeroTerminatedField() {
        while offset < data.count && data[start + offset] != 0 { offset += 1 }
        offset += 1
    }

    if flags & 0x04 != 0 {
        // FEXTRA: 2-byte little-endian length followed by that many bytes.
        guard data.count >= offset + 2 else {
            throw ContainerizationError(.internalError, message: "invalid gzip file")
        }
        let extraLen = Int(data[start + offset]) | (Int(data[start + offset + 1]) << 8)
        offset += 2 + extraLen
    }
    if flags & 0x08 != 0 {
        // FNAME: NUL-terminated original file name.
        skipZeroTerminatedField()
    }
    if flags & 0x10 != 0 {
        // FCOMMENT: NUL-terminated comment.
        skipZeroTerminatedField()
    }
    if flags & 0x02 != 0 {
        // FHCRC: 2-byte header checksum, skipped without verification.
        offset += 2
    }

    // Any field that overran `data` leaves `offset` at or past its end.
    guard offset < data.count else {
        throw ContainerizationError(.internalError, message: "invalid gzip file")
    }
    return offset
}
137 | 303 | } |
0 commit comments