From 91577a7fddf4c034379c763f9b39202d782a87b3 Mon Sep 17 00:00:00 2001
From: Pete Schwamb
Date: Sun, 7 May 2023 11:22:52 -0500
Subject: [PATCH 1/3] Add support for streams of unknown size

---
 Sources/ZIPFoundation/Archive+Helpers.swift  | 38 +++++++++++++------
 Sources/ZIPFoundation/Archive+Writing.swift  | 12 +++---
 Sources/ZIPFoundation/Archive.swift          |  2 +
 Sources/ZIPFoundation/Data+Compression.swift | 37 ++++++++++--------
 .../Data+CompressionDeprecated.swift         |  3 +-
 .../ZIPFoundationTests.swift                 |  1 +
 .../ZIPFoundationWritingTests.swift          | 26 +++++++++++++
 7 files changed, 85 insertions(+), 34 deletions(-)

diff --git a/Sources/ZIPFoundation/Archive+Helpers.swift b/Sources/ZIPFoundation/Archive+Helpers.swift
index 7a291e47..8a4b8d1b 100644
--- a/Sources/ZIPFoundation/Archive+Helpers.swift
+++ b/Sources/ZIPFoundation/Archive+Helpers.swift
@@ -44,11 +44,12 @@ extension Archive {
 
     // MARK: - Writing
 
-    func writeEntry(uncompressedSize: Int64, type: Entry.EntryType,
+    func writeEntry(uncompressedSize: Int64? = nil, type: Entry.EntryType,
                     compressionMethod: CompressionMethod, bufferSize: Int, progress: Progress? = nil,
-                    provider: Provider) throws -> (sizeWritten: Int64, crc32: CRC32) {
+                    provider: Provider) throws -> (totalRead: Int64, sizeWritten: Int64, crc32: CRC32) {
         var checksum = CRC32(0)
         var sizeWritten = Int64(0)
+        var totalRead = Int64(0)
         switch type {
         case .file:
             switch compressionMethod {
@@ -57,7 +58,7 @@ extension Archive {
                                                                      bufferSize: bufferSize,
                                                                      progress: progress, provider: provider)
             case .deflate:
-                (sizeWritten, checksum) = try self.writeCompressed(size: uncompressedSize,
+                (totalRead, sizeWritten, checksum) = try self.writeCompressed(size: uncompressedSize,
                                                                    bufferSize: bufferSize,
                                                                    progress: progress, provider: provider)
             }
@@ -65,12 +66,15 @@ extension Archive {
             _ = try provider(0, 0)
             if let progress = progress { progress.completedUnitCount = progress.totalUnitCount }
         case .symlink:
+            guard let uncompressedSize else {
+                throw ArchiveError.missingEntrySize
+            }
             let (linkSizeWritten, linkChecksum) = try self.writeSymbolicLink(size: Int(uncompressedSize),
                                                                              provider: provider)
             (sizeWritten, checksum) = (Int64(linkSizeWritten), linkChecksum)
             if let progress = progress { progress.completedUnitCount = progress.totalUnitCount }
         }
-        return (sizeWritten, checksum)
+        return (totalRead, sizeWritten, checksum)
     }
 
     func writeLocalFileHeader(path: String, compressionMethod: CompressionMethod,
@@ -206,35 +210,45 @@ extension Archive {
         return (record, zip64EOCD)
     }
 
-    func writeUncompressed(size: Int64, bufferSize: Int, progress: Progress? = nil,
+    func writeUncompressed(size: Int64?, bufferSize: Int, progress: Progress? = nil,
                            provider: Provider) throws -> (sizeWritten: Int64, checksum: CRC32) {
         var position: Int64 = 0
         var sizeWritten: Int64 = 0
         var checksum = CRC32(0)
-        while position < size {
+        var entryChunk: Data!
+        var atEnd = false
+        while !atEnd {
             if progress?.isCancelled == true { throw ArchiveError.cancelledOperation }
-            let readSize = (size - position) >= bufferSize ? bufferSize : Int(size - position)
-            let entryChunk = try provider(position, readSize)
+            let remaining = size != nil ? (size! - position) : Int64(bufferSize)
+            let readSize = remaining >= bufferSize ? Int64(bufferSize) : remaining
+            entryChunk = try provider(position, Int(readSize))
             checksum = entryChunk.crc32(checksum: checksum)
             sizeWritten += Int64(try Data.write(chunk: entryChunk, to: self.archiveFile))
             position += Int64(bufferSize)
             progress?.completedUnitCount = sizeWritten
+
+            if let size, position >= size {
+                atEnd = true
+            }
+            if size == nil && entryChunk.count < readSize {
+                atEnd = true
+            }
         }
         return (sizeWritten, checksum)
     }
 
-    func writeCompressed(size: Int64, bufferSize: Int, progress: Progress? = nil,
-                         provider: Provider) throws -> (sizeWritten: Int64, checksum: CRC32) {
+    func writeCompressed(size: Int64? = nil, bufferSize: Int, progress: Progress? = nil,
+                         provider: Provider) throws -> (totalRead: Int64, sizeWritten: Int64, checksum: CRC32) {
         var sizeWritten: Int64 = 0
         let consumer: Consumer = { data in sizeWritten += Int64(try Data.write(chunk: data, to: self.archiveFile)) }
-        let checksum = try Data.compress(size: size, bufferSize: bufferSize,
+        let (totalRead, checksum) = try Data.compress(size: size, bufferSize: bufferSize,
                                          provider: { (position, size) -> Data in
                                             if progress?.isCancelled == true { throw ArchiveError.cancelledOperation }
                                             let data = try provider(position, size)
                                             progress?.completedUnitCount += Int64(data.count)
                                             return data
                                          }, consumer: consumer)
-        return(sizeWritten, checksum)
+        return(totalRead, sizeWritten, checksum)
     }
 
     func writeSymbolicLink(size: Int, provider: Provider) throws -> (sizeWritten: Int, checksum: CRC32) {
diff --git a/Sources/ZIPFoundation/Archive+Writing.swift b/Sources/ZIPFoundation/Archive+Writing.swift
index b9e5c708..cdc408e6 100644
--- a/Sources/ZIPFoundation/Archive+Writing.swift
+++ b/Sources/ZIPFoundation/Archive+Writing.swift
@@ -113,14 +113,16 @@ extension Archive {
     ///   - progress: A progress object that can be used to track or cancel the add operation.
     ///   - provider: A closure that accepts a position and a chunk size. Returns a `Data` chunk.
     /// - Throws: An error if the source data is invalid or the receiver is not writable.
-    public func addEntry(with path: String, type: Entry.EntryType, uncompressedSize: Int64,
+    public func addEntry(with path: String, type: Entry.EntryType, uncompressedSize: Int64? = nil,
                          modificationDate: Date = Date(), permissions: UInt16? = nil,
                          compressionMethod: CompressionMethod = .none, bufferSize: Int = defaultWriteChunkSize,
                          progress: Progress? = nil, provider: Provider) throws {
         guard self.accessMode != .read else { throw ArchiveError.unwritableArchive }
         // Directories and symlinks cannot be compressed
         let compressionMethod = type == .file ? compressionMethod : .none
-        progress?.totalUnitCount = type == .directory ? defaultDirectoryUnitCount : uncompressedSize
+        if let uncompressedSize {
+            progress?.totalUnitCount = type == .directory ? defaultDirectoryUnitCount : uncompressedSize
+        }
         let (eocdRecord, zip64EOCD) = (self.endOfCentralDirectoryRecord, self.zip64EndOfCentralDirectory)
         guard self.offsetToStartOfCentralDirectory <= .max else { throw ArchiveError.invalidCentralDirectoryOffset }
         var startOfCD = Int64(self.offsetToStartOfCentralDirectory)
@@ -134,17 +136,17 @@ extension Archive {
         do {
             // Local File Header
             var localFileHeader = try self.writeLocalFileHeader(path: path, compressionMethod: compressionMethod,
-                                                                size: (UInt64(uncompressedSize), 0), checksum: 0,
+                                                                size: (UInt64(uncompressedSize ?? 0), 0), checksum: 0,
                                                                 modificationDateTime: modDateTime)
             // File Data
-            let (written, checksum) = try self.writeEntry(uncompressedSize: uncompressedSize, type: type,
+            let (totalRead, written, checksum) = try self.writeEntry(uncompressedSize: uncompressedSize, type: type,
                                                           compressionMethod: compressionMethod, bufferSize: bufferSize,
                                                           progress: progress, provider: provider)
             startOfCD = Int64(ftello(self.archiveFile))
             // Write the local file header a second time. Now with compressedSize (if applicable) and a valid checksum.
             fseeko(self.archiveFile, off_t(fileHeaderStart), SEEK_SET)
             localFileHeader = try self.writeLocalFileHeader(path: path, compressionMethod: compressionMethod,
-                                                            size: (UInt64(uncompressedSize), UInt64(written)),
+                                                            size: (UInt64(uncompressedSize ?? totalRead), UInt64(written)),
                                                             checksum: checksum, modificationDateTime: modDateTime)
             // Central Directory
             fseeko(self.archiveFile, off_t(startOfCD), SEEK_SET)
diff --git a/Sources/ZIPFoundation/Archive.swift b/Sources/ZIPFoundation/Archive.swift
index 7a3b2dd8..cf788e04 100644
--- a/Sources/ZIPFoundation/Archive.swift
+++ b/Sources/ZIPFoundation/Archive.swift
@@ -77,6 +77,8 @@ public final class Archive: Sequence {
         case invalidBufferSize
         /// Thrown when uncompressedSize/compressedSize exceeds `Int64.max` (Imposed by file API).
         case invalidEntrySize
+        /// Thrown when uncompressed size for a symbolic link is passed as nil
+        case missingEntrySize
         /// Thrown when the offset of local header data exceeds `Int64.max` (Imposed by file API).
         case invalidLocalHeaderDataOffset
         /// Thrown when the size of local header exceeds `Int64.max` (Imposed by file API).
diff --git a/Sources/ZIPFoundation/Data+Compression.swift b/Sources/ZIPFoundation/Data+Compression.swift
index 70386203..32603955 100644
--- a/Sources/ZIPFoundation/Data+Compression.swift
+++ b/Sources/ZIPFoundation/Data+Compression.swift
@@ -60,12 +60,12 @@ extension Data {
 
     /// Compress the output of `provider` and pass it to `consumer`.
     /// - Parameters:
-    ///   - size: The uncompressed size of the data to be compressed.
+    ///   - size: The uncompressed size of the data to be compressed, or nil if not known, in which case provider will be called until it returns an empty Data chunk.
     ///   - bufferSize: The maximum size of the compression buffer.
     ///   - provider: A closure that accepts a position and a chunk size. Returns a `Data` chunk.
     ///   - consumer: A closure that processes the result of the compress operation.
-    /// - Returns: The checksum of the processed content.
-    public static func compress(size: Int64, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> CRC32 {
+    /// - Returns: The total size of uncompressed data consumed, and checksum of the processed content.
+    public static func compress(size: Int64?, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> (Int64, CRC32) {
         #if os(macOS) || os(iOS) || os(watchOS) || os(tvOS)
         return try self.process(operation: COMPRESSION_STREAM_ENCODE, size: size, bufferSize: bufferSize,
                                 provider: provider, consumer: consumer)
@@ -85,8 +85,9 @@ extension Data {
     public static func decompress(size: Int64, bufferSize: Int, skipCRC32: Bool,
                                   provider: Provider, consumer: Consumer) throws -> CRC32 {
         #if os(macOS) || os(iOS) || os(watchOS) || os(tvOS)
-        return try self.process(operation: COMPRESSION_STREAM_DECODE, size: size, bufferSize: bufferSize,
+        let (_, crc) = try self.process(operation: COMPRESSION_STREAM_DECODE, size: size, bufferSize: bufferSize,
                                 skipCRC32: skipCRC32, provider: provider, consumer: consumer)
+        return crc
         #else
         return try self.decode(bufferSize: bufferSize, skipCRC32: skipCRC32, provider: provider, consumer: consumer)
         #endif
@@ -100,8 +101,8 @@ extension Data {
 import Compression
 
 extension Data {
-    static func process(operation: compression_stream_operation, size: Int64, bufferSize: Int, skipCRC32: Bool = false,
-                        provider: Provider, consumer: Consumer) throws -> CRC32 {
+    static func process(operation: compression_stream_operation, size: Int64? = nil, bufferSize: Int, skipCRC32: Bool = false,
+                        provider: Provider, consumer: Consumer) throws -> (Int64, CRC32) {
         var crc32 = CRC32(0)
         let destPointer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
         defer { destPointer.deallocate() }
@@ -119,12 +120,11 @@ extension Data {
         repeat {
             let isExhausted = stream.src_size == 0
             if isExhausted {
-                do {
-                    sourceData = try provider(position, Int(Swift.min((size - position), Int64(bufferSize))))
-                    position += Int64(stream.prepare(for: sourceData))
-                } catch { throw error }
+                let remaining = size != nil ? Int64(size! - position) : Int64(bufferSize)
+                sourceData = try provider(position, Int(Swift.min(remaining, Int64(bufferSize))))
+                position += Int64(stream.prepare(for: sourceData))
             }
-            if let sourceData = sourceData {
+            if let sourceData {
                 sourceData.withUnsafeBytes { rawBufferPointer in
                     if let baseAddress = rawBufferPointer.baseAddress {
                         let pointer = baseAddress.assumingMemoryBound(to: UInt8.self)
@@ -146,7 +146,7 @@ extension Data {
             default: throw CompressionError.corruptedData
             }
         } while status == COMPRESSION_STATUS_OK
-        return crc32
+        return (position, crc32)
     }
 }
 
@@ -166,7 +166,7 @@ private extension compression_stream {
 import CZlib
 
 extension Data {
-    static func encode(size: Int64, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> CRC32 {
+    static func encode(size: Int64?, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> CRC32 {
         var stream = z_stream()
         let streamSize = Int32(MemoryLayout<z_stream>.size)
         var result = deflateInit2_(&stream, Z_DEFAULT_COMPRESSION,
@@ -177,7 +177,8 @@ extension Data {
         var position: Int64 = 0
         var zipCRC32 = CRC32(0)
         repeat {
-            let readSize = Int(Swift.min((size - position), Int64(bufferSize)))
+            let remaining = size != nil ? Int64(size! - position) : Int64(bufferSize)
+            let readSize = Int(Swift.min(remaining, Int64(bufferSize)))
             var inputChunk = try provider(position, readSize)
             zipCRC32 = inputChunk.crc32(checksum: zipCRC32)
             stream.avail_in = UInt32(inputChunk.count)
@@ -185,7 +186,11 @@ extension Data {
                 if let baseAddress = rawBufferPointer.baseAddress {
                     let pointer = baseAddress.assumingMemoryBound(to: UInt8.self)
                     stream.next_in = pointer
-                    flush = position + Int64(bufferSize) >= size ? Z_FINISH : Z_NO_FLUSH
+                    if let size {
+                        flush = position + Int64(bufferSize) >= size ? Z_FINISH : Z_NO_FLUSH
+                    } else {
+                        flush = inputChunk.isEmpty ? Z_FINISH : Z_NO_FLUSH
+                    }
                 } else if rawBufferPointer.count > 0 {
                     throw CompressionError.corruptedData
                 } else {
@@ -211,7 +216,7 @@ extension Data {
             }
             position += Int64(readSize)
         } while flush != Z_FINISH
-        return zipCRC32
+        return (position, zipCRC32)
     }
 
     static func decode(bufferSize: Int, skipCRC32: Bool, provider: Provider, consumer: Consumer) throws -> CRC32 {
diff --git a/Sources/ZIPFoundation/Data+CompressionDeprecated.swift b/Sources/ZIPFoundation/Data+CompressionDeprecated.swift
index 90bad790..0ca47074 100644
--- a/Sources/ZIPFoundation/Data+CompressionDeprecated.swift
+++ b/Sources/ZIPFoundation/Data+CompressionDeprecated.swift
@@ -17,7 +17,8 @@ public extension Data {
                          provider: (_ position: Int, _ size: Int) throws -> Data,
                          consumer: Consumer) throws -> CRC32 {
         let newProvider: Provider = { try provider(Int($0), $1) }
-        return try self.compress(size: Int64(size), bufferSize: bufferSize, provider: newProvider, consumer: consumer)
+        let (_, crc) = try self.compress(size: Int64(size), bufferSize: bufferSize, provider: newProvider, consumer: consumer)
+        return crc
     }
 
     @available(*, deprecated, message: "Please use `Int64` for `size` and provider `position`.")
diff --git a/Tests/ZIPFoundationTests/ZIPFoundationTests.swift b/Tests/ZIPFoundationTests/ZIPFoundationTests.swift
index 8da9b775..f47d21ee 100644
--- a/Tests/ZIPFoundationTests/ZIPFoundationTests.swift
+++ b/Tests/ZIPFoundationTests/ZIPFoundationTests.swift
@@ -205,6 +205,7 @@ extension ZIPFoundationTests {
             ("testCreateArchiveAddEntryErrorConditions", testCreateArchiveAddEntryErrorConditions),
             ("testCreateArchiveAddZeroSizeUncompressedEntry", testCreateArchiveAddZeroSizeUncompressedEntry),
             ("testCreateArchiveAddZeroSizeCompressedEntry", testCreateArchiveAddZeroSizeCompressedEntry),
+            ("testCreateArchiveAddLargeCompressedEntryWithUnknownLength", testCreateArchiveAddLargeCompressedEntryWithUnknownLength),
             ("testCreateArchiveAddLargeCompressedEntry", testCreateArchiveAddLargeCompressedEntry),
             ("testCreateArchiveAddLargeUncompressedEntry", testCreateArchiveAddLargeUncompressedEntry),
             ("testCreateArchiveAddSymbolicLink", testCreateArchiveAddSymbolicLink),
diff --git a/Tests/ZIPFoundationTests/ZIPFoundationWritingTests.swift b/Tests/ZIPFoundationTests/ZIPFoundationWritingTests.swift
index 359151e9..c70b4e22 100755
--- a/Tests/ZIPFoundationTests/ZIPFoundationWritingTests.swift
+++ b/Tests/ZIPFoundationTests/ZIPFoundationWritingTests.swift
@@ -222,6 +222,32 @@ extension ZIPFoundationTests {
         XCTAssert(archive.checkIntegrity())
     }
 
+    func testCreateArchiveAddLargeCompressedEntryWithUnknownLength() {
+        let archive = self.archive(for: #function, mode: .create)
+        let size = 1024*1024*20
+        let data = Data.makeRandomData(size: size)
+        let entryName = ProcessInfo.processInfo.globallyUniqueString
+        do {
+            try archive.addEntry(with: entryName, type: .file, uncompressedSize: nil,
+                                 compressionMethod: .deflate,
+                                 provider: { (position, bufferSize) -> Data in
+                let upperBound = Swift.min(size, Int(position) + bufferSize)
+                let range = Range(uncheckedBounds: (lower: Int(position), upper: upperBound))
+                return data.subdata(in: range)
+            })
+        } catch {
+            XCTFail("Failed to add large entry to compressed archive with error : \(error)")
+        }
+        guard let entry = archive[entryName] else {
+            XCTFail("Failed to add large entry to compressed archive")
+            return
+        }
+        let dataCRC32 = data.crc32(checksum: 0)
+        XCTAssert(entry.checksum == dataCRC32)
+        XCTAssert(archive.checkIntegrity())
+    }
+
+
     func testRemoveUncompressedEntry() {
         let archive = self.archive(for: #function, mode: .update)
         guard let entryToRemove = archive["test/data.random"] else {

From 91700d5f888090e88589f7c8d97e78417793e78e Mon Sep 17 00:00:00 2001
From: Pete Schwamb
Date: Mon, 8 May 2023 10:03:58 -0500
Subject: [PATCH 2/3] Fix uncompressed write

---
 Sources/ZIPFoundation/Archive+Helpers.swift | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Sources/ZIPFoundation/Archive+Helpers.swift b/Sources/ZIPFoundation/Archive+Helpers.swift
index 8a4b8d1b..620421fc 100644
--- a/Sources/ZIPFoundation/Archive+Helpers.swift
+++ b/Sources/ZIPFoundation/Archive+Helpers.swift
@@ -57,6 +57,7 @@ extension Archive {
                 (sizeWritten, checksum) = try self.writeUncompressed(size: uncompressedSize,
                                                                      bufferSize: bufferSize,
                                                                      progress: progress, provider: provider)
+                totalRead = sizeWritten
             case .deflate:
                 (totalRead, sizeWritten, checksum) = try self.writeCompressed(size: uncompressedSize,
                                                                               bufferSize: bufferSize,
@@ -230,7 +231,7 @@ extension Archive {
             if let size, position >= size {
                 atEnd = true
             }
-            if size == nil && entryChunk.count < readSize {
+            if size == nil && entryChunk.count == 0 {
                 atEnd = true
             }
         }

From c67b7509ec82ee2b4b0ab3f97742b94ed9692494 Mon Sep 17 00:00:00 2001
From: Pete Schwamb
Date: Fri, 7 Jul 2023 18:05:12 -0500
Subject: [PATCH 3/3] Fix read callback when doing unknown length compression
 and returned size is < bufferSize

---
 Sources/ZIPFoundation/Data+Compression.swift | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Sources/ZIPFoundation/Data+Compression.swift b/Sources/ZIPFoundation/Data+Compression.swift
index 32603955..fc610483 100644
--- a/Sources/ZIPFoundation/Data+Compression.swift
+++ b/Sources/ZIPFoundation/Data+Compression.swift
@@ -129,7 +129,10 @@ extension Data {
                     if let baseAddress = rawBufferPointer.baseAddress {
                         let pointer = baseAddress.assumingMemoryBound(to: UInt8.self)
                         stream.src_ptr = pointer.advanced(by: sourceData.count - stream.src_size)
-                        let flags = sourceData.count < bufferSize ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
+                        var flags: Int32 = 0
+                        if (size == nil && sourceData.count == 0) || (size != nil && sourceData.count < bufferSize) {
+                            flags = Int32(COMPRESSION_STREAM_FINALIZE.rawValue)
+                        }
                         status = compression_stream_process(&stream, flags)
                     }
                 }
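
Usage sketch (editor's note; not part of the patch series): with these three patches applied, an entry whose total length is not known up front can be streamed into an archive roughly as follows. `archive` is assumed to be an already-open, writable `Archive`; the entry name "stream.bin" and the `nextChunk(at:maxLength:)` data source are hypothetical stand-ins for application code.

    // Stream data of unknown total length into a deflate-compressed entry.
    // The provider is called repeatedly; returning an empty Data signals
    // end-of-stream, which is what the new nil-size code paths key off.
    try archive.addEntry(with: "stream.bin", type: .file,
                         uncompressedSize: nil,
                         compressionMethod: .deflate,
                         provider: { (position: Int64, bufferSize: Int) -> Data in
                             return try nextChunk(at: position, maxLength: bufferSize)
                         })

Once the provider reports end-of-stream, the local file header is rewritten with the actual number of bytes read (`totalRead`) and the computed CRC32, as implemented in the `Archive+Writing.swift` hunk above.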