Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow writing of entry when full uncompressed size is not known. #276

Open
wants to merge 4 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions Sources/ZIPFoundation/Archive+Helpers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,33 +44,38 @@ extension Archive {

// MARK: - Writing

/// Writes the payload of a single entry to the archive file and reports how much was read and written.
///
/// - Parameters:
///   - uncompressedSize: The size of the entry's data, or `nil` when it is not known up front.
///     Must not be `nil` for `.symlink` entries.
///   - type: The kind of entry to write (`.file`, `.directory` or `.symlink`).
///   - compressionMethod: How `.file` payloads are stored; not consulted for other entry types.
///   - bufferSize: The chunk size forwarded to the file writers.
///   - progress: Optional progress object; marked complete here for directories and symlinks and
///     forwarded to the file writers otherwise.
///   - provider: A closure that accepts a position and a chunk size and returns a `Data` chunk.
/// - Returns: The number of bytes read from `provider`, the number of bytes written to the archive
///   and the CRC32 of the data.
/// - Throws: `ArchiveError.missingEntrySize` when a symlink is written without a size, plus any
///   error thrown by `provider` or by the underlying writers.
func writeEntry(uncompressedSize: Int64? = nil, type: Entry.EntryType,
                compressionMethod: CompressionMethod, bufferSize: Int, progress: Progress? = nil,
                provider: Provider) throws -> (totalRead: Int64, sizeWritten: Int64, crc32: CRC32) {
    var checksum = CRC32(0)
    var sizeWritten = Int64(0)
    var totalRead = Int64(0)
    switch type {
    case .file:
        switch compressionMethod {
        case .none:
            (sizeWritten, checksum) = try self.writeUncompressed(size: uncompressedSize,
                                                                 bufferSize: bufferSize,
                                                                 progress: progress, provider: provider)
            // Stored data is written verbatim, so bytes read equals bytes written.
            totalRead = sizeWritten
        case .deflate:
            (totalRead, sizeWritten, checksum) = try self.writeCompressed(size: uncompressedSize,
                                                                          bufferSize: bufferSize,
                                                                          progress: progress, provider: provider)
        }
    case .directory:
        // The provider is invoked once with a zero-length request; its result is ignored.
        _ = try provider(0, 0)
        if let progress = progress { progress.completedUnitCount = progress.totalUnitCount }
    case .symlink:
        guard let uncompressedSize else {
            throw ArchiveError.missingEntrySize
        }
        let (linkSizeWritten, linkChecksum) = try self.writeSymbolicLink(size: Int(uncompressedSize),
                                                                         provider: provider)
        (sizeWritten, checksum) = (Int64(linkSizeWritten), linkChecksum)
        // NOTE(review): assumes link data is stored without compression, so read == written — confirm
        // against writeSymbolicLink. Keeps the returned tuple consistent with the `.none` file case.
        totalRead = sizeWritten
        if let progress = progress { progress.completedUnitCount = progress.totalUnitCount }
    }
    return (totalRead, sizeWritten, checksum)
}

func writeLocalFileHeader(path: String, compressionMethod: CompressionMethod,
Expand Down Expand Up @@ -206,35 +211,45 @@ extension Archive {
return (record, zip64EOCD)
}

/// Copies the data supplied by `provider` into the archive file without compression.
///
/// When `size` is known, exactly as many chunks are requested as are needed to cover `size` bytes
/// and nothing is requested for a zero-size entry. When `size` is `nil`, chunks of `bufferSize`
/// are requested until `provider` returns an empty chunk.
///
/// - Parameters:
///   - size: The total number of bytes to write, or `nil` if not known.
///   - bufferSize: The maximum size of each chunk requested from `provider`.
///   - progress: Optional progress object; checked for cancellation before each read and updated
///     with the number of bytes written after each chunk.
///   - provider: A closure that accepts a position and a chunk size and returns a `Data` chunk.
/// - Returns: The number of bytes written and the CRC32 of the written data.
/// - Throws: `ArchiveError.cancelledOperation` if `progress` was cancelled, plus any error thrown
///   by `provider` or while writing to the archive file.
func writeUncompressed(size: Int64?, bufferSize: Int, progress: Progress? = nil,
                       provider: Provider) throws -> (sizeWritten: Int64, checksum: CRC32) {
    var position: Int64 = 0
    var sizeWritten: Int64 = 0
    var checksum = CRC32(0)
    while true {
        let readSize: Int
        if let size {
            let remaining = size - position
            // Known size: stop once everything is covered (also skips zero-size entries entirely).
            guard remaining > 0 else { break }
            readSize = remaining >= Int64(bufferSize) ? bufferSize : Int(remaining)
        } else {
            readSize = bufferSize
        }
        if progress?.isCancelled == true { throw ArchiveError.cancelledOperation }
        let entryChunk = try provider(position, readSize)
        // Unknown size: an empty chunk is the provider's end-of-data signal; nothing left to write.
        if size == nil && entryChunk.isEmpty { break }
        checksum = entryChunk.crc32(checksum: checksum)
        sizeWritten += Int64(try Data.write(chunk: entryChunk, to: self.archiveFile))
        if size == nil {
            // Advance by what was actually delivered so a short chunk never makes the next
            // request start beyond the end of the provider's data.
            position += Int64(entryChunk.count)
        } else {
            position += Int64(bufferSize)
        }
        progress?.completedUnitCount = sizeWritten
    }
    return (sizeWritten, checksum)
}

/// Compresses the data supplied by `provider` via `Data.compress` and writes the result to the archive file.
///
/// - Parameters:
///   - size: The uncompressed size of the data, or `nil` if not known.
///   - bufferSize: The buffer size forwarded to `Data.compress`.
///   - progress: Optional progress object; checked for cancellation before each read and advanced
///     by the number of bytes each read returns.
///   - provider: A closure that accepts a position and a chunk size and returns a `Data` chunk.
/// - Returns: The total reported by `Data.compress` as read, the number of compressed bytes written
///   to the archive file and the checksum reported by `Data.compress`.
/// - Throws: `ArchiveError.cancelledOperation` if `progress` was cancelled, plus any error thrown
///   by `provider`, by `Data.compress` or while writing to the archive file.
func writeCompressed(size: Int64? = nil, bufferSize: Int, progress: Progress? = nil,
                     provider: Provider) throws -> (totalRead: Int64, sizeWritten: Int64, checksum: CRC32) {
    var sizeWritten: Int64 = 0
    let consumer: Consumer = { data in sizeWritten += Int64(try Data.write(chunk: data, to: self.archiveFile)) }
    // Wrap the caller's provider so cancellation and progress are handled on every read.
    // The closure parameter is named `chunkSize` to avoid shadowing the outer `size`.
    let (totalRead, checksum) = try Data.compress(size: size, bufferSize: bufferSize,
                                                  provider: { (position, chunkSize) -> Data in
        if progress?.isCancelled == true { throw ArchiveError.cancelledOperation }
        let data = try provider(position, chunkSize)
        progress?.completedUnitCount += Int64(data.count)
        return data
    }, consumer: consumer)
    return (totalRead, sizeWritten, checksum)
}

func writeSymbolicLink(size: Int, provider: Provider) throws -> (sizeWritten: Int, checksum: CRC32) {
Expand Down
12 changes: 7 additions & 5 deletions Sources/ZIPFoundation/Archive+Writing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,16 @@ extension Archive {
/// - progress: A progress object that can be used to track or cancel the add operation.
/// - provider: A closure that accepts a position and a chunk size. Returns a `Data` chunk.
/// - Throws: An error if the source data is invalid or the receiver is not writable.
public func addEntry(with path: String, type: Entry.EntryType, uncompressedSize: Int64,
public func addEntry(with path: String, type: Entry.EntryType, uncompressedSize: Int64? = nil,
modificationDate: Date = Date(), permissions: UInt16? = nil,
compressionMethod: CompressionMethod = .none, bufferSize: Int = defaultWriteChunkSize,
progress: Progress? = nil, provider: Provider) throws {
guard self.accessMode != .read else { throw ArchiveError.unwritableArchive }
// Directories and symlinks cannot be compressed
let compressionMethod = type == .file ? compressionMethod : .none
progress?.totalUnitCount = type == .directory ? defaultDirectoryUnitCount : uncompressedSize
if let uncompressedSize {
progress?.totalUnitCount = type == .directory ? defaultDirectoryUnitCount : uncompressedSize
}
let (eocdRecord, zip64EOCD) = (self.endOfCentralDirectoryRecord, self.zip64EndOfCentralDirectory)
guard self.offsetToStartOfCentralDirectory <= .max else { throw ArchiveError.invalidCentralDirectoryOffset }
var startOfCD = Int64(self.offsetToStartOfCentralDirectory)
Expand All @@ -134,17 +136,17 @@ extension Archive {
do {
// Local File Header
var localFileHeader = try self.writeLocalFileHeader(path: path, compressionMethod: compressionMethod,
size: (UInt64(uncompressedSize), 0), checksum: 0,
size: (UInt64(uncompressedSize ?? 0), 0), checksum: 0,
modificationDateTime: modDateTime)
// File Data
let (written, checksum) = try self.writeEntry(uncompressedSize: uncompressedSize, type: type,
let (totalRead, written, checksum) = try self.writeEntry(uncompressedSize: uncompressedSize, type: type,
compressionMethod: compressionMethod, bufferSize: bufferSize,
progress: progress, provider: provider)
startOfCD = Int64(ftello(self.archiveFile))
// Write the local file header a second time. Now with compressedSize (if applicable) and a valid checksum.
fseeko(self.archiveFile, off_t(fileHeaderStart), SEEK_SET)
localFileHeader = try self.writeLocalFileHeader(path: path, compressionMethod: compressionMethod,
size: (UInt64(uncompressedSize), UInt64(written)),
size: (UInt64(uncompressedSize ?? totalRead), UInt64(written)),
checksum: checksum, modificationDateTime: modDateTime)
// Central Directory
fseeko(self.archiveFile, off_t(startOfCD), SEEK_SET)
Expand Down
2 changes: 2 additions & 0 deletions Sources/ZIPFoundation/Archive.swift
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ public final class Archive: Sequence {
case invalidBufferSize
/// Thrown when uncompressedSize/compressedSize exceeds `Int64.max` (Imposed by file API).
case invalidEntrySize
/// Thrown when a symbolic link entry is added without an uncompressed size (`nil`).
case missingEntrySize
/// Thrown when the offset of local header data exceeds `Int64.max` (Imposed by file API).
case invalidLocalHeaderDataOffset
/// Thrown when the size of local header exceeds `Int64.max` (Imposed by file API).
Expand Down
42 changes: 25 additions & 17 deletions Sources/ZIPFoundation/Data+Compression.swift
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ extension Data {

/// Compress the output of `provider` and pass it to `consumer`.
/// - Parameters:
/// - size: The uncompressed size of the data to be compressed.
/// - size: The uncompressed size of the data to be compressed, or nil if not known, in which case provider will be called until it returns an empty Data chunk.
/// - bufferSize: The maximum size of the compression buffer.
/// - provider: A closure that accepts a position and a chunk size. Returns a `Data` chunk.
/// - consumer: A closure that processes the result of the compress operation.
/// - Returns: The checksum of the processed content.
public static func compress(size: Int64, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> CRC32 {
/// - Returns: The total size of uncompressed data consumed, and checksum of the processed content.
public static func compress(size: Int64?, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> (Int64, CRC32) {
#if os(macOS) || os(iOS) || os(tvOS) || os(visionOS) || os(watchOS)
return try self.process(operation: COMPRESSION_STREAM_ENCODE, size: size, bufferSize: bufferSize,
provider: provider, consumer: consumer)
Expand All @@ -85,8 +85,9 @@ extension Data {
public static func decompress(size: Int64, bufferSize: Int, skipCRC32: Bool,
provider: Provider, consumer: Consumer) throws -> CRC32 {
#if os(macOS) || os(iOS) || os(tvOS) || os(visionOS) || os(watchOS)
return try self.process(operation: COMPRESSION_STREAM_DECODE, size: size, bufferSize: bufferSize,
let (_, crc) = try self.process(operation: COMPRESSION_STREAM_DECODE, size: size, bufferSize: bufferSize,
skipCRC32: skipCRC32, provider: provider, consumer: consumer)
return crc
#else
return try self.decode(bufferSize: bufferSize, skipCRC32: skipCRC32, provider: provider, consumer: consumer)
#endif
Expand All @@ -100,8 +101,8 @@ import Compression

extension Data {

static func process(operation: compression_stream_operation, size: Int64, bufferSize: Int, skipCRC32: Bool = false,
provider: Provider, consumer: Consumer) throws -> CRC32 {
static func process(operation: compression_stream_operation, size: Int64? = nil, bufferSize: Int, skipCRC32: Bool = false,
provider: Provider, consumer: Consumer) throws -> (Int64, CRC32) {
var crc32 = CRC32(0)
let destPointer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
defer { destPointer.deallocate() }
Expand All @@ -119,17 +120,19 @@ extension Data {
repeat {
let isExhausted = stream.src_size == 0
if isExhausted {
do {
sourceData = try provider(position, Int(Swift.min((size - position), Int64(bufferSize))))
position += Int64(stream.prepare(for: sourceData))
} catch { throw error }
let remaining = size != nil ? Int64(size! - position) : Int64(bufferSize)
sourceData = try provider(position, Int(Swift.min(remaining, Int64(bufferSize))))
position += Int64(stream.prepare(for: sourceData))
}
if let sourceData = sourceData {
if let sourceData {
sourceData.withUnsafeBytes { rawBufferPointer in
if let baseAddress = rawBufferPointer.baseAddress {
let pointer = baseAddress.assumingMemoryBound(to: UInt8.self)
stream.src_ptr = pointer.advanced(by: sourceData.count - stream.src_size)
let flags = sourceData.count < bufferSize ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
var flags: Int32 = 0
if (size == nil && sourceData.count == 0) || (size != nil && sourceData.count < bufferSize) {
flags = Int32(COMPRESSION_STREAM_FINALIZE.rawValue)
}
status = compression_stream_process(&stream, flags)
}
}
Expand All @@ -146,7 +149,7 @@ extension Data {
default: throw CompressionError.corruptedData
}
} while status == COMPRESSION_STATUS_OK
return crc32
return (position, crc32)
}
}

Expand All @@ -166,7 +169,7 @@ private extension compression_stream {
import CZlib

extension Data {
static func encode(size: Int64, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> CRC32 {
static func encode(size: Int64?, bufferSize: Int, provider: Provider, consumer: Consumer) throws -> CRC32 {
var stream = z_stream()
let streamSize = Int32(MemoryLayout<z_stream>.size)
var result = deflateInit2_(&stream, Z_DEFAULT_COMPRESSION,
Expand All @@ -177,15 +180,20 @@ extension Data {
var position: Int64 = 0
var zipCRC32 = CRC32(0)
repeat {
let readSize = Int(Swift.min((size - position), Int64(bufferSize)))
let remaining = size != nil ? Int64(size! - position) : Int64(bufferSize)
let readSize = Int(Swift.min(remaining, Int64(bufferSize)))
var inputChunk = try provider(position, readSize)
zipCRC32 = inputChunk.crc32(checksum: zipCRC32)
stream.avail_in = UInt32(inputChunk.count)
try inputChunk.withUnsafeMutableBytes { (rawBufferPointer) in
if let baseAddress = rawBufferPointer.baseAddress {
let pointer = baseAddress.assumingMemoryBound(to: UInt8.self)
stream.next_in = pointer
flush = position + Int64(bufferSize) >= size ? Z_FINISH : Z_NO_FLUSH
if let size {
flush = position + Int64(bufferSize) >= size ? Z_FINISH : Z_NO_FLUSH
} else {
flush = inputChunk.isEmpty ? Z_FINISH : Z_NO_FLUSH
}
} else if rawBufferPointer.count > 0 {
throw CompressionError.corruptedData
} else {
Expand All @@ -211,7 +219,7 @@ extension Data {
}
position += Int64(readSize)
} while flush != Z_FINISH
return zipCRC32
return (position, zipCRC32)
}

static func decode(bufferSize: Int, skipCRC32: Bool, provider: Provider, consumer: Consumer) throws -> CRC32 {
Expand Down
3 changes: 2 additions & 1 deletion Sources/ZIPFoundation/Data+CompressionDeprecated.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ public extension Data {
provider: (_ position: Int, _ size: Int) throws -> Data,
consumer: Consumer) throws -> CRC32 {
let newProvider: Provider = { try provider(Int($0), $1) }
return try self.compress(size: Int64(size), bufferSize: bufferSize, provider: newProvider, consumer: consumer)
let (_, crc) = try self.compress(size: Int64(size), bufferSize: bufferSize, provider: newProvider, consumer: consumer)
return crc
}

@available(*, deprecated, message: "Please use `Int64` for `size` and provider `position`.")
Expand Down
1 change: 1 addition & 0 deletions Tests/ZIPFoundationTests/ZIPFoundationTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ extension ZIPFoundationTests {
("testCreateArchiveAddEntryErrorConditions", testCreateArchiveAddEntryErrorConditions),
("testCreateArchiveAddZeroSizeUncompressedEntry", testCreateArchiveAddZeroSizeUncompressedEntry),
("testCreateArchiveAddZeroSizeCompressedEntry", testCreateArchiveAddZeroSizeCompressedEntry),
("testCreateArchiveAddLargeCompressedEntryWithUnknownLength", testCreateArchiveAddLargeCompressedEntryWithUnknownLength),
("testCreateArchiveAddLargeCompressedEntry", testCreateArchiveAddLargeCompressedEntry),
("testCreateArchiveAddLargeUncompressedEntry", testCreateArchiveAddLargeUncompressedEntry),
("testCreateArchiveAddSymbolicLink", testCreateArchiveAddSymbolicLink),
Expand Down
26 changes: 26 additions & 0 deletions Tests/ZIPFoundationTests/ZIPFoundationWritingTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,32 @@ extension ZIPFoundationTests {
XCTAssert(archive.checkIntegrity())
}

/// Adds a 20 MiB deflate-compressed entry without declaring its uncompressed size and verifies
/// that the stored checksum matches the source data and that the archive passes its integrity check.
func testCreateArchiveAddLargeCompressedEntryWithUnknownLength() {
    let archive = self.archive(for: #function, mode: .create)
    let size = 1024 * 1024 * 20
    let data = Data.makeRandomData(size: size)
    let entryName = ProcessInfo.processInfo.globallyUniqueString
    do {
        try archive.addEntry(with: entryName, type: .file, uncompressedSize: nil,
                             compressionMethod: .deflate,
                             provider: { (position, bufferSize) -> Data in
            // Clamp both bounds: with an unknown size the writer may ask for a position at or
            // past the end, and the provider must answer with an empty chunk rather than an
            // invalid range.
            let lowerBound = Swift.min(size, Int(position))
            let upperBound = Swift.min(size, lowerBound + bufferSize)
            return data.subdata(in: lowerBound..<upperBound)
        })
    } catch {
        XCTFail("Failed to add large entry to compressed archive with error : \(error)")
        // Nothing meaningful can be checked once adding the entry failed.
        return
    }
    guard let entry = archive[entryName] else {
        XCTFail("Failed to add large entry to compressed archive")
        return
    }
    let dataCRC32 = data.crc32(checksum: 0)
    XCTAssertEqual(entry.checksum, dataCRC32)
    XCTAssert(archive.checkIntegrity())
}


func testRemoveUncompressedEntry() {
let archive = self.archive(for: #function, mode: .update)
guard let entryToRemove = archive["test/data.random"] else {
Expand Down