Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added zip_entry_noallocreadwithoffset #359

Merged
merged 5 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,27 @@ zip_stream_close(zip);
free(buf);
```

* Extract a partial zip entry

```c
// Destination for the extracted bytes; it lives on the stack, so it is never
// passed to free().
unsigned char buf[16];
size_t bufsize = sizeof(buf);

struct zip_t *zip = zip_open("foo.zip", 0, 'r');
{
    zip_entry_open(zip, "foo-1.txt");
    {
        // Read up to bufsize bytes, starting 4 bytes into the entry.
        // nread is the number of bytes actually read, or a negative error code.
        size_t offset = 4;
        ssize_t nread = zip_entry_noallocread_offset(zip, offset, bufsize, (void *)buf);
    }

    zip_entry_close(zip);
}
zip_close(zip);
```

* List of all zip entries

```c
Expand Down
71 changes: 70 additions & 1 deletion src/zip.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ struct zip_entry_mark_t {
size_t lf_length;
};

static const char *const zip_errlist[33] = {
static const char *const zip_errlist[35] = {
NULL,
"not initialized\0",
"invalid entry name\0",
Expand Down Expand Up @@ -148,6 +148,8 @@ static const char *const zip_errlist[33] = {
"cannot initialize reader\0",
"cannot initialize writer\0",
"cannot initialize writer from reader\0",
"invalid argument\0",
"cannot initialize reader iterator\0",
};

const char *zip_strerror(int errnum) {
Expand Down Expand Up @@ -1654,6 +1656,73 @@ ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf, size_t bufsize) {
return (ssize_t)zip->entry.uncomp_size;
}

/**
 * Copies up to `size` bytes of the current entry's uncompressed data, starting
 * `offset` bytes into the entry, into the caller-provided buffer `buf`.
 *
 * Every call creates a fresh extraction iterator, so the entry is extracted
 * from its beginning in chunks of at most ZIP_DEFAULT_ITER_BUF_SIZE bytes
 * through an on-stack scratch buffer; only the requested window is copied out.
 *
 * @param zip zip archive handler.
 * @param offset start position within the uncompressed entry (in bytes).
 * @param size number of bytes requested; reduced so that the request ends at
 *        the entry's uncompressed size.
 * @param buf output buffer.
 *        NOTE(review): buf is not checked for NULL and is assumed to hold at
 *        least `size` bytes - confirm with callers.
 *
 * @return the number of bytes copied into buf. Otherwise a negative number:
 *         ZIP_ENOINIT when zip is NULL, ZIP_EINVAL when offset is greater
 *         than the uncompressed size, ZIP_ENOENT when the archive is not in
 *         reading mode or the entry index is negative, ZIP_ENORITER when the
 *         extraction iterator cannot be created.
 */
ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
size_t offset, size_t size, void *buf) {
mz_zip_archive *pzip = NULL;

if (!zip) {
// zip_t handler is not initialized
return (ssize_t)ZIP_ENOINIT;
}

// An offset beyond the end of the entry is rejected. offset == uncomp_size is
// accepted; the request is then clamped to zero bytes below.
// NOTE(review): entry.uncomp_size is read here before the mode/index check
// further down has run - confirm it holds a sane value when no entry is open.
if (offset > (size_t)zip->entry.uncomp_size) {
return (ssize_t)ZIP_EINVAL;
kuba-- marked this conversation as resolved.
Show resolved Hide resolved
}

// Clamp the request to the bytes that remain after offset.
// NOTE(review): offset+size is unsigned arithmetic and can wrap for a very
// large size, in which case this clamp would be skipped - TODO confirm.
if ((offset+size) > (size_t)zip->entry.uncomp_size) {
size = (ssize_t)zip->entry.uncomp_size - offset;
}

pzip = &(zip->archive);
if (pzip->m_zip_mode != MZ_ZIP_MODE_READING ||
zip->entry.index < (ssize_t)0) {
// the entry is not found or we do not have read access
return (ssize_t)ZIP_ENOENT;
}

// Extraction iterator for the current entry; it is released with
// mz_zip_reader_extract_iter_free() before the final return.
mz_zip_reader_extract_iter_state* iter =
mz_zip_reader_extract_iter_new(pzip, (mz_uint)zip->entry.index, 0);
kuba-- marked this conversation as resolved.
Show resolved Hide resolved
if (!iter) {
return (ssize_t)ZIP_ENORITER;
}

// On-stack scratch buffer that receives each extracted chunk.
mz_uint8 tmpbuf[ZIP_DEFAULT_ITER_BUF_SIZE];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a define for users to control their stack usage.

size_t tmpbuf_size = sizeof(tmpbuf);
// file_offset: uncompressed bytes produced by the iterator so far.
size_t file_offset = 0;
// write_cursor: bytes already copied into buf (also the return value).
size_t write_cursor = 0;
// to_read: bytes of the (clamped) request still outstanding.
size_t to_read = size;

// Pull chunks from the iterator, discarding those that end before offset,
// until the request is satisfied or the entry is exhausted.
while (file_offset < zip->entry.uncomp_size && to_read > 0)
{
size_t nread = mz_zip_reader_extract_iter_read(iter, tmpbuf, tmpbuf_size);

// Nothing was produced, so stop.
// NOTE(review): a zero return is not distinguished between end of data and
// an extraction failure here - presumably the iterator status would tell.
if (nread == 0)
break;

// This chunk reaches past offset, so at least part of it is wanted.
if (offset < (file_offset+nread)) {
// Position of offset inside the current chunk.
size_t read_cursor = offset - file_offset;
MZ_ASSERT(read_cursor < tmpbuf_size);
// Bytes available in this chunk from read_cursor onwards ...
size_t read_size = nread - read_cursor;

// ... capped at what the caller still wants.
if (to_read < read_size)
read_size = to_read;
MZ_ASSERT(read_size <= tmpbuf_size);

memcpy(&((mz_uint8*)buf)[write_cursor], &tmpbuf[read_cursor], read_size);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this tmpbuf, and memcpy here?
mz_zip_reader_extract_iter_read already calls memcpy. Maybe we can optimize it a litle bit, if we can get rid of tmpbuf.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the question is, where should mz_zip_reader_extract_iter_read() copy to?

Perhaps you meant that the caller also provides the tempbuffer?
Something like:

ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
                                    size_t offset, size_t size, void *buf,
                                    size_t tmpbufsize, void *tmpbuf)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get what you mean now.
I think it should be possible.
It will likely still need a memmove, to put any "unaligned" bytes from the middle of the buffer, into the start of the buffer.


write_cursor += read_size;
// Advancing offset past the copied bytes means that, if more data is still
// wanted, the next chunk is copied from its first byte (read_cursor == 0).
offset += read_size;
to_read -= read_size;
}

file_offset += nread;
}

// NOTE(review): the value returned by mz_zip_reader_extract_iter_free() is
// not inspected here.
mz_zip_reader_extract_iter_free(iter);
return (ssize_t)write_cursor;
}

int zip_entry_fread(struct zip_t *zip, const char *filename) {
mz_zip_archive *pzip = NULL;
mz_uint idx;
Expand Down
25 changes: 25 additions & 0 deletions src/zip.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ typedef long ssize_t; /* byte count or error */
*/
#define ZIP_DEFAULT_COMPRESSION_LEVEL 6

/**
 * Default size (in bytes) of the on-stack buffer used when iterating over a
 * zip entry.
 *
 * The value is parenthesized so that the macro expands as a single operand
 * in any expression (e.g. `n / ZIP_DEFAULT_ITER_BUF_SIZE`).
 */
#define ZIP_DEFAULT_ITER_BUF_SIZE (32 * 1024)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's a good default size?


/**
* Error codes
*/
Expand Down Expand Up @@ -96,6 +101,8 @@ typedef long ssize_t; /* byte count or error */
#define ZIP_ERINIT -30 // cannot initialize reader
#define ZIP_EWINIT -31 // cannot initialize writer
#define ZIP_EWRINIT -32 // cannot initialize writer from reader
#define ZIP_EINVAL -33 // invalid argument
#define ZIP_ENORITER -34 // cannot initialize reader iterator

/**
* Looks up the error message string corresponding to an error number.
Expand Down Expand Up @@ -373,6 +380,24 @@ extern ZIP_EXPORT ssize_t zip_entry_read(struct zip_t *zip, void **buf,
extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t *zip, void *buf,
size_t bufsize);

/**
* Extracts a part of the current zip entry into a memory buffer without
* allocating memory for the buffer.
*
* @param zip zip archive handler.
* @param offset the offset of the entry (in bytes).
* @param size the number of bytes to read.
* @param buf preallocated output buffer.
*
* @note the iterator api uses an allocation to create its state
* @note each call will iterate from the start of the entry
*
* @return the return code - the number of bytes actually read on success.
* Otherwise a negative number (< 0) on error (e.g. offset is too large).
*/
extern ZIP_EXPORT ssize_t zip_entry_noallocread_offset(struct zip_t *zip,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe to follow similar API, we can name it zip_entry_noallocreadwithoffset. I know it's not the prettiest name, but at least it's consistent with other API ...witherror, ...withindex, etc.

size_t offset, size_t size, void *buf);

/**
* Extracts the current zip entry into output file.
*
Expand Down
38 changes: 38 additions & 0 deletions test/test_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,49 @@ MU_TEST(test_noallocread) {
zip_close(zip);
}


/*
 * Reads "test/test-2.txt" through zip_entry_noallocread_offset() using every
 * chunk size from 1 byte up to the full length, and checks each chunk against
 * a reference copy obtained with zip_entry_noallocread().
 */
MU_TEST(test_noallocread_offset) {
  size_t expected_size = strlen(TESTDATA2);
  char *expected_data = calloc(expected_size, sizeof(char));
  mu_check(expected_data != NULL);

  struct zip_t *zip = zip_open(ZIPNAME, 0, 'r');
  mu_check(zip != NULL);
  mu_assert_int_eq(1, zip_is64(zip));

  mu_assert_int_eq(0, zip_entry_open(zip, "test/test-2.txt"));

  // Reference copy of the entry; a negative result is an error code.
  mu_check(zip_entry_noallocread(zip, (void *)expected_data, expected_size) >=
           0);

  // Read the file in different chunk sizes
  for (size_t buflen = 1; buflen <= expected_size; ++buflen) {
    char *tmpbuf = calloc(buflen, sizeof(char));
    mu_check(tmpbuf != NULL);

    size_t offset = 0;
    while (offset < expected_size) {
      ssize_t nread = zip_entry_noallocread_offset(zip, offset, buflen, tmpbuf);

      // Test the sign first: error codes are negative, and comparing a
      // negative ssize_t against a size_t would convert it to a huge value.
      mu_assert(nread > 0, "no bytes read");
      mu_assert((size_t)nread <= buflen, "too many bytes read");

      // check the data
      for (ssize_t j = 0; j < nread; ++j) {
        mu_assert_int_eq(expected_data[offset + j], tmpbuf[j]);
      }

      offset += (size_t)nread;
    }

    // One scratch buffer per chunk size; release it before the next round.
    free(tmpbuf);
  }

  mu_assert_int_eq(0, zip_entry_close(zip));
  zip_close(zip);

  free(expected_data);
}

MU_TEST_SUITE(test_read_suite) {
MU_SUITE_CONFIGURE(&test_setup, &test_teardown);

MU_RUN_TEST(test_read);
MU_RUN_TEST(test_noallocread);
MU_RUN_TEST(test_noallocread_offset);
}

#define UNUSED(x) (void)x
Expand Down