From 36eca8dd444b0e3e13fda9fc334f596c05f41b5a Mon Sep 17 00:00:00 2001 From: PikaCat Date: Fri, 27 Sep 2024 14:54:58 +0800 Subject: [PATCH] Switch decompression algorithm to zstandard 1. Improved decompression speed leads to faster engine startups (around 4 times faster). 2. Remove support for loading uncompressed NNUE files. 3. Use glob operations to simplify the Makefile. --- src/Makefile | 26 +- src/external/common/allocations.h | 54 + src/external/common/bits.h | 227 + src/external/common/bitstream.h | 475 + src/external/common/compiler.h | 492 + src/external/common/cpu.h | 248 + src/external/common/debug.cpp | 30 + src/external/common/debug.h | 126 + src/external/common/entropy_common.cpp | 437 + src/external/common/error_private.cpp | 97 + src/external/common/error_private.h | 183 + src/external/common/fse.h | 691 + src/external/common/fse_decompress.cpp | 399 + src/external/common/huf.h | 383 + src/external/common/mem.h | 412 + src/external/common/pool.cpp | 407 + src/external/common/pool.h | 89 + src/external/common/portability_macros.h | 153 + src/external/common/threading.cpp | 184 + src/external/common/threading.h | 152 + src/external/common/xxhash.cpp | 18 + src/external/common/xxhash.h | 7206 +++++++++++ src/external/common/zstd_common.cpp | 47 + src/external/common/zstd_deps.h | 123 + src/external/common/zstd_internal.h | 399 + src/external/common/zstd_trace.h | 157 + src/external/decompress/huf_decompress.cpp | 2184 ++++ .../decompress/huf_decompress_amd64.S | 595 + src/external/decompress/zstd_ddict.cpp | 257 + src/external/decompress/zstd_ddict.h | 43 + src/external/decompress/zstd_decompress.cpp | 2664 ++++ .../decompress/zstd_decompress_block.cpp | 2636 ++++ .../decompress/zstd_decompress_block.h | 79 + .../decompress/zstd_decompress_internal.h | 258 + src/external/miniz.h | 10568 ---------------- src/external/zip.cpp | 2305 ---- src/external/zip.h | 583 - src/external/zstd.h | 3284 +++++ src/external/zstd_errors.h | 119 + src/misc.cpp | 42 +- src/misc.h | 2 +- src/nnue/network.cpp | 8 +- 42 files changed, 25347 insertions(+), 13495 deletions(-) create mode 100644 src/external/common/allocations.h create mode 100644 src/external/common/bits.h create mode 100644 src/external/common/bitstream.h create mode 100644 src/external/common/compiler.h create mode 100644 src/external/common/cpu.h create mode 100644 src/external/common/debug.cpp create mode 100644 src/external/common/debug.h create mode 100644 src/external/common/entropy_common.cpp create mode 100644 src/external/common/error_private.cpp create mode 100644 src/external/common/error_private.h create mode 100644 src/external/common/fse.h create mode 100644 src/external/common/fse_decompress.cpp create mode 100644 src/external/common/huf.h create mode 100644 src/external/common/mem.h create mode 100644 src/external/common/pool.cpp create mode 100644 src/external/common/pool.h create mode 100644 src/external/common/portability_macros.h create mode 100644 src/external/common/threading.cpp create mode 100644 src/external/common/threading.h create mode 100644 src/external/common/xxhash.cpp create mode 100644 src/external/common/xxhash.h create mode 100644 src/external/common/zstd_common.cpp create mode 100644 src/external/common/zstd_deps.h create mode 100644 src/external/common/zstd_internal.h create mode 100644 src/external/common/zstd_trace.h create mode 100644 src/external/decompress/huf_decompress.cpp create mode 100644 src/external/decompress/huf_decompress_amd64.S create mode 100644
src/external/decompress/zstd_ddict.cpp create mode 100644 src/external/decompress/zstd_ddict.h create mode 100644 src/external/decompress/zstd_decompress.cpp create mode 100644 src/external/decompress/zstd_decompress_block.cpp create mode 100644 src/external/decompress/zstd_decompress_block.h create mode 100644 src/external/decompress/zstd_decompress_internal.h delete mode 100644 src/external/miniz.h delete mode 100644 src/external/zip.cpp delete mode 100644 src/external/zip.h create mode 100644 src/external/zstd.h create mode 100644 src/external/zstd_errors.h diff --git a/src/Makefile b/src/Makefile index 197437e2..31a04935 100644 --- a/src/Makefile +++ b/src/Makefile @@ -52,24 +52,13 @@ BINDIR = $(PREFIX)/bin PGOBENCH = $(WINE_PATH) ./$(EXE) bench ### Source and object files -SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ - misc.cpp movegen.cpp movepick.cpp position.cpp \ - search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp \ - nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp \ - external/zip.cpp +SRCS = $(shell find . -name '*.cpp') $(shell find . -name '*.S') -HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h magics.h \ - nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \ - nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \ - nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ - nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \ - search.h thread.h thread_win32_osx.h timeman.h \ - tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h \ - external/zip.h external/miniz.h +HEADERS = $(shell find . -name '*.h') -OBJS = $(notdir $(SRCS:.cpp=.o)) +OBJS = $(notdir $(patsubst %.cpp,%.o,$(patsubst %.S,%.o,$(SRCS)))) -VPATH = external:nnue:nnue/features +VPATH = $(shell find . -type d | tr '\n' ':') ### ========================================================================== ### Section 2. High-level Configuration @@ -930,12 +919,12 @@ clean: objclean profileclean # clean binaries and objects objclean: - @rm -f pikafish pikafish.exe *.o ./external/*.o ./nnue/*.o ./nnue/features/*.o + @rm -f pikafish pikafish.exe $(shell find . -name '*.o') # clean auxiliary profiling files profileclean: @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno ./external/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s PGOBENCH.out + @rm -f bench.txt $(shell find . -name '*.gcda' -o -name '*.gcno') *.s PGOBENCH.out @rm -f pikafish.profdata *.profraw @rm -f pikafish.*args* @rm -f pikafish.*lt* @@ -1021,6 +1010,9 @@ config-sanity: net $(EXE): $(OBJS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS) +%.o: %.S + $(CXX) $(CXXFLAGS) -c -o $@ $< + # Force recompilation to ensure version info is up-to-date misc.o: FORCE FORCE: diff --git a/src/external/common/allocations.h b/src/external/common/allocations.h new file mode 100644 index 00000000..84724808 --- /dev/null +++ b/src/external/common/allocations.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* This file provides custom allocation primitives + */ + +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ + +#include "compiler.h" /* MEM_STATIC */ +#define ZSTD_STATIC_LINKING_ONLY +#include "../zstd.h" /* ZSTD_customMem */ + +#ifndef ZSTD_ALLOCATIONS_H + #define ZSTD_ALLOCATIONS_H + +/* custom memory allocation functions */ + +MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) { + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) { + if (customMem.customAlloc) + { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) { + if (ptr != NULL) + { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} + +#endif /* ZSTD_ALLOCATIONS_H */ diff --git a/src/external/common/bits.h b/src/external/common/bits.h new file mode 100644 index 00000000..602bafec --- /dev/null +++ b/src/external/common/bits.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_BITS_H +#define ZSTD_BITS_H + +#include "mem.h" + +MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) { + assert(val != 0); + { + static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, + 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, + 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; + } +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) { + assert(val != 0); +#if defined(_MSC_VER) + #if STATIC_BMI2 == 1 + return (unsigned) _tzcnt_u32(val); + #else + if (val != 0) + { + unsigned long r; + _BitScanForward(&r, val); + return (unsigned) r; + } + else + { + /* Should not reach this code path */ + __assume(0); + } + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned) __builtin_ctz(val); +#elif defined(__ICCARM__) + return (unsigned) __builtin_ctz(val); +#else + return ZSTD_countTrailingZeros32_fallback(val); +#endif +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) { + assert(val != 0); + { + static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, + 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, + 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; + } +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) { + assert(val != 0); +#if defined(_MSC_VER) + #if STATIC_BMI2 == 1 + return (unsigned) _lzcnt_u32(val); + #else + if (val != 0) + { + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned) (31 - r); + } + else + { + /* Should not reach this code path */ + __assume(0); + } + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 4) + 
return (unsigned) __builtin_clz(val); +#elif defined(__ICCARM__) + return (unsigned) __builtin_clz(val); +#else + return ZSTD_countLeadingZeros32_fallback(val); +#endif +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) { + assert(val != 0); +#if defined(_MSC_VER) && defined(_WIN64) + #if STATIC_BMI2 == 1 + return (unsigned) _tzcnt_u64(val); + #else + if (val != 0) + { + unsigned long r; + _BitScanForward64(&r, val); + return (unsigned) r; + } + else + { + /* Should not reach this code path */ + __assume(0); + } + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__) + return (unsigned) __builtin_ctzll(val); +#elif defined(__ICCARM__) + return (unsigned) __builtin_ctzll(val); +#else + { + U32 mostSignificantWord = (U32) (val >> 32); + U32 leastSignificantWord = (U32) val; + if (leastSignificantWord == 0) + { + return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); + } + else + { + return ZSTD_countTrailingZeros32(leastSignificantWord); + } + } +#endif +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val) { + assert(val != 0); +#if defined(_MSC_VER) && defined(_WIN64) + #if STATIC_BMI2 == 1 + return (unsigned) _lzcnt_u64(val); + #else + if (val != 0) + { + unsigned long r; + _BitScanReverse64(&r, val); + return (unsigned) (63 - r); + } + else + { + /* Should not reach this code path */ + __assume(0); + } + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned) (__builtin_clzll(val)); +#elif defined(__ICCARM__) + return (unsigned) (__builtin_clzll(val)); +#else + { + U32 mostSignificantWord = (U32) (val >> 32); + U32 leastSignificantWord = (U32) val; + if (mostSignificantWord == 0) + { + return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); + } + else + { + return ZSTD_countLeadingZeros32(mostSignificantWord); + } + } +#endif +} + +MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) { + if (MEM_isLittleEndian()) + { + if (MEM_64bits()) + { + return ZSTD_countTrailingZeros64((U64) val) >> 3; + } + else + { + return ZSTD_countTrailingZeros32((U32) val) >> 3; + } + } + else + { /* Big Endian CPU */ + if (MEM_64bits()) + { + return ZSTD_countLeadingZeros64((U64) val) >> 3; + } + else + { + return ZSTD_countLeadingZeros32((U32) val) >> 3; + } + } +} + +MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + return 31 - ZSTD_countLeadingZeros32(val); +} + +/* ZSTD_rotateRight_*(): + * Rotates a bitfield to the right by "count" bits. 
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts + */ +MEM_STATIC +U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { + assert(count < 64); + count &= 0x3F; /* for fickle pattern recognition */ + return (value >> count) | (U64) (value << ((0U - count) & 0x3F)); +} + +MEM_STATIC +U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { + assert(count < 32); + count &= 0x1F; /* for fickle pattern recognition */ + return (value >> count) | (U32) (value << ((0U - count) & 0x1F)); +} + +MEM_STATIC +U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { + assert(count < 16); + count &= 0x0F; /* for fickle pattern recognition */ + return (value >> count) | (U16) (value << ((0U - count) & 0x0F)); +} + +#endif /* ZSTD_BITS_H */ diff --git a/src/external/common/bitstream.h b/src/external/common/bitstream.h new file mode 100644 index 00000000..ad313eb3 --- /dev/null +++ b/src/external/common/bitstream.h @@ -0,0 +1,475 @@ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined(__cplusplus) +extern "C" { +#endif +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "compiler.h" /* UNLIKELY() */ +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ +#include "error_private.h" /* error codes and messages */ +#include "bits.h" /* ZSTD_highbit32 */ + + +/*========================================= +* Target specific +=========================================*/ +#ifndef ZSTD_NO_INTRINSICS + #if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__) + #include <immintrin.h> /* support for bextr (experimental)/bzhi */ + #elif defined(__ICCARM__) + #include <intrinsics.h> + #endif +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 #define STREAM_ACCUMULATOR_MIN_64 57 #define STREAM_ACCUMULATOR_MIN \ ((U32) (MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack.
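+ * (Illustrative sketch, not part of the upstream comment; valA, valB and the
+ * bit widths are made-up values: an encoder that performs
+ *     BIT_addBits(&bitC, valA, 5); BIT_addBits(&bitC, valB, 3);
+ * produces a stream from which the decoder retrieves valB first, via
+ * BIT_readBits(&bitD, 3), and only then valA, via BIT_readBits(&bitD, 5).)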
+ */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef size_t BitContainerType; +typedef struct { + BitContainerType bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { + BIT_DStream_unfinished = 0, /* fully refilled */ + BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ + BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ + BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ +} BIT_DStream_status; /* result of BIT_reloadDStream() */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
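+*
+* A minimal consumption loop, as an illustrative sketch (src, srcSize and
+* nbBits are assumed inputs; error handling is abbreviated):
+*     BIT_DStream_t bitD;
+*     size_t const r = BIT_initDStream(&bitD, src, srcSize);
+*     if (ERR_isError(r)) return r;
+*     while (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) {
+*         size_t const field = BIT_readBits(&bitD, nbBits);
+*         ... consume field ...
+*     }
+*     ... a real decoder also drains the bits still buffered after the loop ...
+*     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);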
+*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, + 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity) { + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*) startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) + return ERROR(dstSize_tooSmall); + return 0; +} + +FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { + assert((value >> nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) { + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes * 8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
+ * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) { + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) + bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes * 8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) { + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) + return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { + if (srcSize < 1) + { + ZSTD_memset(bitD, 0, sizeof(*bitD)); + return ERROR(srcSize_wrong); + } + + bitD->start = (const char*) srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) + { /* normal case */ + bitD->ptr = (const char*) srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { + BYTE const lastByte = ((const BYTE*) srcBuffer)[srcSize - 1]; + bitD->bitsConsumed = + lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) + return ERROR(GENERIC); /* endMark not present */ + } + } + else + { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*) (bitD->start); + switch (srcSize) + { + case 7 : + bitD->bitContainer += (BitContainerType) (((const BYTE*) (srcBuffer))[6]) + << (sizeof(bitD->bitContainer) * 8 - 16); + ZSTD_FALLTHROUGH; + + case 6 : + bitD->bitContainer += (BitContainerType) (((const BYTE*) (srcBuffer))[5]) + << (sizeof(bitD->bitContainer) * 8 - 24); + ZSTD_FALLTHROUGH; + + case 5 : + bitD->bitContainer += (BitContainerType) (((const BYTE*) (srcBuffer))[4]) + << (sizeof(bitD->bitContainer) * 8 - 32); + ZSTD_FALLTHROUGH; + + case 4 : + bitD->bitContainer += (BitContainerType) (((const BYTE*) (srcBuffer))[3]) << 24; + ZSTD_FALLTHROUGH; + + case 3 : + bitD->bitContainer += (BitContainerType) (((const BYTE*) (srcBuffer))[2]) << 16; + ZSTD_FALLTHROUGH; + + case 2 : + bitD->bitContainer += (BitContainerType) (((const BYTE*) (srcBuffer))[1]) << 8; + ZSTD_FALLTHROUGH; + + default : + break; + } + { + BYTE const lastByte = ((const BYTE*) srcBuffer)[srcSize - 1]; + bitD->bitsConsumed = lastByte ? 
8 - ZSTD_highbit32(lastByte) : 0; + if (lastByte == 0) + return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32) (sizeof(bitD->bitContainer) - srcSize) * 8; + } + + return srcSize; +} + +FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start) { + return bitContainer >> start; +} + +FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, + U32 const start, + U32 const nbBits) { + U32 const regMask = sizeof(bitContainer) * 8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better + * than accessing memory. When bmi2 instruction is not present, we consider + * such cpus old (pre-Haswell, 2013) and their performance is not of that + * importance. + */ +#if defined(__x86_64__) || defined(_M_X86) + return (bitContainer >> (start & regMask)) & ((((U64) 1) << nbBits) - 1); +#else + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits( + bitD->bitContainer, (sizeof(bitD->bitContainer) * 8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer) * 8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) + >> ((regMask - nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) { + U32 const regMask = sizeof(bitD->bitContainer) * 8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) + >> (((regMask + 1) - nbBits) & regMask); +} + +FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) { + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) { + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream_internal() : + * Simple variant of BIT_reloadDStream(), with two conditions: + * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 + * 2. 
look window is valid after shifted down : bitD->ptr >= bitD->start + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) { + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer) * 8); + bitD->ptr -= bitD->bitsConsumed >> 3; + assert(bitD->ptr >= bitD->start); + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) { + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + return BIT_reloadDStream_internal(bitD); +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not never beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) { + /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ + if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8))) + { + static const BitContainerType zeroFilled = 0; + bitD->ptr = (const char*) &zeroFilled; /* aliasing is allowed for char */ + /* overflow detected, erroneous scenario or end of stream: no update */ + return BIT_DStream_overflow; + } + + assert(bitD->ptr >= bitD->start); + + if (bitD->ptr >= bitD->limitPtr) + { + return BIT_reloadDStream_internal(bitD); + } + if (bitD->ptr == bitD->start) + { + /* reached end of bitStream => no update */ + if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8) + return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr => cautious update */ + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32) (bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes * 8; + bitD->bitContainer = MEM_readLEST( + bitD + ->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) { + return ((DStream->ptr == DStream->start) + && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8)); +} + +#if defined(__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/src/external/common/compiler.h b/src/external/common/compiler.h new file mode 100644 index 00000000..77a2184d --- /dev/null +++ b/src/external/common/compiler.h @@ -0,0 +1,492 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +#include <stddef.h> + +#include "portability_macros.h" + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) + #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) \ + || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ + #define INLINE_KEYWORD inline + #else + #define INLINE_KEYWORD + #endif + + #if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__) + #define FORCE_INLINE_ATTR __attribute__((always_inline)) + #elif defined(_MSC_VER) + #define FORCE_INLINE_ATTR __forceinline + #else + #define FORCE_INLINE_ATTR + #endif + +#else + + #define INLINE_KEYWORD + #define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC). + This explicitly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) + #define WIN_CDECL __cdecl +#else + #define WIN_CDECL +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__) + #define UNUSED_ATTR __attribute__((unused)) +#else + #define UNUSED_ATTR +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compiler's + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 + #define HINT_INLINE static INLINE_KEYWORD +#else + #define HINT_INLINE FORCE_INLINE_TEMPLATE +#endif + +/* "soft" inline : + * The compiler is free to select if it's a good idea to inline or not. + * The main objective is to silence compiler warnings + * when a defined function is included but not used. + * + * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit. + * Updating the prefix is probably preferable, but requires a fairly large codemod, + * since this name is used everywhere.
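+ *
+ * As an illustrative note (assuming the GCC branch below): a header-defined
+ * helper such as
+ *     MEM_STATIC unsigned MEM_isLittleEndian(void) { ... }
+ * expands to `static __inline __attribute__((unused)) unsigned ...`, giving
+ * each translation unit its own private copy without unused-function warnings.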
+ */ +#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */ + #if defined(__GNUC__) + #define MEM_STATIC static __inline UNUSED_ATTR + #elif defined(__IAR_SYSTEMS_ICC__) + #define MEM_STATIC static inline UNUSED_ATTR + #elif defined(__cplusplus) \ + || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) + #define MEM_STATIC static inline + #elif defined(_MSC_VER) + #define MEM_STATIC static __inline + #else + #define MEM_STATIC \ + static /* this version may generate warnings for unused static functions; disable the relevant warning */ + #endif +#endif + +/* force no inlining */ +#ifdef _MSC_VER + #define FORCE_NOINLINE static __declspec(noinline) +#else + #if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__) + #define FORCE_NOINLINE static __attribute__((__noinline__)) + #else + #define FORCE_NOINLINE static + #endif +#endif + + +/* target attribute */ +#if defined(__GNUC__) || defined(__IAR_SYSTEMS_ICC__) + #define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else + #define TARGET_ATTRIBUTE(target) +#endif + +/* Target attribute for BMI2 dynamic dispatch. + * Enable lzcnt, bmi, and bmi2. + * We test for bmi1 & bmi2. lzcnt is included in bmi1. + */ +#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2") + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) + #define PREFETCH_L1(ptr) \ do \ { \ (void) (ptr); \ } while (0) /* disabled */ + #define PREFETCH_L2(ptr) \ do \ { \ (void) (ptr); \ } while (0) /* disabled */ +#else + #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) \ + && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */ + #include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ + #define PREFETCH_L1(ptr) _mm_prefetch((const char*) (ptr), _MM_HINT_T0) + #define PREFETCH_L2(ptr) _mm_prefetch((const char*) (ptr), _MM_HINT_T1) + #elif defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) + #define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) + #define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) + #elif defined(__aarch64__) + #define PREFETCH_L1(ptr) \ do \ { \ __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); \ } while (0) + #define PREFETCH_L2(ptr) \ do \ { \ __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); \ } while (0) + #else + #define PREFETCH_L1(ptr) \ do \ { \ (void) (ptr); \ } while (0) /* disabled */ + #define PREFETCH_L2(ptr) \ do \ { \ (void) (ptr); \ } while (0) /* disabled */ + #endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) \ do \ { \ const char* const _ptr = (const char*) (p); \ size_t const _size = (size_t) (s); \ size_t _pos; \ for (_pos = 0; _pos < _size; _pos += CACHELINE_SIZE) \ { \ PREFETCH_L2(_ptr + _pos); \ } \ } while (0) + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax, + * and some compilers, like Intel ICC and MCST LCC, do not support it at all.
*/ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__) + #if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) + #define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + #else + #define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") + #endif +#else + #define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) + #define LIKELY(x) (__builtin_expect((x), 1)) + #define UNLIKELY(x) (__builtin_expect((x), 0)) +#else + #define LIKELY(x) (x) + #define UNLIKELY(x) (x) +#endif + +#if __has_builtin(__builtin_unreachable) \ + || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) + #define ZSTD_UNREACHABLE \ do \ { \ assert(0), __builtin_unreachable(); \ } while (0) +#else + #define ZSTD_UNREACHABLE \ do \ { \ assert(0); \ } while (0) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ + #include <intrin.h> /* For Visual 2005 */ + #pragma warning(disable: 4100) /* disable: C4100: unreferenced formal parameter */ + #pragma warning(disable: 4127) /* disable: C4127: conditional expression is constant */ + #pragma warning(disable: 4204) /* disable: C4204: non-constant aggregate initializer */ + #pragma warning(disable: 4214) /* disable: C4214: non-int bitfields */ + #pragma warning(disable: 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 + #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) + #ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 + #define STATIC_BMI2 1 + #endif + #elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__) + #define STATIC_BMI2 1 + #endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compile time determination of SIMD support */ +#if !defined(ZSTD_NO_INTRINSICS) + #if defined(__SSE2__) || defined(_M_AMD64) \ + || (defined(_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) + #define ZSTD_ARCH_X86_SSE2 + #endif + #if defined(__ARM_NEON) || defined(_M_ARM64) + #define ZSTD_ARCH_ARM_NEON + #endif + # + #if defined(ZSTD_ARCH_X86_SSE2) + #include <emmintrin.h> + #elif defined(ZSTD_ARCH_ARM_NEON) + #include <arm_neon.h> + #endif +#endif + +/* C-language Attributes are added in C23. */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute) + #define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) +#else + #define ZSTD_HAS_C_ATTRIBUTE(x) 0 +#endif + +/* Only use C++ attributes in C++. Some compilers report support for C++ + * attributes when compiling with C. + */ +#if defined(__cplusplus) && defined(__has_cpp_attribute) + #define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else + #define ZSTD_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough + * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough + * - Else: __attribute__((__fallthrough__)) + */ +#ifndef ZSTD_FALLTHROUGH + #if ZSTD_HAS_C_ATTRIBUTE(fallthrough) + #define ZSTD_FALLTHROUGH [[fallthrough]] + #elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough) + #define ZSTD_FALLTHROUGH [[fallthrough]] + #elif __has_attribute(__fallthrough__) + /* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon + * gcc complains about: a label can only be part of a statement and a declaration is not a statement. + */ + #define ZSTD_FALLTHROUGH \ + ; \ + __attribute__((__fallthrough__)) + #else + #define ZSTD_FALLTHROUGH + #endif +#endif + +/*-************************************************************** +* Alignment check +*****************************************************************/ + +/* this test was initially positioned in mem.h, + * but this file is removed (or replaced) for linux kernel + * so it's now hosted in compiler.h, + * which remains valid for both user & kernel spaces. + */ + +#ifndef ZSTD_ALIGNOF + #if defined(__GNUC__) || defined(_MSC_VER) + /* covers gcc, clang & MSVC */ + /* note : this section must come first, before C11, + * due to a limitation in the kernel source generator */ + #define ZSTD_ALIGNOF(T) __alignof(T) + + #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + /* C11 support */ + #include <stdalign.h> + #define ZSTD_ALIGNOF(T) alignof(T) + + #else + /* No known support for alignof() - imperfect backup */ + #define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T)) + + #endif +#endif /* ZSTD_ALIGNOF */ + +/*-************************************************************** +* Sanitizer +*****************************************************************/ + +/** + * Zstd relies on pointer overflow in its decompressor. + * We add this attribute to functions that rely on pointer overflow. + */ +#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR + #if __has_attribute(no_sanitize) + #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8 + /* gcc < 8 only has signed-integer-overflow which triggers on pointer overflow */ + #define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR \ + __attribute__((no_sanitize("signed-integer-overflow"))) + #else + /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */ + #define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR \ + __attribute__((no_sanitize("pointer-overflow"))) + #endif + #else + #define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR + #endif +#endif + +/** + * Helper function to perform a wrapped pointer difference without triggering + * UBSAN. + * + * @returns lhs - rhs with wrapping + */ +MEM_STATIC +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs) { + return lhs - rhs; +} + +/** + * Helper function to perform a wrapped pointer add without triggering UBSAN. + * + * @return ptr + add with wrapping + */ +MEM_STATIC +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add) { + return ptr + add; +} + +/** + * Helper function to perform a wrapped pointer subtraction without triggering + * UBSAN.
+ * + * @return ptr - sub with wrapping + */ +MEM_STATIC +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub) { + return ptr - sub; +} + +/** + * Helper function to add to a pointer that works around C's undefined behavior + * of adding 0 to NULL. + * + * @returns `ptr + add` except it defines `NULL + 0 == NULL`. + */ +MEM_STATIC +unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add) { + return add > 0 ? ptr + add : ptr; +} + +/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an + * abundance of caution, disable our custom poisoning on mingw. */ +#ifdef __MINGW32__ + #ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE + #define ZSTD_ASAN_DONT_POISON_WORKSPACE 1 + #endif + #ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE + #define ZSTD_MSAN_DONT_POISON_WORKSPACE 1 + #endif +#endif + +#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ + #include <stddef.h> /* size_t */ + #define ZSTD_DEPS_NEED_STDINT + #include "zstd_deps.h" /* intptr_t */ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void* a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void* a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void* x, size_t size); + +/* Print shadow and origin for the memory range to stderr in a human-readable + format. */ +void __msan_print_shadow(const volatile void* x, size_t size); +#endif + +#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ + #include <stddef.h> /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile* addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region.
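+ *
+ * Illustrative pairing, not taken from this patch: a hypothetical buffer
+ * cache could call __asan_poison_memory_region(buf, cap) when a buffer
+ * enters its free list, and __asan_unpoison_memory_region(buf, size)
+ * when the buffer is handed back to the program.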
*/ +void __asan_unpoison_memory_region(void const volatile* addr, size_t size); +#endif + +#endif /* ZSTD_COMPILER_H */ diff --git a/src/external/common/cpu.h b/src/external/common/cpu.h new file mode 100644 index 00000000..c2a23032 --- /dev/null +++ b/src/external/common/cpu.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +#include "mem.h" + +#ifdef _MSC_VER + #include <intrin.h> +#endif + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + #if !defined(__clang__) || __clang_major__ >= 16 + int reg[4]; + __cpuid((int*) reg, 0); + { + int const n = reg[0]; + if (n >= 1) + { + __cpuid((int*) reg, 1); + f1c = (U32) reg[2]; + f1d = (U32) reg[3]; + } + if (n >= 7) + { + __cpuidex((int*) reg, 7, 0); + f7b = (U32) reg[1]; + f7c = (U32) reg[2]; + } + } + #else + /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in + * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs + * to due to being a reserved register. So in that case, do the `cpuid` + * ourselves. Clang supports inline assembly anyway. + */ + U32 n; + __asm__("pushq %%rbx\n\t" + "cpuid\n\t" + "popq %%rbx\n\t" + : "=a"(n) + : "a"(0) + : "rcx", "rdx"); + if (n >= 1) + { + U32 f1a; + __asm__("pushq %%rbx\n\t" + "cpuid\n\t" + "popq %%rbx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1) + :); + } + if (n >= 7) + { + __asm__("pushq %%rbx\n\t" + "cpuid\n\t" + "movq %%rbx, %%rax\n\t" + "popq %%rbx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "rdx"); + } + #endif +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block is like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__("pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) + { + U32 f1a; + __asm__("pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) + { + __asm__("pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) + { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) + { + U32 f7a; + __asm__("cpuid" : "=a"(f7a), "=b"(f7b), "=c"(f7c) : "a"(7), "c"(0) : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/*
cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) +C(sse3, 0) +C(pclmuldq, 1) +C(dtes64, 2) +C(monitor, 3) +C(dscpl, 4) +C(vmx, 5) +C(smx, 6) +C(eist, 7) +C(tm2, 8) +C(ssse3, 9) +C(cnxtid, 10) +C(fma, 12) +C(cx16, 13) +C(xtpr, 14) +C(pdcm, 15) +C(pcid, 17) +C(dca, 18) +C(sse41, 19) +C(sse42, 20) +C(x2apic, 21) +C(movbe, 22) +C(popcnt, 23) +C(tscdeadline, 24) +C(aes, 25) +C(xsave, 26) +C(osxsave, 27) +C(avx, 28) +C(f16c, 29) +C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) +D(fpu, 0) +D(vme, 1) +D(de, 2) +D(pse, 3) +D(tsc, 4) +D(msr, 5) +D(pae, 6) +D(mce, 7) +D(cx8, 8) +D(apic, 9) +D(sep, 11) +D(mtrr, 12) +D(pge, 13) +D(mca, 14) +D(cmov, 15) +D(pat, 16) +D(pse36, 17) +D(psn, 18) +D(clfsh, 19) +D(ds, 21) +D(acpi, 22) +D(mmx, 23) +D(fxsr, 24) +D(sse, 25) +D(sse2, 26) +D(ss, 27) +D(htt, 28) +D(tm, 29) +D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) +B(bmi1, 3) +B(hle, 4) +B(avx2, 5) +B(smep, 7) +B(bmi2, 8) +B(erms, 9) +B(invpcid, 10) +B(rtm, 11) +B(mpx, 14) +B(avx512f, 16) +B(avx512dq, 17) +B(rdseed, 18) +B(adx, 19) +B(smap, 20) +B(avx512ifma, 21) +B(pcommit, 22) +B(clflushopt, 23) +B(clwb, 24) +B(avx512pf, 26) +B(avx512er, 27) +B(avx512cd, 28) +B(sha, 29) +B(avx512bw, 30) +B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) +C(prefetchwt1, 0) +C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/src/external/common/debug.cpp b/src/external/common/debug.cpp new file mode 100644 index 00000000..f613d36e --- /dev/null +++ b/src/external/common/debug.cpp @@ -0,0 +1,30 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL >= 2) +/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a + * translation unit is empty. So remove this from Linux kernel builds, but + * otherwise just leave it in. + */ +int g_debuglevel = DEBUGLEVEL; +#endif diff --git a/src/external/common/debug.h b/src/external/common/debug.h new file mode 100644 index 00000000..8fbc8925 --- /dev/null +++ b/src/external/common/debug.h @@ -0,0 +1,126 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined(__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void) sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL + #define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL >= 1) + #define ZSTD_DEPS_NEED_ASSERT + #include "zstd_deps.h" +#else + #ifndef assert /* assert may be already defined, due to prior #include */ + #define assert(condition) ((void) 0) /* disable assert (default) */ + #endif +#endif + +#if (DEBUGLEVEL >= 2) + #define ZSTD_DEPS_NEED_IO + #include "zstd_deps.h" +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + + #define RAWLOG(l, ...) \ + do \ + { \ + if (l <= g_debuglevel) \ + { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } \ + } while (0) + + #define STRINGIFY(x) #x + #define TOSTRING(x) STRINGIFY(x) + #define LINE_AS_STRING TOSTRING(__LINE__) + + #define DEBUGLOG(l, ...) \ + do \ + { \ + if (l <= g_debuglevel) \ + { \ + ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } \ + } while (0) +#else + #define RAWLOG(l, ...) \ + do \ + { \ + } while (0) /* disabled */ + #define DEBUGLOG(l, ...) \ + do \ + { \ + } while (0) /* disabled */ +#endif + + +#if defined(__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ diff --git a/src/external/common/entropy_common.cpp b/src/external/common/entropy_common.cpp new file mode 100644 index 00000000..3415c692 --- /dev/null +++ b/src/external/common/entropy_common.cpp @@ -0,0 +1,437 @@ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+* Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h" /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#include "huf.h"
+#include "bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros32 */
+
+
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter,
+                           unsigned* maxSVPtr,
+                           unsigned* tableLogPtr,
+                           const void* headerBuffer,
+                           size_t hbSize) {
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    unsigned const maxSV1 = *maxSVPtr + 1;
+    int previous0 = 0;
+
+    if (hbSize < 8)
+    {
+        /* This function only works when hbSize >= 8 */
+        char buffer[8] = {0};
+        ZSTD_memcpy(buffer, headerBuffer, hbSize);
+        {
+            size_t const countSize =
+                FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, buffer, sizeof(buffer));
+            if (FSE_isError(countSize))
+                return countSize;
+            if (countSize > hbSize)
+                return ERROR(corruption_detected);
+            return countSize;
+        }
+    }
+    assert(hbSize >= 8);
+
+    /* init */
+    ZSTD_memset(
+        normalizedCounter, 0,
+        (*maxSVPtr + 1)
+            * sizeof(
+                normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
+        return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1 << nbBits) + 1;
+    threshold = 1 << nbBits;
+    nbBits++;
+
+    for (;;)
+    {
+        if (previous0)
+        {
+            /* Count the number of repeats. Each time the
+             * 2-bit repeat code is 0b11 there is another
+             * repeat.
+             * Avoid UB by setting the high bit to 1.
+             */
+            int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
+            while (repeats >= 12)
+            {
+                charnum += 3 * 12;
+                if (LIKELY(ip <= iend - 7))
+                {
+                    ip += 3;
+                }
+                else
+                {
+                    bitCount -= (int) (8 * (iend - 7 - ip));
+                    bitCount &= 31;
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> bitCount;
+                repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
+            }
+            charnum += 3 * repeats;
+            bitStream >>= 2 * repeats;
+            bitCount += 2 * repeats;
+
+            /* Add the final repeat which isn't 0b11.
*/ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) + break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) + { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount >> 3; + bitCount &= 7; + } + else + { + bitCount -= (int) (8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2 * threshold - 1) - remaining; + int count; + + if ((bitStream & (threshold - 1)) < (U32) max) + { + count = bitStream & (threshold - 1); + bitCount += nbBits - 1; + } + else + { + count = bitStream & (2 * threshold - 1); + if (count >= threshold) + count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) + { + remaining -= count; + } + else + { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short) count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) + { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) + break; + nbBits = ZSTD_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) + break; + + if (LIKELY(ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) + { + ip += bitCount >> 3; + bitCount &= 7; + } + else + { + bitCount -= (int) (8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + } + if (remaining != 1) + return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) + return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) + return ERROR(corruption_detected); + *maxSVPtr = charnum - 1; + + ip += (bitCount + 7) >> 3; + return ip - istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t FSE_readNCount_body_default(short* normalizedCounter, + unsigned* maxSVPtr, + unsigned* tableLogPtr, + const void* headerBuffer, + size_t hbSize) { + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(short* normalizedCounter, + unsigned* maxSVPtr, + unsigned* tableLogPtr, + const void* headerBuffer, + size_t hbSize) { + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSVPtr, + unsigned* tableLogPtr, + const void* headerBuffer, + size_t hbSize, + int bmi2) { +#if DYNAMIC_BMI2 + if (bmi2) + { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, + hbSize); + } +#endif + (void) bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, + hbSize); +} + +size_t FSE_readNCount(short* normalizedCounter, + unsigned* maxSVPtr, + unsigned* tableLogPtr, + const void* headerBuffer, + size_t hbSize) { + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, + /* bmi2 */ 0); +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize) { + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, + srcSize, wksp, sizeof(wksp), /* flags */ 0); +} + +FORCE_INLINE_TEMPLATE size_t HUF_readStats_body(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize, + void* workSpace, + size_t wkspSize, + int bmi2) { + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) + return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) + { /* special header */ + oSize = iSize - 127; + iSize = ((oSize + 1) / 2); + if (iSize + 1 > srcSize) + return ERROR(srcSize_wrong); + if (oSize >= hwSize) + return ERROR(corruption_detected); + ip += 1; + { + U32 n; + for (n = 0; n < oSize; n += 2) + { + huffWeight[n] = ip[n / 2] >> 4; + huffWeight[n + 1] = ip[n / 2] & 15; + } + } + } + else + { /* header compressed with FSE (normal case) */ + if (iSize + 1 > srcSize) + return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize - 1, ip + 1, iSize, 6, workSpace, + wkspSize, bmi2); + if (FSE_isError(oSize)) + return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { + U32 n; + for (n = 0; n < oSize; n++) + { + if (huffWeight[n] > HUF_TABLELOG_MAX) + return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + } + if (weightTotal == 0) + return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { + U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) + return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { + U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << ZSTD_highbit32(rest); + U32 const lastWeight = ZSTD_highbit32(rest) + 1; + if (verif != rest) + return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE) lastWeight; + rankStats[lastWeight]++; + } + } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) + return ERROR( + corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32) (oSize + 1); + return iSize + 1; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize, + void* workSpace, + size_t wkspSize) { + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, + srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize, + void* workSpace, + size_t wkspSize) { + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, + srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize, + void* workSpace, + size_t wkspSize, + int flags) { +#if DYNAMIC_BMI2 + if (flags & HUF_flags_bmi2) + { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, + src, srcSize, workSpace, wkspSize); + } +#endif + (void) flags; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, + srcSize, workSpace, wkspSize); +} diff --git a/src/external/common/error_private.cpp b/src/external/common/error_private.cpp new file mode 100644 index 00000000..aa234b07 --- /dev/null +++ b/src/external/common/error_private.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) { +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void) code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch (code) + { + case PREFIX(no_error) : + return "No error detected"; + case PREFIX(GENERIC) : + return "Error (generic)"; + case PREFIX(prefix_unknown) : + return "Unknown frame descriptor"; + case PREFIX(version_unsupported) : + return "Version not supported"; + case PREFIX(frameParameter_unsupported) : + return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge) : + return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected) : + return "Data corruption detected"; + case PREFIX(checksum_wrong) : + return "Restored data doesn't match checksum"; + case PREFIX(literals_headerWrong) : + return "Header of Literals' block doesn't respect format specification"; + case PREFIX(parameter_unsupported) : + return "Unsupported parameter"; + case PREFIX(parameter_combination_unsupported) : + return "Unsupported combination of parameters"; + case PREFIX(parameter_outOfBound) : + return "Parameter is out of bound"; + case PREFIX(init_missing) : + return "Context should be init first"; + case PREFIX(memory_allocation) : + return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall) : + return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong) : + return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge) : + return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge) : + return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall) : + return "Specified maxSymbolValue is too small"; + case PREFIX(stabilityCondition_notRespected) : + return "pledged buffer stability condition is not respected"; + case PREFIX(dictionary_corrupted) : + return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong) : + return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed) : + return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall) : + return "Destination buffer is too small"; + case PREFIX(srcSize_wrong) : + return "Src size is incorrect"; + case PREFIX(dstBuffer_null) : + return "Operation on NULL destination buffer"; + case PREFIX(noForwardProgress_destFull) : + return "Operation made no progress over multiple calls, due to output buffer being full"; + case PREFIX(noForwardProgress_inputEmpty) : + return "Operation made no progress over multiple calls, due to input being empty"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge) : + return "Frame index is too large"; + case PREFIX(seekableIO) : + return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong) : + return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong) : + return "Source buffer is wrong"; + case PREFIX(sequenceProducer_failed) : + return "Block-level external sequence producer returned an error code"; + case PREFIX(externalSequences_invalid) : + return "External sequences are not valid"; + case PREFIX(maxCode) : + default : + return notErrorCode; + } +#endif +} diff --git a/src/external/common/error_private.h b/src/external/common/error_private.h new file mode 100644 
index 00000000..81ff5cd8 --- /dev/null +++ b/src/external/common/error_private.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined(__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +#include "../zstd_errors.h" /* enum list */ +#include "compiler.h" +#include "debug.h" +#include "zstd_deps.h" /* size_t */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) + #define ERR_STATIC static __attribute__((unused)) +#elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) + #define ERR_STATIC static inline +#elif defined(_MSC_VER) + #define ERR_STATIC static __inline +#else + #define ERR_STATIC \ + static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t) - PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { + if (!ERR_isError(code)) + return (ERR_enum) 0; + return (ERR_enum) (0 - code); +} + +/* check and forward error code */ +#define CHECK_V_F(e, f) \ + size_t const e = f; \ + do \ + { \ + if (ERR_isError(e)) \ + return e; \ + } while (0) +#define CHECK_F(f) \ + do \ + { \ + CHECK_V_F(_var_err__, f); \ + } while (0) + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) { + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR void _force_has_format_string(const char* format, ...) { + (void) format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) 
\ + do \ + { \ + if (0) \ + { \ + _force_has_format_string(__VA_ARGS__); \ + } \ + } while (0) + +#define ERR_QUOTE(str) #str + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + do \ + { \ + if (cond) \ + { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, \ + ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } \ + } while (0) + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do \ + { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, \ + ERR_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while (0) + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) \ + do \ + { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) \ + { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, \ + ERR_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while (0) + +#if defined(__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/src/external/common/fse.h b/src/external/common/fse.h new file mode 100644 index 00000000..52f1d299 --- /dev/null +++ b/src/external/common/fse.h @@ -0,0 +1,691 @@ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H + #define FSE_H + + + /*-***************************************** +* Dependencies +******************************************/ + #include "zstd_deps.h" /* size_t, ptrdiff_t */ + + + /*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ + #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT == 1) && defined(__GNUC__) && (__GNUC__ >= 4) + #define FSE_PUBLIC_API __attribute__((visibility("default"))) + #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT == 1) /* Visual expected */ + #define FSE_PUBLIC_API __declspec(dllexport) + #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT == 1) + #define FSE_PUBLIC_API \ + __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ + #else + #define FSE_PUBLIC_API + #endif + + /*------ Version ------*/ + #define FSE_VERSION_MAJOR 0 + #define FSE_VERSION_MINOR 9 + #define FSE_VERSION_RELEASE 0 + + #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE + #define FSE_QUOTE(str) #str + #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) + #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + + #define FSE_VERSION_NUMBER \ + (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned +FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* +FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned +FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). 
+ useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, + unsigned tableLog, + const unsigned* count, + size_t srcSize, + unsigned maxSymbolValue, + unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount(void* buffer, + size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, + unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned + FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, + const short* normalizedCounter, + unsigned maxSymbolValue, + unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable( + void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). 
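+
+As an illustrative sketch only (hypothetical buffers; FSE_isError() checks
+omitted for brevity; note that FSE_count() belongs to the compression side of
+upstream FSE, which this decompression-only port does not include) :
+    unsigned count[256];
+    unsigned maxSymbolValue = 255;    // worst case, updated by FSE_count()
+    size_t const maxCount = FSE_count(count, &maxSymbolValue, src, srcSize);
+    unsigned const tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
+    short norm[256];
+    size_t const logUsed = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, 1);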
+ +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount(short* normalizedCounter, + unsigned* maxSymbolValuePtr, + unsigned* tableLogPtr, + const void* rBuffer, + size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, + unsigned* tableLogPtr, + const void* rBuffer, + size_t rBuffSize, + int bmi2); + +typedef unsigned + FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
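+
+For instance, this first step could look like the following minimal sketch
+(hypothetical buffer names; error checks via FSE_isError() elided) :
+    short norm[256];                  // sized for the worst case (256 symbols)
+    unsigned maxSymbolValue = 255;
+    unsigned tableLog;
+    size_t const hSize = FSE_readNCount(norm, &maxSymbolValue, &tableLog, rBuffer, rBuffSize);
+On success, 'hSize' bytes of 'rBuffer' have been consumed, so the entropy-coded
+payload to decode starts at rBuffer + hSize.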
+ +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) + #define FSE_H_FSE_STATIC_LINKING_ONLY + + /* *** Dependency *** */ + #include "bitstream.h" + + + /* ***************************************** +* Static allocation +*******************************************/ + /* FSE buffer bounds */ + #define FSE_NCOUNTBOUND 512 + #define FSE_BLOCKBOUND(size) \ + ((size) + ((size) >> 7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) + #define FSE_COMPRESSBOUND(size) \ + (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + + /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ + #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) \ + (1 + (1 << ((maxTableLog) - 1)) + (((maxSymbolValue) + 1) * 2)) + #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << (maxTableLog))) + + /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ + #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) \ + (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) + #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, + size_t srcSize, + unsigned maxSymbolValue, + unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +size_t FSE_buildCTable_rle(FSE_CTable* ct, unsigned char symbolValue); + /**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + + /* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + * See FSE_buildCTable_wksp() for breakdown of workspace usage. 
+ */ + #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) \ + (((maxSymbolValue + 2) + (1ull << (tableLog))) / 2 \ + + sizeof(U64) / sizeof(U32) /* additional 8 bytes for potential table overwrite */) + #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) \ + (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, + const short* normalizedCounter, + unsigned maxSymbolValue, + unsigned tableLog, + void* workSpace, + size_t wkspSize); + + #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) \ + (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) + #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) \ + ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) \ + / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + unsigned tableLog, + void* workSpace, + size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + + #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) \ + (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 \ + + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) \ + + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) + #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) \ + (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp_bmi2(void* dst, + size_t dstCapacity, + const void* cSrc, + size_t cSrcSize, + unsigned maxLog, + void* workSpace, + size_t wkspSize, + int bmi2); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`. + * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ +} FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. +*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. 
+ size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). + unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. 
Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) { + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t) 1 << tableLog; + statePtr->stateTable = u16ptr + 2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) { + FSE_initCState(statePtr, ct); + { + const FSE_symbolCompressionTransform symbolTT = + ((const FSE_symbolCompressionTransform*) (statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*) (statePtr->stateTable); + U32 nbBitsOut = (U32) ((symbolTT.deltaNbBits + (1 << 15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) { + FSE_symbolCompressionTransform const symbolTT = + ((const FSE_symbolCompressionTransform*) (statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*) (statePtr->stateTable); + U32 const nbBitsOut = (U32) ((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, (size_t) statePtr->value, nbBitsOut); + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) { + BIT_addBits(bitC, (size_t) statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e. 
a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) { + const FSE_symbolCompressionTransform* symbolTT = + (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1 << 16) - 1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, + U32 tableLog, + U32 symbolValue, + U32 accuracyLog) { + const FSE_symbolCompressionTransform* symbolTT = + (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits + 1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31 - tableLog); /* ensure enough room for renormalization double shift */ + { + U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = + (deltaFromThreshold << accuracyLog) + >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits + 1) * bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct { + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) { + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*) ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) { + FSE_decode_t const DInfo = ((const FSE_decode_t*) (DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { + FSE_decode_t const DInfo = ((const FSE_decode_t*) (DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { + FSE_decode_t const DInfo = ((const FSE_decode_t*) (DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) { + FSE_decode_t const DInfo = ((const FSE_decode_t*) (DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) { return DStatePtr->state == 0; } + + + #ifndef FSE_COMMONDEFS_ONLY + + /* ************************************************************** +* Tuning parameters +****************************************************************/ + /*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ + #ifndef FSE_MAX_MEMORY_USAGE + #define FSE_MAX_MEMORY_USAGE 14 + #endif + #ifndef FSE_DEFAULT_MEMORY_USAGE + #define FSE_DEFAULT_MEMORY_USAGE 13 + #endif + #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) + #error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" + #endif + + /*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. +* Required for proper stack allocation */ + #ifndef FSE_MAX_SYMBOL_VALUE + #define FSE_MAX_SYMBOL_VALUE 255 + #endif + + /* ************************************************************** +* template functions type & suffix +****************************************************************/ + #define FSE_FUNCTION_TYPE BYTE + #define FSE_FUNCTION_EXTENSION + #define FSE_DECODE_TYPE FSE_decode_t + + + #endif /* !FSE_COMMONDEFS_ONLY */ + + + /* *************************************************************** +* Constants +*****************************************************************/ + #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2) + #define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG) + #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1) + #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2) + #define FSE_MIN_TABLELOG 5 + + #define FSE_TABLELOG_ABSOLUTE_MAX 15 + #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX + #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" + #endif + + #define FSE_TABLESTEP(tableSize) (((tableSize) >> 1) + ((tableSize) >> 3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined(__cplusplus) +} +#endif diff --git a/src/external/common/fse_decompress.cpp b/src/external/common/fse_decompress.cpp new file mode 100644 index 00000000..c2c5844f --- /dev/null +++ b/src/external/common/fse_decompress.cpp @@ -0,0 +1,399 @@ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +#include "debug.h" /* assert */ +#include "bitstream.h" +#include "compiler.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#include "error_private.h" +#include "zstd_deps.h" /* ZSTD_memcpy */ +#include "bits.h" /* ZSTD_highbit32 */ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION + #error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE + #error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X, Y) X##Y +#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) +#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + unsigned tableLog, + void* workSpace, + size_t wkspSize) { + void* const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*) workSpace; + BYTE* spread = (BYTE*) (symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize - 1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) + return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) + return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) + return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { + FSE_DTableHeader DTableH; + DTableH.tableLog = (U16) tableLog; + DTableH.fastMode = 1; + { + S16 const largeLimit = (S16) (1 << (tableLog - 1)); + U32 s; + for (s = 0; s < maxSV1; s++) + { + if (normalizedCounter[s] == -1) + { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE) s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) + DTableH.fastMode = 0; + symbolNext[s] = (U16) normalizedCounter[s]; + } + } + } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) + { + size_t const tableMask = tableSize - 1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s = 0; s < maxSV1; ++s, sv += add) + { + int i; + int const n = normalizedCounter[s]; + MEM_write64(spread + pos, sv); + for (i = 8; i < n; i += 8) + { + MEM_write64(spread + pos + i, sv); + } + pos += (size_t) n; + } + } + /* Now we spread those positions across the table. + * The benefit of doing it in two stages is that we avoid the + * variable size inner loop, which caused lots of branch misses. + * Now we can run through all the positions without any branch misses. + * We unroll the loop twice, since that is what empirically worked best. + */ + { + size_t position = 0; + size_t s; + size_t const unroll = 2; + assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ + for (s = 0; s < (size_t) tableSize; s += unroll) + { + size_t u; + for (u = 0; u < unroll; ++u) + { + size_t const uPosition = (position + (u * step)) & tableMask; + tableDecode[uPosition].symbol = spread[s + u]; + } + position = (position + (unroll * step)) & tableMask; + } + assert(position == 0); + } + } + else + { + U32 const tableMask = tableSize - 1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s = 0; s < maxSV1; s++) + { + int i; + for (i = 0; i < normalizedCounter[s]; i++) + { + tableDecode[position].symbol = (FSE_FUNCTION_TYPE) s; + position = (position + step) & tableMask; + while (position > highThreshold) + position = (position + step) & tableMask; /* lowprob area */ + } + } + if (position != 0) + return ERROR( + GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u = 0; u < tableSize; u++) + { + FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE) (tableDecode[u].symbol); + U32 const nextState = symbolNext[symbol]++; + tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState)); + tableDecode[u].newState = (U16) ((nextState << tableDecode[u].nbBits) - tableSize); + } + } + + return 0; +} + +size_t FSE_buildDTable_wksp(FSE_DTable* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + unsigned tableLog, + void* workSpace, + size_t wkspSize) { + return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, + wkspSize); +} + + +#ifndef FSE_COMMONDEFS_ONLY + +/*-******************************************************* +* Decompression (Byte symbols) +*********************************************************/ + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(void* dst, + size_t maxDstSize, + const void* cSrc, + size_t cSrcSize, + const FSE_DTable* dt, + const unsigned fast) { + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax - 3; + + BIT_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + + /* Init */ + CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + + RETURN_ERROR_IF(BIT_reloadDStream(&bitD) == BIT_DStream_overflow, corruption_detected, ""); + + #define FSE_GETSYMBOL(statePtr) \ + fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) + { + op[0] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ + { + if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) + { + op += 2; + break; + } + } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) + { + if (op > (omax - 2)) + return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) + { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op > (omax - 2)) + return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) + { + *op++ = FSE_GETSYMBOL(&state1); + break; + } + } + + assert(op >= ostart); + return (size_t) (op - ostart); +} + +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; +} FSE_DecompressWksp; + + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(void* dst, + size_t dstCapacity, + const void* cSrc, + size_t cSrcSize, + unsigned maxLog, + void* workSpace, + size_t wkspSize, + int bmi2) { + const BYTE* const istart = (const BYTE*) cSrc; + const BYTE* ip = istart; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*) workSpace; + size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable); + FSE_DTable* const dtable = (FSE_DTable*) workSpace + dtablePos; + + FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) + return ERROR(GENERIC); + + /* correct offset to dtable depends on this property */ + FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0); + + /* normal FSE decoding mode */ + { + size_t const NCountLength = + FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) + return NCountLength; + if (tableLog > maxLog) + return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) + return ERROR(tableLog_tooLarge); + assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); + workSpace = (BYTE*) workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F(FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, + wkspSize)); + + { + const void* ptr = dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*) ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. 
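+ * Outlining the wrappers lets the default and BMI2 paths instantiate the same
+ * inlined body with different compile-time settings (the BMI2 variant below
+ * additionally carries BMI2_TARGET_ATTRIBUTE).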
*/ +static size_t FSE_decompress_wksp_body_default(void* dst, + size_t dstCapacity, + const void* cSrc, + size_t cSrcSize, + unsigned maxLog, + void* workSpace, + size_t wkspSize) { + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, + 0); +} + + #if DYNAMIC_BMI2 +BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, + size_t dstCapacity, + const void* cSrc, + size_t cSrcSize, + unsigned maxLog, + void* workSpace, + size_t wkspSize) { + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, + 1); +} + #endif + +size_t FSE_decompress_wksp_bmi2(void* dst, + size_t dstCapacity, + const void* cSrc, + size_t cSrcSize, + unsigned maxLog, + void* workSpace, + size_t wkspSize, + int bmi2) { + #if DYNAMIC_BMI2 + if (bmi2) + { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, + wkspSize); + } + #endif + (void) bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, + wkspSize); +} + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/src/external/common/huf.h b/src/external/common/huf.h new file mode 100644 index 00000000..36b9173c --- /dev/null +++ b/src/external/common/huf.h @@ -0,0 +1,383 @@ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +#if defined(__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 + #define HUF_H_298734234 + + /* *** Dependencies *** */ + #include "zstd_deps.h" /* size_t */ + #include "mem.h" /* U32 */ + #define FSE_STATIC_LINKING_ONLY + #include "fse.h" + + + /* *** Tool functions *** */ + #define HUF_BLOCKSIZE_MAX \ + (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + + #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) + #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) + + /* *** Constants *** */ + #define HUF_TABLELOG_MAX \ + 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ + #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ + #define HUF_SYMBOLVALUE_MAX 255 + + #define HUF_TABLELOG_ABSOLUTEMAX \ + 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ + #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) + #error "HUF_TABLELOG_MAX is too large !" 
+ #endif + + + /* **************************************** +* Static allocation +******************************************/ + /* HUF buffer bounds */ + #define HUF_CTABLEBOUND 129 + #define HUF_BLOCKBOUND(size) \ + (size + (size >> 8) \ + + 8) /* only true when incompressible is pre-filtered with fast heuristic */ + #define HUF_COMPRESSBOUND(size) \ + (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +typedef size_t HUF_CElt; /* consider it an incomplete type */ + #define HUF_CTABLE_SIZE_ST(maxSymbolValue) \ + ((maxSymbolValue) + 2) /* Use tables of size_t, for proper alignment */ + #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t)) + #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; + #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog))) + #define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog) - 1)] = { \ + ((U32) ((maxTableLog) - 1) * 0x01000001)} + #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32) (maxTableLog) * 0x01000001)} + + +/* **************************************** +* Advanced decompression functions +******************************************/ + +/** + * Huffman flags bitset. + * For all flags, 0 is the default value. + */ +typedef enum { + /** + * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. + * Otherwise: Ignored. + */ + HUF_flags_bmi2 = (1 << 0), + /** + * If set: Test possible table depths to find the one that produces the smallest header + encoded size. + * If unset: Use heuristic to find the table depth. + */ + HUF_flags_optimalDepth = (1 << 1), + /** + * If set: If the previous table can encode the input, always reuse the previous table. + * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. + */ + HUF_flags_preferRepeat = (1 << 2), + /** + * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. + * If unset: Always histogram the entire input. + */ + HUF_flags_suspectUncompressible = (1 << 3), + /** + * If set: Don't use assembly implementations + * If unset: Allow using assembly implementations + */ + HUF_flags_disableAsm = (1 << 4), + /** + * If set: Don't use the fast decoding loop, always use the fallback decoding loop. + * If unset: Use the fast decoding loop when possible. + */ + HUF_flags_disableFast = (1 << 5) +} HUF_flags_e; + + + /* **************************************** + * HUF detailed API + * ****************************************/ + #define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. 
+ * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_minTableLog(unsigned symbolCardinality); +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); +unsigned HUF_optimalTableLog( + unsigned maxTableLog, + size_t srcSize, + unsigned maxSymbolValue, + void* workSpace, + size_t wkspSize, + HUF_CElt* table, + const unsigned* count, + int + flags); /* table is used as scratch space for building and testing tables, not a return value */ +size_t HUF_writeCTable_wksp(void* dst, + size_t maxDstSize, + const HUF_CElt* CTable, + unsigned maxSymbolValue, + unsigned huffLog, + void* workspace, + size_t workspaceSize); +size_t HUF_compress4X_usingCTable( + void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); +size_t +HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ +} HUF_repeat; + +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. + * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ +size_t HUF_compress4X_repeat( + void* dst, + size_t dstSize, + const void* src, + size_t srcSize, + unsigned maxSymbolValue, + unsigned tableLog, + void* workSpace, + size_t + wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, + HUF_repeat* repeat, + int flags); + + /** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ + #define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) + #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp(HUF_CElt* tree, + const unsigned* count, + U32 maxSymbolValue, + U32 maxNbBits, + void* workSpace, + size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize); + + /*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
+ */ + #define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX - 1) + #define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, + size_t hwSize, + U32* rankStats, + U32* nbSymbolsPtr, + U32* tableLogPtr, + const void* src, + size_t srcSize, + void* workspace, + size_t wkspSize, + int flags); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable(HUF_CElt* CTable, + unsigned* maxSymbolValuePtr, + const void* src, + size_t srcSize, + unsigned* hasZeroWeights); + +/** HUF_getNbBitsFromCTable() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0 + * Note 2 : is not inlined, as HUF_CElt definition is private + */ +U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); + +typedef struct { + BYTE tableLog; + BYTE maxSymbolValue; + BYTE unused[sizeof(size_t) - 2]; +} HUF_CTableHeader; + +/** HUF_readCTableHeader() : + * @returns The header from the CTable specifying the tableLog and the maxSymbolValue. + */ +HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize); + + /** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. + */ + #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) + #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X_usingCTable( + void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
+ * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ +size_t HUF_compress1X_repeat( + void* dst, + size_t dstSize, + const void* src, + size_t srcSize, + unsigned maxSymbolValue, + unsigned tableLog, + void* workSpace, + size_t + wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, + HUF_repeat* repeat, + int flags); + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags); + #ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags); /**< double-symbols decoder */ + #endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable(void* dst, + size_t maxDstSize, + const void* cSrc, + size_t cSrcSize, + const HUF_DTable* DTable, + int flags); + #ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags); + #endif +size_t HUF_decompress4X_usingDTable(void* dst, + size_t maxDstSize, + const void* cSrc, + size_t cSrcSize, + const HUF_DTable* DTable, + int flags); +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags); + #ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp( + HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); + #endif + #ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2_wksp( + HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); + #endif + +#endif /* HUF_H_298734234 */ + +#if defined(__cplusplus) +} +#endif diff --git a/src/external/common/mem.h b/src/external/common/mem.h new file mode 100644 index 00000000..40b20301 --- /dev/null +++ b/src/external/common/mem.h @@ -0,0 +1,412 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include <stddef.h> /* size_t, ptrdiff_t */
+#include "compiler.h" /* __has_builtin */
+#include "debug.h" /* DEBUG_STATIC_ASSERT */
+#include "zstd_deps.h" /* ZSTD_memcpy */
+
+
+/*-****************************************
+* Compiler specifics
+******************************************/
+#if defined(_MSC_VER) /* Visual Studio */
+    #include <stdlib.h> /* _byteswap_ulong */
+    #include <intrin.h> /* _byteswap_* */
+#elif defined(__ICCARM__)
+    #include <intrinsics.h>
+#endif
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
+#if !defined(__VMS) \
+    && (defined(__cplusplus) \
+        || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+    #if defined(_AIX)
+        #include <inttypes.h>
+    #else
+        #include <stdint.h> /* intptr_t */
+    #endif
+typedef uint8_t BYTE;
+typedef uint8_t U8;
+typedef int8_t S8;
+typedef uint16_t U16;
+typedef int16_t S16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef int64_t S64;
+#else
+    #include <limits.h>
+    #if CHAR_BIT != 8
+        #error "this implementation requires char to be exactly 8-bit type"
+    #endif
+typedef unsigned char BYTE;
+typedef unsigned char U8;
+typedef signed char S8;
+    #if USHRT_MAX != 65535
+        #error "this implementation requires short to be exactly 16-bit type"
+    #endif
+typedef unsigned short U16;
+typedef signed short S16;
+    #if UINT_MAX != 4294967295
+        #error "this implementation requires int to be exactly 32-bit type"
+    #endif
+typedef unsigned int U32;
+typedef signed int S32;
+/* note : there are no limits defined for long long type in C90.
+ * limits exist in C99, however, in such case, <stdint.h> is preferred */
+typedef unsigned long long U64;
+typedef signed long long S64;
+#endif
+
+
+/*-**************************************************************
+* Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+
+/*-**************************************************************
+* Memory I/O Implementation
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
+ * Method 0 : always use `memcpy()`. Safe and portable.
+ * Method 1 : Use compiler extension to set unaligned access.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ * Default : method 1 if supported, else method 0 + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ + #ifdef __GNUC__ + #define MEM_FORCE_MEMORY_ACCESS 1 + #endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t) == 4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t) == 8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) { +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ + && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + return 1; +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) \ + && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + return 0; +#elif defined(__clang__) && __LITTLE_ENDIAN__ + return 1; +#elif defined(__clang__) && __BIG_ENDIAN__ + return 0; +#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86) + return 1; +#elif defined(__DMC__) && defined(_M_IX86) + return 1; +#elif defined(__IAR_SYSTEMS_ICC__) && __LITTLE_ENDIAN__ + return 1; +#else + const union { + U32 u; + BYTE c[4]; + } one = {1}; /* don't use static : performance detrimental */ + return one.c[0]; +#endif +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS == 2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*) memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*) memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*) memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS == 1) + +typedef __attribute__((aligned(1))) U16 unalign16; +typedef __attribute__((aligned(1))) U32 unalign32; +typedef __attribute__((aligned(1))) U64 unalign64; +typedef __attribute__((aligned(1))) size_t unalignArch; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*) ptr; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*) ptr; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*) ptr; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*) ptr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*) memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*) memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*) memPtr = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) { + U16 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) { + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) { + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) { + size_t val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ZSTD_memcpy(memPtr, &value, sizeof(value)); } + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ZSTD_memcpy(memPtr, &value, sizeof(value)); } + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ZSTD_memcpy(memPtr, &value, sizeof(value)); } + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32_fallback(U32 in) { + return ((in << 24) & 0xff000000) | ((in << 8) & 0x00ff0000) | ((in >> 8) & 0x0000ff00) + | ((in >> 24) & 0x000000ff); +} + +MEM_STATIC U32 MEM_swap32(U32 in) { +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#elif defined(__ICCARM__) + return __REV(in); +#else + return MEM_swap32_fallback(in); +#endif +} + +MEM_STATIC U64 MEM_swap64_fallback(U64 in) { + return ((in << 56) & 0xff00000000000000ULL) | ((in << 40) & 0x00ff000000000000ULL) + | ((in << 24) & 0x0000ff0000000000ULL) | ((in << 8) & 0x000000ff00000000ULL) + | ((in >> 8) & 0x00000000ff000000ULL) | ((in >> 24) & 0x0000000000ff0000ULL) + | ((in >> 40) & 0x000000000000ff00ULL) | ((in >> 56) & 0x00000000000000ffULL); +} + +MEM_STATIC U64 MEM_swap64(U64 in) { +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return MEM_swap64_fallback(in); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) { + if (MEM_32bits()) + return (size_t) MEM_swap32((U32) in); + else + return (size_t) MEM_swap64((U64) in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) { + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*) memPtr; + return (U16) (p[0] + (p[1] << 8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) { + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*) memPtr; + p[0] = (BYTE) val; + p[1] = (BYTE) (val >> 8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) { + return (U32) MEM_readLE16(memPtr) + ((U32) (((const BYTE*) memPtr)[2]) << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) { + MEM_writeLE16(memPtr, (U16) val); + ((BYTE*) memPtr)[2] = (BYTE) (val >> 16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) { + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) { + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) { + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* 
memPtr, U64 val64) { + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) { + if (MEM_32bits()) + return (size_t) MEM_readLE32(memPtr); + else + return (size_t) MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) { + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32) val); + else + MEM_writeLE64(memPtr, (U64) val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) { + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) { + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) { + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) { + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) { + if (MEM_32bits()) + return (size_t) MEM_readBE32(memPtr); + else + return (size_t) MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) { + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32) val); + else + MEM_writeBE64(memPtr, (U64) val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { + DEBUG_STATIC_ASSERT((sizeof(size_t) == 4) || (sizeof(size_t) == 8)); +} + + +#if defined(__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ diff --git a/src/external/common/pool.cpp b/src/external/common/pool.cpp new file mode 100644 index 00000000..26060115 --- /dev/null +++ b/src/external/common/pool.cpp @@ -0,0 +1,407 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* ====== Dependencies ======= */ +#include "../common/allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */ +#include "zstd_deps.h" /* size_t */ +#include "debug.h" /* assert */ +#include "pool.h" + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) + #pragma warning(disable: 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + + #include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void* opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job* queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. + */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*) opaque; + if (!ctx) + { + return NULL; + } + for (;;) + { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while (ctx->queueEmpty || (ctx->numThreadsBusy >= ctx->threadLimit)) + { + if (ctx->shutdown) + { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { + POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = (ctx->queueHead == ctx->queueTail); + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +/* ZSTD_createThreadPool() : public access point */ +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { return POOL_create(numThreads, 0); } + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) + { + return NULL; + } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*) ZSTD_customCalloc(sizeof(POOL_ctx), customMem); + if (!ctx) + { + return NULL; + } + /* Initialize the job queue. 
+ * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*) ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) + { + POOL_free(ctx); + return NULL; + } + } + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = + (ZSTD_pthread_t*) ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) + { + POOL_free(ctx); + return NULL; + } + /* Initialize the threads */ + { + size_t i; + for (i = 0; i < numThreads; ++i) + { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) + { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } + } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. +*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { + size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) + { + ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */ + } + } +} + +void POOL_free(POOL_ctx* ctx) { + if (!ctx) + { + return; + } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); +} + +/*! POOL_joinJobs() : + * Waits for all queued jobs to finish executing. 
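+ * The pool itself stays usable afterwards (unlike POOL_free()), e.g.
+ * (illustrative sketch; fn and arg are placeholder names):
+ *     POOL_add(ctx, &fn, &arg);     - runs asynchronously
+ *     POOL_joinJobs(ctx);           - blocks until fn(&arg) has returned
+ *     POOL_add(ctx, &fn2, &arg2);   - the same pool accepts further jobs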
+ */ +void POOL_joinJobs(POOL_ctx* ctx) { + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + while (!ctx->queueEmpty || ctx->numThreadsBusy > 0) + { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + +void ZSTD_freeThreadPool(ZSTD_threadPool* pool) { POOL_free(pool); } + +size_t POOL_sizeof(const POOL_ctx* ctx) { + if (ctx == NULL) + return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) { + if (numThreads <= ctx->threadCapacity) + { + if (!numThreads) + return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { + ZSTD_pthread_t* const threadPool = + (ZSTD_pthread_t*) ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) + return 1; + /* replace existing thread pool */ + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t)); + ZSTD_customFree(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { + size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) + { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) + { + ctx->threadCapacity = threadId; + return 1; + } + } + } + } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + int result; + if (ctx == NULL) + return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. 
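+ *
+ * For example, with queueSize == 4 (created with an intended queueSize of 3),
+ * queueTail == 1 and queueHead == 2, the queue is full because
+ * (queueTail + 1) % queueSize == 2 == queueHead.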
+ */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) + { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } + else + { + return (ctx->numThreadsBusy == ctx->threadLimit) || !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void* opaque) { + POOL_job job; + job.function = function; + job.opaque = opaque; + assert(ctx != NULL); + if (ctx->shutdown) + return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) + { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) + { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_poolCtx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void) numThreads; + (void) queueSize; + (void) customMem; + return &g_poolCtx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void) ctx; +} + +void POOL_joinJobs(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void) ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void) ctx; + (void) numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void) ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void) ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(const POOL_ctx* ctx) { + if (ctx == NULL) + return 0; /* supports sizeof NULL */ + assert(ctx == &g_poolCtx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/src/external/common/pool.h b/src/external/common/pool.h new file mode 100644 index 00000000..3785d277 --- /dev/null +++ b/src/external/common/pool.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined(__cplusplus) +extern "C" { +#endif + + +#include "zstd_deps.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +#include "../zstd.h" + +typedef struct POOL_ctx_s POOL_ctx; + +/*! 
POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + + +/*! POOL_joinJobs() : + * Waits for all queued jobs to finish executing. + */ +void POOL_joinJobs(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and reuse existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(const POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a queue slot is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/external/common/portability_macros.h b/src/external/common/portability_macros.h new file mode 100644 index 00000000..8c67c34e --- /dev/null +++ b/src/external/common/portability_macros.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_PORTABILITY_MACROS_H +#define ZSTD_PORTABILITY_MACROS_H + +/** + * This header file contains macro definitions to support portability. + * This header is shared between C and ASM code, so it MUST only + * contain macro definitions. It MUST not contain any C code. + * + * This header ONLY defines macros to detect platforms/feature support. + * + */ + + +/* compat. with non-clang compilers */ +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_builtin + #define __has_builtin(x) 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_feature + #define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER + #if __has_feature(memory_sanitizer) + #define ZSTD_MEMORY_SANITIZER 1 + #else + #define ZSTD_MEMORY_SANITIZER 0 + #endif +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER + #if __has_feature(address_sanitizer) + #define ZSTD_ADDRESS_SANITIZER 1 + #elif defined(__SANITIZE_ADDRESS__) + #define ZSTD_ADDRESS_SANITIZER 1 + #else + #define ZSTD_ADDRESS_SANITIZER 0 + #endif +#endif + +/* detects whether we are being compiled under dfsan */ +#ifndef ZSTD_DATAFLOW_SANITIZER + #if __has_feature(dataflow_sanitizer) + #define ZSTD_DATAFLOW_SANITIZER 1 + #else + #define ZSTD_DATAFLOW_SANITIZER 0 + #endif +#endif + +/* Mark the internal assembly functions as hidden */ +#ifdef __ELF__ + #define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func +#elif defined(__APPLE__) + #define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func +#else + #define ZSTD_HIDE_ASM_FUNCTION(func) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. + */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X64)) && !defined(__BMI2__) + #define DYNAMIC_BMI2 1 + #else + #define DYNAMIC_BMI2 0 + #endif +#endif + +/** + * Only enable assembly for GNU C compatible compilers, + * because other platforms may not support GAS assembly syntax. + * + * Only enable assembly for Linux / MacOS, other platforms may + * work, but they haven't been tested. This could likely be + * extended to BSD systems. + * + * Disable assembly when MSAN is enabled, because MSAN requires + * 100% of code to be instrumented to work. + */ +#if defined(__GNUC__) + #if defined(__linux__) || defined(__linux) || defined(__APPLE__) + #if ZSTD_MEMORY_SANITIZER + #define ZSTD_ASM_SUPPORTED 0 + #elif ZSTD_DATAFLOW_SANITIZER + #define ZSTD_ASM_SUPPORTED 0 + #else + #define ZSTD_ASM_SUPPORTED 1 + #endif + #else + #define ZSTD_ASM_SUPPORTED 0 + #endif +#else + #define ZSTD_ASM_SUPPORTED 0 +#endif + +/** + * Determines whether we should enable assembly for x86-64 + * with BMI2. + * + * Enable if all of the following conditions hold: + * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM + * - Assembly is supported + * - We are compiling for x86-64 and either: + * - DYNAMIC_BMI2 is enabled + * - BMI2 is supported at compile time + */ +#if !defined(ZSTD_DISABLE_ASM) && ZSTD_ASM_SUPPORTED && defined(__x86_64__) \ + && (DYNAMIC_BMI2 || defined(__BMI2__)) + #define ZSTD_ENABLE_ASM_X86_64_BMI2 1 +#else + #define ZSTD_ENABLE_ASM_X86_64_BMI2 0 +#endif + +/* + * For x86 ELF targets, add .note.gnu.property section for Intel CET in + * assembly sources when CET is enabled. + * + * Additionally, any function that may be called indirectly must begin + * with ZSTD_CET_ENDBRANCH. 
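+ * (When <cet.h> is available, ZSTD_CET_ENDBRANCH expands via _CET_ENDBR to the
+ * endbr64/endbr32 landing-pad instruction; otherwise it expands to nothing, as
+ * defined below.)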
+ */
+#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) && defined(__has_include)
+    #if __has_include(<cet.h>)
+        #include <cet.h>
+        #define ZSTD_CET_ENDBRANCH _CET_ENDBR
+    #endif
+#endif
+
+#ifndef ZSTD_CET_ENDBRANCH
+    #define ZSTD_CET_ENDBRANCH
+#endif
+
+#endif /* ZSTD_PORTABILITY_MACROS_H */
diff --git a/src/external/common/threading.cpp b/src/external/common/threading.cpp
new file mode 100644
index 00000000..828522f4
--- /dev/null
+++ b/src/external/common/threading.cpp
@@ -0,0 +1,184 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * This file holds wrappers for systems which do not support pthreads
+ */
+
+#include "threading.h"
+
+/* create fake symbol to avoid empty translation unit warning */
+int g_ZSTD_threading_useless_symbol;
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+    /**
+     * Windows minimalist Pthread Wrapper
+     */
+
+
+    /* === Dependencies === */
+    #include <process.h> /* _beginthreadex */
+    #include <errno.h>   /* errno */
+
+
+/* === Implementation === */
+
+typedef struct {
+    void* (*start_routine)(void*);
+    void* arg;
+    int initialized;
+    ZSTD_pthread_cond_t initialized_cond;
+    ZSTD_pthread_mutex_t initialized_mutex;
+} ZSTD_thread_params_t;
+
+static unsigned __stdcall worker(void* arg) {
+    void* (*start_routine)(void*);
+    void* thread_arg;
+
+    /* Initialize thread_arg and start_routine, and signal the main thread that we don't need it
+     * to wait any longer.
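+     * The ZSTD_thread_params_t block lives on the creating thread's stack, so it
+     * must not be touched again once `initialized` has been signalled and the
+     * mutex released.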
+ */ + { + ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*) arg; + thread_arg = thread_param->arg; + start_routine = thread_param->start_routine; + + /* Signal main thread that we are running and do not depend on its memory anymore */ + ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex); + thread_param->initialized = 1; + ZSTD_pthread_cond_signal(&thread_param->initialized_cond); + ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex); + } + + start_routine(thread_arg); + + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, + const void* unused, + void* (*start_routine)(void*), + void* arg) { + ZSTD_thread_params_t thread_param; + (void) unused; + + if (thread == NULL) + return -1; + *thread = NULL; + + thread_param.start_routine = start_routine; + thread_param.arg = arg; + thread_param.initialized = 0; + + /* Setup thread initialization synchronization */ + if (ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) + { + /* Should never happen on Windows */ + return -1; + } + if (ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) + { + /* Should never happen on Windows */ + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + return -1; + } + + /* Spawn thread */ + *thread = (HANDLE) _beginthreadex(NULL, 0, worker, &thread_param, 0, NULL); + if (*thread == NULL) + { + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + return errno; + } + + /* Wait for thread to be initialized */ + ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex); + while (!thread_param.initialized) + { + ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex); + } + ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex); + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread) { + DWORD result; + + if (!thread) + return 0; + + result = WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + + switch (result) + { + case WAIT_OBJECT_0 : + return 0; + case WAIT_ABANDONED : + return EINVAL; + default : + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + + #define ZSTD_DEPS_NEED_MALLOC + #include "zstd_deps.h" + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) { + assert(mutex != NULL); + *mutex = (pthread_mutex_t*) ZSTD_malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; + return pthread_mutex_init(*mutex, attr); +} + +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) { + assert(mutex != NULL); + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + ZSTD_free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) { + assert(cond != NULL); + *cond = (pthread_cond_t*) ZSTD_malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) { + assert(cond != NULL); + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + ZSTD_free(*cond); + return ret; + } +} + +#endif diff --git a/src/external/common/threading.h b/src/external/common/threading.h new file mode 100644 index 00000000..f29e8593 --- /dev/null +++ b/src/external/common/threading.h @@ -0,0 +1,152 @@ +/** 
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+#include "debug.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+    /**
+     * Windows minimalist Pthread Wrapper
+     */
+    #ifdef WINVER
+        #undef WINVER
+    #endif
+    #define WINVER 0x0600
+
+    #ifdef _WIN32_WINNT
+        #undef _WIN32_WINNT
+    #endif
+    #define _WIN32_WINNT 0x0600
+
+    #ifndef WIN32_LEAN_AND_MEAN
+        #define WIN32_LEAN_AND_MEAN
+    #endif
+
+    #undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
+    #include <windows.h>
+    #undef ERROR
+    #define ERROR(name) ZSTD_ERROR(name)
+
+
+    /* mutex */
+    #define ZSTD_pthread_mutex_t CRITICAL_SECTION
+    #define ZSTD_pthread_mutex_init(a, b) ((void) (b), InitializeCriticalSection((a)), 0)
+    #define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
+    #define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
+    #define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
+
+    /* condition variable */
+    #define ZSTD_pthread_cond_t CONDITION_VARIABLE
+    #define ZSTD_pthread_cond_init(a, b) ((void) (b), InitializeConditionVariable((a)), 0)
+    #define ZSTD_pthread_cond_destroy(a) ((void) (a))
+    #define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
+    #define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
+    #define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
+
+/* ZSTD_pthread_create() and ZSTD_pthread_join() */
+typedef HANDLE ZSTD_pthread_t;
+
+int ZSTD_pthread_create(ZSTD_pthread_t* thread,
+                        const void* unused,
+                        void* (*start_routine)(void*),
+                        void* arg);
+
+int ZSTD_pthread_join(ZSTD_pthread_t thread);
+
+    /**
+     * add here more wrappers as required
+     */
+
+
+#elif defined(ZSTD_MULTITHREAD) /* POSIX assumed; need a better detection method */
+    /* === POSIX Systems === */
+    #include <pthread.h>
+
+    #if DEBUGLEVEL < 1
+
+        #define ZSTD_pthread_mutex_t pthread_mutex_t
+        #define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
+        #define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
+        #define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a))
+        #define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a))
+
+        #define ZSTD_pthread_cond_t pthread_cond_t
+        #define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b))
+        #define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a))
+        #define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b))
+        #define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a))
+        #define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a))
+
+        #define ZSTD_pthread_t pthread_t
+        #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+        #define ZSTD_pthread_join(a) pthread_join((a), NULL)
+
+    #else /* DEBUGLEVEL >= 1 */
+
+        /* Debug implementation of threading.
+         * In this implementation we use pointers for mutexes and condition variables.
+         * This way, if we forget to init/destroy them the program will crash or ASAN
+         * will report leaks.
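+         * For example, ZSTD_pthread_mutex_lock() dereferences the stored pointer,
+         * so locking a mutex that was never initialized fails immediately instead
+         * of silently appearing to work.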
+ */ + + #define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); + #define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) + #define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + + #define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); + #define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) + #define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) + #define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + + #define ZSTD_pthread_t pthread_t + #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) + #define ZSTD_pthread_join(a) pthread_join((a), NULL) + + #endif + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; + #define ZSTD_pthread_mutex_init(a, b) ((void) (a), (void) (b), 0) + #define ZSTD_pthread_mutex_destroy(a) ((void) (a)) + #define ZSTD_pthread_mutex_lock(a) ((void) (a)) + #define ZSTD_pthread_mutex_unlock(a) ((void) (a)) + +typedef int ZSTD_pthread_cond_t; + #define ZSTD_pthread_cond_init(a, b) ((void) (a), (void) (b), 0) + #define ZSTD_pthread_cond_destroy(a) ((void) (a)) + #define ZSTD_pthread_cond_wait(a, b) ((void) (a), (void) (b)) + #define ZSTD_pthread_cond_signal(a) ((void) (a)) + #define ZSTD_pthread_cond_broadcast(a) ((void) (a)) + + /* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/src/external/common/xxhash.cpp b/src/external/common/xxhash.cpp new file mode 100644 index 00000000..052cd522 --- /dev/null +++ b/src/external/common/xxhash.cpp @@ -0,0 +1,18 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (c) Yann Collet - Meta Platforms, Inc + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * xxhash.c instantiates functions defined in xxhash.h + */ + +#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ +#define XXH_IMPLEMENTATION /* access definitions */ + +#include "xxhash.h" diff --git a/src/external/common/xxhash.h b/src/external/common/xxhash.h new file mode 100644 index 00000000..3fb339fa --- /dev/null +++ b/src/external/common/xxhash.h @@ -0,0 +1,7206 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) Yann Collet - Meta Platforms, Inc + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Local adaptations for Zstandard */ + +#ifndef XXH_NO_XXH3 +#define XXH_NO_XXH3 +#endif + +#ifndef XXH_NAMESPACE +#define XXH_NAMESPACE ZSTD_ +#endif + +/*! + * @mainpage xxHash + * + * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed + * limits. + * + * It is proposed in four flavors, in three families: + * 1. @ref XXH32_family + * - Classic 32-bit hash function. 
Simple, compact, and runs on almost all
+ * 32-bit and 64-bit systems.
+ * 2. @ref XXH64_family
+ * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
+ * 64-bit systems (but _not_ 32-bit systems).
+ * 3. @ref XXH3_family
+ * - Modern 64-bit and 128-bit hash function family which features improved
+ * strength and performance across the board, especially on smaller data.
+ * It benefits greatly from SIMD and 64-bit without requiring it.
+ *
+ * Benchmarks
+ * ---
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
+ * The open source benchmark program is compiled with clang v10.0 using the -O3 flag.
+ *
+ * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
+ * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
+ * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
+ * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
+ * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
+ * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
+ * | RAM sequential read | | N/A | 28.0 GB/s | N/A |
+ * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
+ * | City64 | | 64 | 22.0 GB/s | 76.6 |
+ * | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
+ * | City128 | | 128 | 21.7 GB/s | 57.7 |
+ * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
+ * | XXH64() | | 64 | 19.4 GB/s | 71.0 |
+ * | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
+ * | Mum | | 64 | 18.0 GB/s | 67.0 |
+ * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
+ * | XXH32() | | 32 | 9.7 GB/s | 71.9 |
+ * | City32 | | 32 | 9.1 GB/s | 66.0 |
+ * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
+ * | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
+ * | SipHash* | | 64 | 3.0 GB/s | 43.2 |
+ * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
+ * | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
+ * | FNV64 | | 64 | 1.2 GB/s | 62.7 |
+ * | Blake2* | | 256 | 1.1 GB/s | 5.1 |
+ * | SHA1* | | 160 | 0.8 GB/s | 5.6 |
+ * | MD5* | | 128 | 0.6 GB/s | 7.8 |
+ * @note
+ * - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ * even though it is mandatory on x64.
+ * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ * by modern standards.
+ * - Small data velocity is a rough average of algorithm's efficiency for small
+ * data. For more accurate information, see the wiki.
+ * - More benchmarks and strength tests are found on the wiki:
+ * https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ * For functions which take an input and length parameter, the following
+ * requirements are assumed:
+ * - The range from [`input`, `input + length`) is valid, readable memory.
+ * - The only exception: if `length` is `0`, `input` may be `NULL`.
+ * - For C++, the objects must have the *TriviallyCopyable* property, as the
+ * functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ * #include <string.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which hashes a null terminated string with XXH32().
+ * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ * {
+ * // NULL pointers are only valid if the length is zero
+ * size_t length = (string == NULL) ? 0 : strlen(string);
+ * return XXH32(string, length, seed);
+ * }
+ * @endcode
+ *
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <assert.h>
+ * #include "xxhash.h"
+ * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ * XXH64_hash_t hashFile(FILE* f)
+ * {
+ * // Allocate a state struct. Do not just use malloc() or new.
+ * XXH3_state_t* state = XXH3_createState();
+ * assert(state != NULL && "Out of memory!");
+ * // Reset the state to start a new hashing session.
+ * XXH3_64bits_reset(state);
+ * char buffer[4096];
+ * size_t count;
+ * // Read the file in chunks
+ * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ * // Run update() as many times as necessary to process the data
+ * XXH3_64bits_update(state, buffer, count);
+ * }
+ * // Retrieve the finalized hash. This will not change the state.
+ * XXH64_hash_t result = XXH3_64bits_digest(state);
+ * // Free the state. Do not use free().
+ * XXH3_freeState(state);
+ * return result;
+ * }
+ * @endcode
+ *
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ *
+ * @anchor canonical_representation_example
+ * **Canonical Representation**
+ *
+ * The default return values from XXH functions are unsigned 32, 64 and 128 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ *
+ * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
+ * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
+ * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which prints XXH32_hash_t in human readable format
+ * void printXxh32(XXH32_hash_t hash)
+ * {
+ * XXH32_canonical_t cano;
+ * XXH32_canonicalFromHash(&cano, hash);
+ * size_t i;
+ * for(i = 0; i < sizeof(cano.digest); ++i) {
+ * printf("%02x", cano.digest[i]);
+ * }
+ * printf("\n");
+ * }
+ * @endcode
+ *
+ * @code{.c}
+ * // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
+ * XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
+ * {
+ * XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
+ * return hash;
+ * }
+ * @endcode
+ *
+ *
+ * @file xxhash.h
+ * xxHash prototypes and implementation
+ */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* ****************************
+ * INLINE mode
+ ******************************/
+/*!
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Gives access to internal state declaration, required for static allocation.
+ *
+ * Incompatible with dynamic linking, due to risks of ABI changes.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #include "xxhash.h"
+ * @endcode
+ */
+#define XXH_STATIC_LINKING_ONLY
+/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
+
+/*!
+ * @brief Gives access to internal definitions.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #define XXH_IMPLEMENTATION
+ * #include "xxhash.h"
+ * @endcode
+ */
+#define XXH_IMPLEMENTATION
+/* Do not undef XXH_IMPLEMENTATION for Doxygen */
+
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
+ * Use these build macros to inline xxhash into the target unit.
+ * Inlining improves performance on small inputs, especially when the length is
+ * expressed as a compile-time constant:
+ *
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *
+ * It also keeps xxHash symbols private to the unit, so they are not exported.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ * @endcode
+ * Do not compile and link xxhash.o as a separate object, as it is not useful.
+ */
+#define XXH_INLINE_ALL
+#undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+#define XXH_PRIVATE_API
+#undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
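+ *
+ * A minimal sketch (editorial illustration; `MYLIB_` is an assumed prefix):
+ * @code{.c}
+ * #define XXH_NAMESPACE MYLIB_
+ * #include "xxhash.h"
+ * // The exported symbol is MYLIB_XXH32, but callers still write XXH32():
+ * XXH32_hash_t h = XXH32(buffer, size, 0);
+ * @endcode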
+ */ +#define XXH_NAMESPACE /* YOUR NAME HERE */ +#undef XXH_NAMESPACE +#endif + +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) && !defined(XXH_INLINE_ALL_31684351384) +/* this section should be traversed only once */ +#define XXH_INLINE_ALL_31684351384 +/* give access to the advanced API, required to compile implementations */ +#undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +#define XXH_STATIC_LINKING_ONLY +/* make all functions private */ +#undef XXH_PUBLIC_API +#if defined(__GNUC__) + #define XXH_PUBLIC_API static __inline __attribute__((unused)) +#elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) + #define XXH_PUBLIC_API static inline +#elif defined(_MSC_VER) + #define XXH_PUBLIC_API static __inline +#else + /* note: this version may generate warnings for unused static functions */ + #define XXH_PUBLIC_API static +#endif + +/* + * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, + * such as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. + */ +/* Before that, we unconditionally #undef all symbols, + * in case they were already defined with XXH_NAMESPACE. + * They will then be redefined for XXH_INLINE_ALL + */ +#undef XXH_versionNumber +/* XXH32 */ +#undef XXH32 +#undef XXH32_createState +#undef XXH32_freeState +#undef XXH32_reset +#undef XXH32_update +#undef XXH32_digest +#undef XXH32_copyState +#undef XXH32_canonicalFromHash +#undef XXH32_hashFromCanonical +/* XXH64 */ +#undef XXH64 +#undef XXH64_createState +#undef XXH64_freeState +#undef XXH64_reset +#undef XXH64_update +#undef XXH64_digest +#undef XXH64_copyState +#undef XXH64_canonicalFromHash +#undef XXH64_hashFromCanonical +/* XXH3_64bits */ +#undef XXH3_64bits +#undef XXH3_64bits_withSecret +#undef XXH3_64bits_withSeed +#undef XXH3_64bits_withSecretandSeed +#undef XXH3_createState +#undef XXH3_freeState +#undef XXH3_copyState +#undef XXH3_64bits_reset +#undef XXH3_64bits_reset_withSeed +#undef XXH3_64bits_reset_withSecret +#undef XXH3_64bits_update +#undef XXH3_64bits_digest +#undef XXH3_generateSecret +/* XXH3_128bits */ +#undef XXH128 +#undef XXH3_128bits +#undef XXH3_128bits_withSeed +#undef XXH3_128bits_withSecret +#undef XXH3_128bits_reset +#undef XXH3_128bits_reset_withSeed +#undef XXH3_128bits_reset_withSecret +#undef XXH3_128bits_reset_withSecretandSeed +#undef XXH3_128bits_update +#undef XXH3_128bits_digest +#undef XXH128_isEqual +#undef XXH128_cmp +#undef XXH128_canonicalFromHash +#undef XXH128_hashFromCanonical +/* Finally, free the namespace itself */ +#undef XXH_NAMESPACE + +/* employ the namespace for XXH_INLINE_ALL */ +#define XXH_NAMESPACE XXH_INLINE_ +/* + * Some identifiers (enums, type names) are not symbols, + * but they must nonetheless be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and has a more dispersed impact. + * Meanwhile, renaming can be achieved in a single place. 
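+ *
+ * For instance, once XXH_OK is #define'd to XXH_IPREF(XXH_OK) below, every
+ * later mention of XXH_OK in this header is token-pasted into a prefixed,
+ * distinct identifier, so it cannot collide with the XXH_OK enum constant
+ * declared by an earlier, non-inlined inclusion of this header.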
+ */ +#define XXH_IPREF(Id) XXH_NAMESPACE##Id +#define XXH_OK XXH_IPREF(XXH_OK) +#define XXH_ERROR XXH_IPREF(XXH_ERROR) +#define XXH_errorcode XXH_IPREF(XXH_errorcode) +#define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) +#define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) +#define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) +#define XXH32_state_s XXH_IPREF(XXH32_state_s) +#define XXH32_state_t XXH_IPREF(XXH32_state_t) +#define XXH64_state_s XXH_IPREF(XXH64_state_s) +#define XXH64_state_t XXH_IPREF(XXH64_state_t) +#define XXH3_state_s XXH_IPREF(XXH3_state_s) +#define XXH3_state_t XXH_IPREF(XXH3_state_t) +#define XXH128_hash_t XXH_IPREF(XXH128_hash_t) +/* Ensure the header is parsed again, even if it was previously included */ +#undef XXHASH_H_5627135585666179 +#undef XXHASH_H_STATIC_13879238742 +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/* **************************************************************** + * Stable API + *****************************************************************/ +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/*! @brief Marks a global symbol. */ +#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) + #if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) + #ifdef XXH_EXPORT + #define XXH_PUBLIC_API __declspec(dllexport) + #elif XXH_IMPORT + #define XXH_PUBLIC_API __declspec(dllimport) + #endif + #else + #define XXH_PUBLIC_API /* do nothing */ + #endif +#endif + +#ifdef XXH_NAMESPACE + #define XXH_CAT(A, B) A##B + #define XXH_NAME2(A, B) XXH_CAT(A, B) + #define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) + /* XXH32 */ + #define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) + #define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) + #define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) + #define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) + #define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) + #define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) + #define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) + #define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) + #define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) + /* XXH64 */ + #define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) + #define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) + #define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) + #define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) + #define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) + #define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) + #define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) + #define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) + #define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) + /* XXH3_64bits */ + #define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) + #define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) + #define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) + #define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed) + #define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) + #define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) + #define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) + #define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) + #define XXH3_64bits_reset_withSeed 
XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+ #define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+ #define XXH3_64bits_reset_withSecretandSeed \
+ XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
+ #define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+ #define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+ #define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+ #define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
+ /* XXH3_128bits */
+ #define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+ #define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+ #define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+ #define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+ #define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
+ #define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+ #define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+ #define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+ #define XXH3_128bits_reset_withSecretandSeed \
+ XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
+ #define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+ #define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+ #define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+ #define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+ #define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+ #define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+ #if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+ #ifdef XXH_EXPORT
+ #define XXH_PUBLIC_API __declspec(dllexport)
+ #elif XXH_IMPORT
+ #define XXH_PUBLIC_API __declspec(dllimport)
+ #endif
+ #else
+ #define XXH_PUBLIC_API /* do nothing */
+ #endif
+#endif
+
+#if defined(__GNUC__)
+ #define XXH_CONSTF __attribute__((const))
+ #define XXH_PUREF __attribute__((pure))
+ #define XXH_MALLOCF __attribute__((malloc))
+#else
+ #define XXH_CONSTF /* disable */
+ #define XXH_PUREF
+ #define XXH_MALLOCF
+#endif
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 8
+#define XXH_VERSION_RELEASE 2
+/*! @brief Version number, encoded as two digits each */
+#define XXH_VERSION_NUMBER \
+ (XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to header file.
+ *
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
+ */
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber(void);
+
+
+/* ****************************
+* Common basic types
+******************************/
+#include <stddef.h> /* size_t */
+/*!
+ * @brief Exit code for the streaming API.
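+ *
+ * A typical check (editorial illustration; `state`, `buf` and `len` are
+ * assumed caller variables):
+ * @code{.c}
+ * if (XXH32_update(state, buf, len) != XXH_OK) {
+ *     // handle failure
+ * }
+ * @endcode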
+ */
+typedef enum {
+ XXH_OK = 0, /*!< OK */
+ XXH_ERROR /*!< Error */
+} XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+
+#elif !defined(__VMS) \
+ && (defined(__cplusplus) \
+ || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+ #ifdef _AIX
+ #include <inttypes.h>
+ #else
+ #include <stdint.h>
+ #endif
+typedef uint32_t XXH32_hash_t;
+
+#else
+ #include <limits.h>
+ #if UINT_MAX == 0xFFFFFFFFUL
+typedef unsigned int XXH32_hash_t;
+ #elif ULONG_MAX == 0xFFFFFFFFUL
+typedef unsigned long XXH32_hash_t;
+ #else
+ #error "unsupported platform: need a 32-bit type"
+ #endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup XXH32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ * XXH32 is useful for older platforms, with no or poor 64-bit performance.
+ * Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ * and 64-bit systems, and offers true 64/128 bit hash results.
+ *
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit xxHash32 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32(const void* input, size_t length, XXH32_hash_t seed);
+
+#ifndef XXH_NO_STREAM
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * @return An allocated pointer of @ref XXH32_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH32_freeState().
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH32_createState().
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
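+ *
+ * A complete session (editorial illustration; `part1`/`len1` etc. are
+ * assumed caller data):
+ * @code{.c}
+ * XXH32_state_t* const s = XXH32_createState();
+ * XXH32_reset(s, 0);               // start a new hash with seed 0
+ * XXH32_update(s, part1, len1);    // feed data in as many chunks as needed
+ * XXH32_update(s, part2, len2);
+ * XXH32_hash_t const h = XXH32_digest(s);
+ * XXH32_freeState(s);
+ * @endcode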
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH32_update().
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t* statePtr,
+ const void* input,
+ size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated 32-bit xxHash32 value from that state.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest(const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/******* Canonical representation *******/
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+ unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+/*! @cond Doxygen ignores this part */
+#ifdef __has_attribute
+ #define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+ #define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to correct value when it's been specified.
+ */
+#define XXH_C23_VN 201711L
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
+ #define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+ #define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*!
@cond Doxygen ignores this part */
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+ #define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+ #define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
+ #define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+ #define XXH_FALLTHROUGH __attribute__((__fallthrough__))
+#else
+ #define XXH_FALLTHROUGH /* fallthrough */
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+ #define XXH_NOESCAPE __attribute__((noescape))
+#else
+ #define XXH_NOESCAPE
+#endif
+/*! @endcond */
+
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+ /*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+ #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+ #elif !defined(__VMS) \
+ && (defined(__cplusplus) \
+ || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+ #ifdef _AIX
+ #include <inttypes.h>
+ #else
+ #include <stdint.h>
+ #endif
+typedef uint64_t XXH64_hash_t;
+ #else
+ #include <limits.h>
+ #if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+/* LP64 ABI says uint64_t is unsigned long */
+typedef unsigned long XXH64_hash_t;
+ #else
+/* the following type must have a width of 64-bit */
+typedef unsigned long long XXH64_hash_t;
+ #endif
+ #endif
+
+/*!
+ * @}
+ *
+ * @defgroup XXH64_family XXH64 family
+ * @ingroup public
+ * @{
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ * and offers true 64/128 bit hash results.
+ * It provides better speed for systems with vector processing capabilities.
+ */
+
+/*!
+ * @brief Calculates the 64-bit hash of @p input using xxHash64.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit xxHash64 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input,
+ size_t length,
+ XXH64_hash_t seed);
+
+ /******* Streaming *******/
+ #ifndef XXH_NO_STREAM
+/*!
+ * @brief The opaque state struct for the XXH64 streaming API.
+ *
+ * @see XXH64_state_s for details.
+ */
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+
+/*!
+ * @brief Allocates an @ref XXH64_state_t. + * + * @return An allocated pointer of @ref XXH64_state_t on success. + * @return `NULL` on failure. + * + * @note Must be freed with XXH64_freeState(). + */ +XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); + +/*! + * @brief Frees an @ref XXH64_state_t. + * + * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState(). + * + * @return @ref XXH_OK. + * + * @note @p statePtr must be allocated with XXH64_createState(). + */ +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + +/*! + * @brief Copies one @ref XXH64_state_t to another. + * + * @param dst_state The state to copy to. + * @param src_state The state to copy from. + * @pre + * @p dst_state and @p src_state must not be `NULL` and must not overlap. + */ +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, + const XXH64_state_t* src_state); + +/*! + * @brief Resets an @ref XXH64_state_t to begin a new hash. + * + * @param statePtr The state struct to reset. + * @param seed The 64-bit seed to alter the hash result predictably. + * + * @pre + * @p statePtr must not be `NULL`. + * + * @return @ref XXH_OK on success. + * @return @ref XXH_ERROR on failure. + * + * @note This function resets and seeds a state. Call it before @ref XXH64_update(). + */ +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed); + +/*! + * @brief Consumes a block of @p input to an @ref XXH64_state_t. + * + * @param statePtr The state struct to update. + * @param input The block of data to be hashed, at least @p length bytes in size. + * @param length The length of @p input, in bytes. + * + * @pre + * @p statePtr must not be `NULL`. + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return @ref XXH_OK on success. + * @return @ref XXH_ERROR on failure. + * + * @note Call this to incrementally consume blocks of data. + */ +XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH_NOESCAPE XXH64_state_t* statePtr, + XXH_NOESCAPE const void* input, + size_t length); + +/*! + * @brief Returns the calculated hash value from an @ref XXH64_state_t. + * + * @param statePtr The state struct to calculate the hash from. + * + * @pre + * @p statePtr must not be `NULL`. + * + * @return The calculated 64-bit xxHash64 value from that state. + * + * @note + * Calling XXH64_digest() will not affect @p statePtr, so you can update, + * digest, and update again. + */ +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* statePtr); + #endif /* !XXH_NO_STREAM */ +/******* Canonical representation *******/ + +/*! + * @brief Canonical (big endian) representation of @ref XXH64_hash_t. + */ +typedef struct { + unsigned char digest[sizeof(XXH64_hash_t)]; +} XXH64_canonical_t; + +/*! + * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t. + * + * @param dst The @ref XXH64_canonical_t pointer to be stored to. + * @param hash The @ref XXH64_hash_t to be converted. + * + * @pre + * @p dst must not be `NULL`. + * + * @see @ref canonical_representation_example "Canonical Representation Example" + */ +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash); + +/*! + * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t. 
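+ *
+ * Canonical round-trip (editorial illustration; `h` is an assumed
+ * XXH64_hash_t value):
+ * @code{.c}
+ * XXH64_canonical_t c;
+ * XXH64_canonicalFromHash(&c, h);   // big endian bytes, safe to store or send
+ * XXH64_hash_t const h2 = XXH64_hashFromCanonical(&c);
+ * // h2 == h on any platform, regardless of endianness
+ * @endcode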
+ * + * @param src The @ref XXH64_canonical_t to convert. + * + * @pre + * @p src must not be `NULL`. + * + * @return The converted hash. + * + * @see @ref canonical_representation_example "Canonical Representation Example" + */ +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t +XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src); + + #ifndef XXH_NO_XXH3 + +/*! + * @} + * ************************************************************************ + * @defgroup XXH3_family XXH3 family + * @ingroup public + * @{ + * + * XXH3 is a more recent hash algorithm featuring: + * - Improved speed for both small and large inputs + * - True 64-bit and 128-bit outputs + * - SIMD acceleration + * - Improved 32-bit viability + * + * Speed analysis methodology is explained here: + * + * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html + * + * Compared to XXH64, expect XXH3 to run approximately + * ~2x faster on large inputs and >3x faster on small ones, + * exact differences vary depending on platform. + * + * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, + * but does not require it. + * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3 + * at competitive speeds, even without vector support. Further details are + * explained in the implementation. + * + * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD + * implementations for many common platforms: + * - AVX512 + * - AVX2 + * - SSE2 + * - ARM NEON + * - WebAssembly SIMD128 + * - POWER8 VSX + * - s390x ZVector + * This can be controlled via the @ref XXH_VECTOR macro, but it automatically + * selects the best version according to predefined macros. For the x86 family, an + * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c. + * + * XXH3 implementation is portable: + * it has a generic C90 formulation that can be compiled on any platform, + * all implementations generate exactly the same hash value on all platforms. + * Starting from v0.8.0, it's also labelled "stable", meaning that + * any future version will also generate the same hash value. + * + * XXH3 offers 2 variants, _64bits and _128bits. + * + * When only 64 bits are needed, prefer invoking the _64bits variant, as it + * reduces the amount of mixing, resulting in faster speed on small inputs. + * It's also generally simpler to manipulate a scalar return type than a struct. + * + * The API supports one-shot hashing, streaming mode, and custom secrets. + */ +/*-********************************************************************** +* XXH3 64-bit variant +************************************************************************/ + +/*! + * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input. + * + * @param input The block of data to be hashed, at least @p length bytes in size. + * @param length The length of @p input, in bytes. + * + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return The calculated 64-bit XXH3 hash value. + * + * @note + * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however + * it may have slightly better performance due to constant propagation of the + * defaults. + * + * @see + * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants + * @see @ref single_shot_example "Single Shot Example" for an example. 
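+ *
+ * One-shot use (editorial illustration; `buffer`/`size` are assumed caller data):
+ * @code{.c}
+ * XXH64_hash_t const h = XXH3_64bits(buffer, size);
+ * @endcode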
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ * seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input,
+ size_t length,
+ XXH64_hash_t seed);
+
+ /*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
+ */
+ #define XXH3_SECRET_SIZE_MIN 136
+
+/*!
+ * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @pre
+ * The memory between @p data and @p data + @p len must be valid,
+ * readable, contiguous memory. However, if @p len is `0`, @p data may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data,
+ size_t len,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize);
+
+
+ /******* Streaming *******/
+ #ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ */
+
+/*!
+ * @brief The opaque state struct for the XXH3 streaming API.
+ *
+ * @see XXH3_state_s for details.
+ */
+typedef struct XXH3_state_s XXH3_state_t;
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
+
+/*!
+ * @brief Copies one @ref XXH3_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state,
+ XXH_NOESCAPE const XXH3_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generates a secret with default parameters.
+ * - Call this function before @ref XXH3_64bits_update().
+ * - Digest will be equivalent to `XXH3_64bits()`.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generates a secret from `seed`.
+ * - Call this function before @ref XXH3_64bits_update().
+ * - Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH64_hash_t seed);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * `secret` is referenced; it _must outlive_ the hash streaming session.
+ *
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
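+ *
+ * A seeded streaming session (editorial illustration; `seed`, `part1`/`len1`
+ * etc. are assumed caller data):
+ * @code{.c}
+ * XXH3_state_t* const s = XXH3_createState();
+ * XXH3_64bits_reset_withSeed(s, seed);   // digest will match XXH3_64bits_withSeed()
+ * XXH3_64bits_update(s, part1, len1);
+ * XXH3_64bits_update(s, part2, len2);
+ * XXH64_hash_t const h = XXH3_64bits_digest(s);
+ * XXH3_freeState(s);
+ * @endcode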
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* input,
+ size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 64-bit hash value from that state.
+ *
+ * @note
+ * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t* statePtr);
+ #endif /* !XXH_NO_STREAM */
+
+/* note : canonical representation of XXH3 is the same as XXH64
+ * since they both produce XXH64_hash_t values */
+
+
+/*-**********************************************************************
+* XXH3 128-bit variant
+************************************************************************/
+
+/*!
+ * @brief The return value from 128-bit hashes.
+ *
+ * Stored in little endian order, although the fields themselves are in native
+ * endianness.
+ */
+typedef struct {
+ XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
+ XXH64_hash_t high64; /*!< `value >> 64` */
+} XXH128_hash_t;
+
+/*!
+ * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @note
+ * seed == 0 produces the same results as @ref XXH3_128bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data,
+ size_t len,
+ XXH64_hash_t seed);
+/*!
+ * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data,
+ size_t len,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize);
+
+ /******* Streaming *******/
+ #ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ *
+ * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
+ * Use already declared XXH3_createState() and XXH3_freeState().
+ *
+ * All reset and streaming functions have the same meaning as their 64-bit counterpart.
+ */
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generates a secret with default parameters.
+ * - Call it before @ref XXH3_128bits_update().
+ * - Digest will be equivalent to `XXH3_128bits()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * - This function resets `statePtr` and generates a secret from `seed`.
+ * - Call it before @ref XXH3_128bits_update().
+ * - Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH64_hash_t seed);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * `secret` is referenced; it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* input,
+ size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 128-bit hash value from that state.
+ *
+ * @note
+ * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t* statePtr);
+ #endif /* !XXH_NO_STREAM */
+
+/* Following helper functions make it possible to compare XXH128_hash_t values.
+ * Since XXH128_hash_t is a structure, this capability is not offered by the language.
+ * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
+
+/*!
+ * @brief Check equality of two XXH128_hash_t values
+ *
+ * @param h1 The 128-bit hash value.
+ * @param h2 Another 128-bit hash value.
+ *
+ * @return `1` if `h1` and `h2` are equal.
+ * @return `0` if they are not.
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+
+/*!
+ * @brief Compares two @ref XXH128_hash_t
+ *
+ * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ *
+ * @param h128_1 Left-hand side value
+ * @param h128_2 Right-hand side value
+ *
+ * @return >0 if @p h128_1 > @p h128_2
+ * @return =0 if @p h128_1 == @p h128_2
+ * @return <0 if @p h128_1 < @p h128_2
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1,
+ XXH_NOESCAPE const void* h128_2);
+
+
+/******* Canonical representation *******/
+typedef struct {
+ unsigned char digest[sizeof(XXH128_hash_t)];
+} XXH128_canonical_t;
+
+
+/*!
+ * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
+ *
+ * @param dst The @ref XXH128_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH128_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst,
+ XXH128_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t. + * + * @param src The @ref XXH128_canonical_t to convert. + * + * @pre + * @p src must not be `NULL`. + * + * @return The converted hash. + * @see @ref canonical_representation_example "Canonical Representation Example" + */ +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t +XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src); + + + #endif /* !XXH_NO_XXH3 */ +#endif /* XXH_NO_LONG_LONG */ + +/*! + * @} + */ +#endif /* XXHASH_H_5627135585666179 */ + + +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) +#define XXHASH_H_STATIC_13879238742 +/* **************************************************************************** + * This section contains declarations which are not guaranteed to remain stable. + * They may change in future versions, becoming incompatible with a different + * version of the library. + * These declarations should only be used with static linking. + * Never use them in association with dynamic linking! + ***************************************************************************** */ + +/* + * These definitions are only present to allow static allocation + * of XXH states, on stack or in a struct, for example. + * Never **ever** access their members directly. + */ + +/*! + * @internal + * @brief Structure for XXH32 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is + * an opaque type. This allows fields to safely be changed. + * + * Typedef'd to @ref XXH32_state_t. + * Do not access the members of this struct directly. + * @see XXH64_state_s, XXH3_state_s + */ +struct XXH32_state_s { + XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ + XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ + XXH32_hash_t v[4]; /*!< Accumulator lanes */ + XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */ + XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */ + XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */ +}; /* typedef'd to XXH32_state_t */ + + +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ + +/*! + * @internal + * @brief Structure for XXH64 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is + * an opaque type. This allows fields to safely be changed. + * + * Typedef'd to @ref XXH64_state_t. + * Do not access the members of this struct directly. + * @see XXH32_state_s, XXH3_state_s + */ +struct XXH64_state_s { + XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ + XXH64_hash_t v[4]; /*!< Accumulator lanes */ + XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */ + XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */ + XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ + XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. 
*/
+}; /* typedef'd to XXH64_state_t */
+
+ #ifndef XXH_NO_XXH3
+
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
+ #include <stdalign.h>
+ #define XXH_ALIGN(n) alignas(n)
+ #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
+ /* In C++ alignas() is a keyword */
+ #define XXH_ALIGN(n) alignas(n)
+ #elif defined(__GNUC__)
+ #define XXH_ALIGN(n) __attribute__((aligned(n)))
+ #elif defined(_MSC_VER)
+ #define XXH_ALIGN(n) __declspec(align(n))
+ #else
+ #define XXH_ALIGN(n) /* disabled */
+ #endif
+
+ /* Old GCC versions only accept the attribute after the type in structures. */
+ #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
+ && !(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
+ && defined(__GNUC__)
+ #define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+ #else
+ #define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+ #endif
+
+ /*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64bits_update(), XXH3_128bits_update().
+ */
+ #define XXH3_INTERNALBUFFER_SIZE 256
+
+ /*!
+ * @internal
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+ #define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
+ * Otherwise it is an opaque type.
+ * Never use this definition in combination with a dynamic library.
+ * This allows fields to safely be changed in the future.
+ *
+ * @note ** This structure has a strict alignment requirement of 64 bytes!! **
+ * Do not allocate this with `malloc()` or `new`,
+ * it will not be sufficiently aligned.
+ * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Never access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+ XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+ /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
+ XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+ /*!< Used to store a custom secret generated from a seed. */
+ XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+ /*!< The internal buffer. @see XXH32_state_s::mem32 */
+ XXH32_hash_t bufferedSize;
+ /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+ XXH32_hash_t useSeed;
+ /*!< Reserved field. Needed for padding on 64-bit. */
+ size_t nbStripesSoFar;
+ /*!< Number of stripes processed. */
+ XXH64_hash_t totalLen;
+ /*!< Total length hashed. 64-bit even on 32-bit targets. */
+ size_t nbStripesPerBlock;
+ /*!< Number of stripes per block. */
+ size_t secretLimit;
+ /*!< Size of @ref customSecret or @ref extSecret */
+ XXH64_hash_t seed;
+ /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+ XXH64_hash_t reserved64;
+ /*!< Reserved field. */
+ const unsigned char* extSecret;
+ /*!< Reference to an external secret for the _withSecret variants, NULL
+ * for other variants.
*/
+ /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+ #undef XXH_ALIGN_MEMBER
+
+ /*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely placed on the stack,
+ * it should be initialized with XXH3_INITSTATE() or a memset()
+ * in case its first reset uses XXH3_NNbits_reset_withSeed().
+ * This init can be omitted if the first reset uses the default or _withSecret mode.
+ * This operation isn't necessary when the state is created with XXH3_createState().
+ * Note that this doesn't prepare the state for a streaming operation,
+ * it's still necessary to use XXH3_NNbits_reset*() afterwards.
+ */
+ #define XXH3_INITSTATE(XXH3_state_ptr) \
+ do \
+ { \
+ XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+ tmp_xxh3_state_ptr->seed = 0; \
+ tmp_xxh3_state_ptr->extSecret = NULL; \
+ } while (0)
+
+
+/*!
+ * @brief Calculates the 128-bit hash of @p data using XXH3.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p data and @p data + @p len must be valid,
+ * readable, contiguous memory. However, if @p len is `0`, @p data may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data,
+ size_t len,
+ XXH64_hash_t seed);
+
+
+/* === Experimental API === */
+/* Symbols defined below must be considered tied to a specific library version. */
+
+/*!
+ * @brief Derives a high-entropy secret from any user-defined content, named customSeed.
+ *
+ * @param secretBuffer A writable buffer for derived high-entropy secret data.
+ * @param secretSize Size of secretBuffer, in bytes. Must be >= XXH3_SECRET_SIZE_MIN.
+ * @param customSeed A user-defined content.
+ * @param customSeedSize Size of customSeed, in bytes.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * The generated secret can be used in combination with `*_withSecret()` functions.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than a 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
+ *
+ * The function accepts as input a custom seed of any length and any content,
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
+ *
+ * The generated secret can then be used with any `*_withSecret()` variant.
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
+ * are part of this list. They all accept a `secret` parameter
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
+ * _and_ feature very high entropy (consist of random-looking bytes).
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes.
The resulting `secret` will nonetheless provide all required qualities.
+ *
+ * @pre
+ * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ *
+ * Example code:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <string.h>
+ * #include <stdlib.h>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Hashes argv[2] using the entropy from argv[1].
+ * int main(int argc, char* argv[])
+ * {
+ * char secret[XXH3_SECRET_SIZE_MIN];
+ * if (argc != 3) { return 1; }
+ * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ * XXH64_hash_t h = XXH3_64bits_withSecret(
+ * argv[2], strlen(argv[2]),
+ * secret, sizeof(secret)
+ * );
+ * printf("%016llx\n", (unsigned long long) h);
+ * }
+ * @endcode
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer,
+ size_t secretSize,
+ XXH_NOESCAPE const void* customSeed,
+ size_t customSeedSize);
+
+/*!
+ * @brief Generates the same secret as the _withSeed() variants.
+ *
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * The generated secret can be used in combination with
+ * `*_withSecret()` and `_withSecretandSeed()` variants.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ * #include <string>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Slow, seeds each time
+ * class HashSlow {
+ * XXH64_hash_t seed;
+ * public:
+ * HashSlow(XXH64_hash_t s) : seed{s} {}
+ * size_t operator()(const std::string& x) const {
+ * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ * }
+ * };
+ * // Fast, caches the seeded secret for future uses.
+ * class HashFast {
+ * unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ * public:
+ * HashFast(XXH64_hash_t s) {
+ * XXH3_generateSecret_fromSeed(secret, s);
+ * }
+ * size_t operator()(const std::string& x) const {
+ * return size_t{
+ * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ * };
+ * }
+ * };
+ * @endcode
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer,
+ XXH64_hash_t seed);
+
+/*!
+ * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * These variants generate hash values using either
+ * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than _withSeed() variants.
+ * Therefore, the _withSecretandSeed() variant combines the best of both worlds.
+ *
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as the `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
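+ *
+ * An illustrative sketch of that pattern; `data`, `len` and the seed value
+ * are placeholders for the caller's input:
+ * @code{.c}
+ * XXH64_hash_t seed = 0x1234;                  // placeholder seed
+ * unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ * XXH3_generateSecret_fromSeed(secret, seed);  // one-time setup
+ * // same result as XXH3_64bits_withSeed(data, len, seed),
+ * // without regenerating the secret for each large input:
+ * XXH64_hash_t h = XXH3_64bits_withSecretandSeed(data, len, secret, sizeof(secret), seed);
+ * @endcode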
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to swap each bit in the output, via its impact on the seed.
+ *
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data,
+ size_t len,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize,
+ XXH64_hash_t seed);
+/*!
+ * @brief Calculates 128-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input,
+ size_t length,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize,
+ XXH64_hash_t seed64);
+ #ifndef XXH_NO_STREAM
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize,
+ XXH64_hash_t seed64);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+ XXH_NOESCAPE const void* secret,
+ size_t secretSize,
+ XXH64_hash_t seed64);
+ #endif /* !XXH_NO_STREAM */
+
+ #endif /* !XXH_NO_XXH3 */
+#endif /* XXH_NO_LONG_LONG */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+ #define XXH_IMPLEMENTATION
+#endif
+
+#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
+
+
+/* ======================================================================== */
+/* ======================================================================== */
+/* ======================================================================== */
+
+
+/*-**********************************************************************
+ * xxHash implementation
+ *-**********************************************************************
+ * xxHash's implementation used to be hosted inside xxhash.c.
+ *
+ * However, inlining requires the implementation to be visible to the compiler,
+ * hence to be included alongside the header.
+ * Previously, the implementation was hosted inside xxhash.c,
+ * which was then #included when inlining was activated.
+ * This construction created issues with a few build and install systems,
+ * as it required xxhash.c to be stored in the /include directory.
+ *
+ * The xxHash implementation is now integrated directly within xxhash.h.
+ * As a consequence, xxhash.c is no longer needed in /include.
+ *
+ * xxhash.c is still available and is still useful.
+ * In a "normal" setup, when xxhash is not inlined,
+ * xxhash.h only exposes the prototypes and public symbols,
+ * while xxhash.c can be built into an object file xxhash.o
+ * which can then be linked into the final binary.
+ ************************************************************************/
+
+#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) || defined(XXH_IMPLEMENTATION)) \
+ && !defined(XXH_IMPLEM_13a8737387)
+#define XXH_IMPLEM_13a8737387
+
+/* *************************************
+* Tuning parameters
+***************************************/
+
+/*!
+ * @defgroup tuning Tuning parameters
+ * @{
+ *
+ * Various macros to control xxHash's behavior.
+ */
+#ifdef XXH_DOXYGEN
+ /*!
+ * @brief Define this to disable 64-bit code.
+ *
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
+ */
+ #define XXH_NO_LONG_LONG
+ #undef XXH_NO_LONG_LONG /* don't actually */
+ /*!
+ * @brief Controls how unaligned memory is accessed.
+ *
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is
+ * safe and portable.
+ *
+ * Unfortunately, on some target/compiler combinations, the generated assembly
+ * is sub-optimal.
+ *
+ * The switch below allows selection of a different access method
+ * in the search for improved performance.
+ *
+ * @par Possible options:
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
+ * @par
+ * Use `memcpy()`. Safe and portable. Note that most modern compilers will
+ * eliminate the function call and treat it as an unaligned access.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
+ * @par
+ * Depends on compiler extensions and is therefore not portable.
+ * This method is safe _if_ your compiler supports it,
+ * and *generally* as fast or faster than `memcpy`.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
+ * @par
+ * Casts directly and dereferences. This method doesn't depend on the
+ * compiler, but it violates the C standard as it directly dereferences an
+ * unaligned pointer. It can generate buggy code on targets which do not
+ * support unaligned memory accesses, but in some circumstances, it's the
+ * only known way to get the most performance.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
+ * @par
+ * Also portable. This can generate the best code on old compilers which don't
+ * inline small `memcpy()` calls, and it might also be faster on big-endian
+ * systems which lack a native byteswap instruction. However, some compilers
+ * will emit literal byteshifts even if the target supports unaligned access.
+ *
+ *
+ * @warning
+ * Methods 1 and 2 rely on implementation-defined behavior. Use these with
+ * care, as what works on one compiler/platform/optimization level may cause
+ * another to read garbage data or even crash.
+ *
+ * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
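+ *
+ * As an illustration only, a build for an old compiler could pin the portable
+ * byteshift method, either with `-DXXH_FORCE_MEMORY_ACCESS=3` on the command
+ * line or in the translation unit before this header is included:
+ * @code{.c}
+ * #define XXH_FORCE_MEMORY_ACCESS 3   // method 3: portable byteshift loads
+ * #include "xxhash.h"
+ * @endcode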
+ *
+ * Prefer these methods in priority order (0 > 3 > 1 > 2)
+ */
+ #define XXH_FORCE_MEMORY_ACCESS 0
+
+ /*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy usage of forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ * comes first.
+ * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ * conservative and disables hacks that increase code size. It implies the
+ * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ * and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ * Performance may cry. For example, the single shot functions just use the
+ * streaming API.
+ */
+ #define XXH_SIZE_OPT 0
+
+ /*!
+ * @def XXH_FORCE_ALIGN_CHECK
+ * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
+ * and XXH64() only).
+ *
+ * This is an important performance trick for architectures without decent
+ * unaligned memory access performance.
+ *
+ * It checks for input alignment, and when conditions are met, uses a "fast
+ * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
+ * faster_ read speed.
+ *
+ * The check costs one initial branch per hash, which is generally negligible,
+ * but not zero.
+ *
+ * Moreover, it's not useful to generate an additional code path if memory
+ * access uses the same instruction for both aligned and unaligned
+ * addresses (e.g. x86 and aarch64).
+ *
+ * In these cases, the alignment check can be removed by setting this macro to 0.
+ * Then the code will always use unaligned memory access.
+ * The alignment check is automatically disabled on x86, x64, ARM64, and some ARM chips,
+ * which are platforms known to offer good unaligned memory access performance.
+ *
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
+ * This option does not affect XXH3 (only XXH32 and XXH64).
+ */
+ #define XXH_FORCE_ALIGN_CHECK 0
+
+ /*!
+ * @def XXH_NO_INLINE_HINTS
+ * @brief When non-zero, sets all functions to `static`.
+ *
+ * By default, xxHash tries to force the compiler to inline almost all internal
+ * functions.
+ *
+ * This can usually improve performance due to reduced jumping and improved
+ * constant folding, but significantly increases the size of the binary which
+ * might not be favorable.
+ *
+ * Additionally, sometimes the forced inlining can be detrimental to performance,
+ * depending on the architecture.
+ *
+ * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
+ * compiler full control over whether to inline or not.
+ *
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
+ */
+ #define XXH_NO_INLINE_HINTS 0
+
+ /*!
+ * @def XXH3_INLINE_SECRET
+ * @brief Determines whether to inline the XXH3 withSecret code.
+ *
+ * When the secret size is known, the compiler can improve the performance
+ * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
+ *
+ * However, if the secret size is not known, it doesn't have any benefit. This
+ * happens when xxHash is compiled into a global symbol. Therefore, if
+ * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
+ *
+ * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
+ * that are *sometimes* force inlined on -Og, and it is impossible to automatically
+ * detect this optimization level.
+ */
+ #define XXH3_INLINE_SECRET 0
+
+ /*!
+ * @def XXH32_ENDJMP
+ * @brief Whether to use a jump for `XXH32_finalize`.
+ *
+ * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
+ * This is generally preferable for performance,
+ * but depending on exact architecture, a jmp may be preferable.
+ *
+ * This setting is likely to make a difference only for very small inputs.
+ */
+ #define XXH32_ENDJMP 0
+
+ /*!
+ * @internal
+ * @brief Redefines old internal names.
+ *
+ * For compatibility with code that uses xxHash's internals before the names
+ * were changed to improve namespacing. There is no other reason to use this.
+ */
+ #define XXH_OLD_NAMES
+ #undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+ /*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+ #define XXH_NO_STREAM
+ #undef XXH_NO_STREAM /* don't actually */
+#endif /* XXH_DOXYGEN */
+/*!
+ * @}
+ */
+
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+ /* prefer __packed__ structures (method 1) for GCC
+ * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
+ * which for some reason does unaligned loads. */
+ #if defined(__GNUC__) \
+ && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
+ #define XXH_FORCE_MEMORY_ACCESS 1
+ #endif
+#endif
+
+#ifndef XXH_SIZE_OPT
+ /* default to 1 for -Os or -Oz */
+ #if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+ #define XXH_SIZE_OPT 1
+ #else
+ #define XXH_SIZE_OPT 0
+ #endif
+#endif
+
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+ /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+ #if XXH_SIZE_OPT >= 1 || defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \
+ || defined(__ARM_FEATURE_UNALIGNED) || defined(_M_IX86) || defined(_M_X64) \
+ || defined(_M_ARM64) || defined(_M_ARM) /* visual */
+ #define XXH_FORCE_ALIGN_CHECK 0
+ #else
+ #define XXH_FORCE_ALIGN_CHECK 1
+ #endif
+#endif
+
+#ifndef XXH_NO_INLINE_HINTS
+ #if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
+ #define XXH_NO_INLINE_HINTS 1
+ #else
+ #define XXH_NO_INLINE_HINTS 0
+ #endif
+#endif
+
+#ifndef XXH3_INLINE_SECRET
+ #if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) || !defined(XXH_INLINE_ALL)
+ #define XXH3_INLINE_SECRET 0
+ #else
+ #define XXH3_INLINE_SECRET 1
+ #endif
+#endif
+
+#ifndef XXH32_ENDJMP
+ /* generally preferable for performance */
+ #define XXH32_ENDJMP 0
+#endif
+
+/*!
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoke malloc() / free().
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) {
+ (void) s;
+ return NULL;
+}
+static void XXH_free(void* p) { (void) p; }
+
+#else
+
+ /*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+ #include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#endif /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size) {
+ return memcpy(dest, src, size);
+}
+
+#include <limits.h> /* ULLONG_MAX */
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+ #pragma warning(disable: 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS /* disable inlining hints */
+ #if defined(__GNUC__) || defined(__clang__)
+ #define XXH_FORCE_INLINE static __attribute__((unused))
+ #else
+ #define XXH_FORCE_INLINE static
+ #endif
+ #define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+ #define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+ #define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined(_MSC_VER) /* Visual Studio */
+ #define XXH_FORCE_INLINE static __forceinline
+ #define XXH_NO_INLINE static __declspec(noinline)
+#elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
+ #define XXH_FORCE_INLINE static inline
+ #define XXH_NO_INLINE static
+#else
+ #define XXH_FORCE_INLINE static
+ #define XXH_NO_INLINE static
+#endif
+
+#if XXH3_INLINE_SECRET
+ #define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+ #define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
+
+
+/* *************************************
+* Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
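+ *
+ * For example, a sketch of a debugging build that promotes @ref XXH_ASSERT()
+ * to a real `assert()`:
+ * @code{.c}
+ * #define XXH_DEBUGLEVEL 1   // any value >= 1 enables the assertions
+ * #include "xxhash.h"
+ * @endcode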
+ */
+#ifndef XXH_DEBUGLEVEL
+ #ifdef DEBUGLEVEL /* backwards compat */
+ #define XXH_DEBUGLEVEL DEBUGLEVEL
+ #else
+ #define XXH_DEBUGLEVEL 0
+ #endif
+#endif
+
+#if (XXH_DEBUGLEVEL >= 1)
+ #include <assert.h> /* note: can still be disabled with NDEBUG */
+ #define XXH_ASSERT(c) assert(c)
+#else
+ #if defined(__INTEL_COMPILER)
+ #define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))
+ #else
+ #define XXH_ASSERT(c) XXH_ASSUME(c)
+ #endif
+#endif
+
+/* note: use after variable declarations */
+#ifndef XXH_STATIC_ASSERT
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
+ #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \
+ do \
+ { \
+ _Static_assert((c), m); \
+ } while (0)
+ #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
+ #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \
+ do \
+ { \
+ static_assert((c), m); \
+ } while (0)
+ #else
+ #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \
+ do \
+ { \
+ struct xxh_sa { \
+ char x[(c) ? 1 : -1]; \
+ }; \
+ } while (0)
+ #endif
+ #define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c), #c)
+#endif
+
+/*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
+ */
+#if defined(__GNUC__) || defined(__clang__)
+ #define XXH_COMPILER_GUARD(var) __asm__("" : "+r"(var))
+#else
+ #define XXH_COMPILER_GUARD(var) ((void) 0)
+#endif
+
+/* Specifically for NEON vectors which use the "w" constraint, on
+ * Clang. */
+#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
+ #define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w"(var))
+#else
+ #define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void) 0)
+#endif
+
+/* *************************************
+* Basic Types
+***************************************/
+#if !defined(__VMS) \
+ && (defined(__cplusplus) \
+ || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+ #ifdef _AIX
+ #include <inttypes.h>
+ #else
+ #include <stdint.h>
+ #endif
+typedef uint8_t xxh_u8;
+#else
+typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+#ifdef XXH_OLD_NAMES
+ #warning \
+ "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
+ #define BYTE xxh_u8
+ #define U8 xxh_u8
+ #define U32 xxh_u32
+#endif
+
+/* *** Memory access *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal + * @fn xxh_u32 XXH_readBE32(const void* ptr) + * @brief Reads an unaligned 32-bit big endian integer from @p ptr. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * + * @param ptr The pointer to read from. + * @return The 32-bit big endian integer from the bytes at @p ptr. + */ + +/*! + * @internal + * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) + * @brief Like @ref XXH_readLE32(), but has an option for aligned reads. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is + * always @ref XXH_alignment::XXH_unaligned. + * + * @param ptr The pointer to read from. + * @param align Whether @p ptr is aligned. + * @pre + * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte + * aligned. + * @return The 32-bit little endian integer from the bytes at @p ptr. + */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE32 and XXH_readBE32. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) + +/* + * Force direct memory access. Only works on CPU which support unaligned memory + * access in hardware. + */ +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) + + /* + * __attribute__((aligned(1))) is supported by gcc and clang. Originally the + * documentation claimed that it only increased the alignment, but actually it + * can decrease it on gcc, clang, and icc: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, + * https://gcc.godbolt.org/z/xYez1j67Y. + */ + #ifdef XXH_OLD_NAMES +typedef union { + xxh_u32 u32; +} __attribute__((packed)) unalign; + #endif +static xxh_u32 XXH_read32(const void* ptr) { + typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32; + return *((const xxh_unalign32*) ptr); +} + +#else + +/* + * Portable and safe solution. Generally efficient. + * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html + */ +static xxh_u32 XXH_read32(const void* memPtr) { + xxh_u32 val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* *** Endianness *** */ + +/*! + * @ingroup tuning + * @def XXH_CPU_LITTLE_ENDIAN + * @brief Whether the target is little endian. + * + * Defined to 1 if the target is little endian, or 0 if it is big endian. + * It can be defined externally, for example on the compiler command line. + * + * If it is not defined, + * a runtime check (which is usually constant folded) is used instead. + * + * @note + * This is not necessarily defined to an integer constant. + * + * @see XXH_isLittleEndian() for the runtime check. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN + /* + * Try to detect endianness automatically, to avoid the nonstandard behavior + * in `XXH_isLittleEndian()` + */ + #if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define XXH_CPU_LITTLE_ENDIAN 1 + #elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define XXH_CPU_LITTLE_ENDIAN 0 + #else +/*! + * @internal + * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. + * + * Most compilers will constant fold this. 
+ */
+static int XXH_isLittleEndian(void) {
+ /*
+ * Portable and well-defined behavior.
+ * Don't use static: it is detrimental to performance.
+ */
+ const union {
+ xxh_u32 u;
+ xxh_u8 c[4];
+ } one = {1};
+ return one.c[0];
+}
+ #define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+ #endif
+#endif
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifdef __has_builtin
+ #define XXH_HAS_BUILTIN(x) __has_builtin(x)
+#else
+ #define XXH_HAS_BUILTIN(x) 0
+#endif
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * # include <stddef.h>
+ * # ifdef unreachable
+ * # define XXH_UNREACHABLE() unreachable()
+ * # endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * # include <utility>
+ * # define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that as including `<utility>` in `extern "C"` blocks
+ * doesn't work on GCC12
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+ #define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+ #define XXH_UNREACHABLE() __assume(0)
+
+#else
+ #define XXH_UNREACHABLE()
+#endif
+
+#if XXH_HAS_BUILTIN(__builtin_assume)
+ #define XXH_ASSUME(c) __builtin_assume(c)
+#else
+ #define XXH_ASSUME(c) \
+ if (!(c)) \
+ { \
+ XXH_UNREACHABLE(); \
+ }
+#endif
+
+/*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ * @p r > 0 && @p r < 32
+ * @note
+ * @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
+#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
+ && XXH_HAS_BUILTIN(__builtin_rotateleft64)
+ #define XXH_rotl32 __builtin_rotateleft32
+ #define XXH_rotl64 __builtin_rotateleft64
+/* Note: although _rotl exists for MinGW (GCC under Windows), performance seems poor */
+#elif defined(_MSC_VER)
+ #define XXH_rotl32(x, r) _rotl(x, r)
+ #define XXH_rotl64(x, r) _rotl64(x, r)
+#else
+ #define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
+ #define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_swap32(xxh_u32 x)
+ * @brief A 32-bit byteswap.
+ *
+ * @param x The 32-bit integer to byteswap.
+ * @return @p x, byteswapped.
+ */
+#if defined(_MSC_VER) /* Visual Studio */
+ #define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+ #define XXH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXH_swap32(xxh_u32 x) {
+ return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00)
+ | ((x >> 24) & 0x000000ff);
+}
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+
+/*!
+ * @internal
+ * @brief Enum to indicate whether a pointer is aligned.
+ */
+typedef enum {
+ XXH_aligned, /*!< Aligned */
+ XXH_unaligned /*!< Possibly unaligned */
+} XXH_alignment;
+
+/*
+ * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
+ *
+ * This is ideal for older compilers which don't inline memcpy.
+ */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) { + const xxh_u8* bytePtr = (const xxh_u8*) memPtr; + return bytePtr[0] | ((xxh_u32) bytePtr[1] << 8) | ((xxh_u32) bytePtr[2] << 16) + | ((xxh_u32) bytePtr[3] << 24); +} + +XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) { + const xxh_u8* bytePtr = (const xxh_u8*) memPtr; + return bytePtr[3] | ((xxh_u32) bytePtr[2] << 8) | ((xxh_u32) bytePtr[1] << 16) + | ((xxh_u32) bytePtr[0] << 24); +} + +#else +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +} + +static xxh_u32 XXH_readBE32(const void* ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) { + if (align == XXH_unaligned) + { + return XXH_readLE32(ptr); + } + else + { + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*) ptr : XXH_swap32(*(const xxh_u32*) ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +/*! @ingroup public */ +XXH_PUBLIC_API unsigned XXH_versionNumber(void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +/*! + * @} + * @defgroup XXH32_impl XXH32 implementation + * @ingroup impl + * + * Details on the XXH32 implementation. + * @{ + */ +/* #define instead of static const, to be used as initializers */ +#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ +#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ +#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ +#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ +#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ + +#ifdef XXH_OLD_NAMES + #define PRIME32_1 XXH_PRIME32_1 + #define PRIME32_2 XXH_PRIME32_2 + #define PRIME32_3 XXH_PRIME32_3 + #define PRIME32_4 XXH_PRIME32_4 + #define PRIME32_5 XXH_PRIME32_5 +#endif + +/*! + * @internal + * @brief Normal stripe processing routine. + * + * This shuffles the bits so that any bit from @p input impacts several bits in + * @p acc. + * + * @param acc The accumulator lane. + * @param input The stripe of input to mix. + * @return The mixed accumulator lane. + */ +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) { + acc += input * XXH_PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= XXH_PRIME32_1; +#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) + /* + * UGLY HACK: + * A compiler fence is the only thing that prevents GCC and Clang from + * autovectorizing the XXH32 loop (pragmas and attributes don't work for some + * reason) without globally disabling SSE4.1. + * + * The reason we want to avoid vectorization is because despite working on + * 4 integers at a time, there are multiple factors slowing XXH32 down on + * SSE4: + * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on + * newer chips!) making it slightly slower to multiply four integers at + * once compared to four integers independently. Even when pmulld was + * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE + * just to multiply unless doing a long operation. 
+ *
+ * - Four instructions are required to rotate,
+ * movdqa tmp, v // not required with VEX encoding
+ * pslld tmp, 13 // tmp <<= 13
+ * psrld v, 19 // v >>= 19
+ * por v, tmp // v |= tmp
+ * compared to one for scalar:
+ * roll v, 13 // reliably fast across the board
+ * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
+ *
+ * - Instruction level parallelism is actually more beneficial here because
+ * the SIMD actually serializes this operation: While v1 is rotating, v2
+ * can load data, while v3 can multiply. SSE forces them to operate
+ * together.
+ *
+ * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
+ * the loop. NEON is only faster on the A53, and with the newer cores, it is less
+ * than half the speed.
+ *
+ * Additionally, this is used on WASM SIMD128 because it JITs to the same
+ * SIMD instructions and has the same issue.
+ */
+ XXH_COMPILER_GUARD(acc);
+#endif
+ return acc;
+}
+
+/*!
+ * @internal
+ * @brief Mixes all bits to finalize the hash.
+ *
+ * The final mix ensures that all input bits have a chance to impact any bit in
+ * the output digest, resulting in an unbiased distribution.
+ *
+ * @param hash The hash to avalanche.
+ * @return The avalanched hash.
+ */
+static xxh_u32 XXH32_avalanche(xxh_u32 hash) {
+ hash ^= hash >> 15;
+ hash *= XXH_PRIME32_2;
+ hash ^= hash >> 13;
+ hash *= XXH_PRIME32_3;
+ hash ^= hash >> 16;
+ return hash;
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH64_finalize().
+ */ +static XXH_PUREF xxh_u32 XXH32_finalize(xxh_u32 hash, + const xxh_u8* ptr, + size_t len, + XXH_alignment align) { +#define XXH_PROCESS1 \ + do \ + { \ + hash += (*ptr++) * XXH_PRIME32_5; \ + hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ + } while (0) + +#define XXH_PROCESS4 \ + do \ + { \ + hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ + ptr += 4; \ + hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ + } while (0) + + if (ptr == NULL) + XXH_ASSERT(len == 0); + + /* Compact rerolled version; generally faster */ + if (!XXH32_ENDJMP) + { + len &= 15; + while (len >= 4) + { + XXH_PROCESS4; + len -= 4; + } + while (len > 0) + { + XXH_PROCESS1; + --len; + } + return XXH32_avalanche(hash); + } + else + { + switch (len & 15) /* or switch(bEnd - p) */ + { + case 12 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 8 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 4 : + XXH_PROCESS4; + return XXH32_avalanche(hash); + + case 13 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 9 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 5 : + XXH_PROCESS4; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 14 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 10 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 6 : + XXH_PROCESS4; + XXH_PROCESS1; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 15 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 11 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 7 : + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 3 : + XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 2 : + XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 1 : + XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 0 : + return XXH32_avalanche(hash); + } + XXH_ASSERT(0); + return hash; /* reaching this point is deemed impossible */ + } +} + +#ifdef XXH_OLD_NAMES + #define PROCESS1 XXH_PROCESS1 + #define PROCESS4 XXH_PROCESS4 +#else + #undef XXH_PROCESS1 + #undef XXH_PROCESS4 +#endif + +/*! + * @internal + * @brief The implementation for @ref XXH32(). + * + * @param input , len , seed Directly passed from @ref XXH32(). + * @param align Whether @p input is aligned. + * @return The calculated hash. + */ +XXH_FORCE_INLINE XXH_PUREF xxh_u32 XXH32_endian_align(const xxh_u8* input, + size_t len, + xxh_u32 seed, + XXH_alignment align) { + xxh_u32 h32; + + if (input == NULL) + XXH_ASSERT(len == 0); + + if (len >= 16) + { + const xxh_u8* const bEnd = input + len; + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + xxh_u32 v2 = seed + XXH_PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - XXH_PRIME32_1; + + do + { + v1 = XXH32_round(v1, XXH_get32bits(input)); + input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); + input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); + input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); + input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + XXH_PRIME32_5; + } + + h32 += (xxh_u32) len; + + return XXH32_finalize(h32, input, len & 15, align); +} + +/*! 
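+ * A minimal one-shot sketch; the buffer and seed are placeholder values:
+ * @code{.c}
+ * const char buf[] = "sample input";
+ * XXH32_hash_t h = XXH32(buf, sizeof(buf) - 1, 0);   // seed 0
+ * @endcode
+ *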
@ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32(const void* input, size_t len, XXH32_hash_t seed) { +#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*) input, len); + return XXH32_digest(&state); +#else + if (XXH_FORCE_ALIGN_CHECK) + { + if ((((size_t) input) & 3) == 0) + { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*) input, len, seed, XXH_aligned); + } + } + + return XXH32_endian_align((const xxh_u8*) input, len, seed, XXH_unaligned); +#endif +} + + +/******* Hash streaming *******/ +#ifndef XXH_NO_STREAM +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) { + return (XXH32_state_t*) XXH_malloc(sizeof(XXH32_state_t)); +} +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) { + XXH_memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) { + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + statePtr->v[1] = seed + XXH_PRIME32_2; + statePtr->v[2] = seed + 0; + statePtr->v[3] = seed - XXH_PRIME32_1; + return XXH_OK; +} + + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t* state, const void* input, size_t len) { + if (input == NULL) + { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + { + const xxh_u8* p = (const xxh_u8*) input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t) len; + state->large_len |= (XXH32_hash_t) ((len >= 16) | (state->total_len_32 >= 16)); + + if (state->memsize + len < 16) + { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*) (state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t) len; + return XXH_OK; + } + + if (state->memsize) + { /* some data left from previous update */ + XXH_memcpy((xxh_u8*) (state->mem32) + state->memsize, input, 16 - state->memsize); + { + const xxh_u32* p32 = state->mem32; + state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); + p32++; + state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); + p32++; + state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); + p32++; + state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); + } + p += 16 - state->memsize; + state->memsize = 0; + } + + if (p <= bEnd - 16) + { + const xxh_u8* const limit = bEnd - 16; + + do + { + state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); + p += 4; + state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); + p += 4; + state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); + p += 4; + state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); + p += 4; + } while (p <= limit); + } + + if (p < bEnd) + { + XXH_memcpy(state->mem32, p, (size_t) (bEnd - p)); + state->memsize = (unsigned) (bEnd - p); + } + } + + return XXH_OK; +} + + +/*! 
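+ * An illustrative streaming sketch; `read_next_chunk()` is a hypothetical
+ * reader, not part of this library:
+ * @code{.c}
+ * const void* chunk; size_t chunkSize;
+ * XXH32_state_t* st = XXH32_createState();
+ * XXH32_reset(st, 0);
+ * while (read_next_chunk(&chunk, &chunkSize))
+ *     XXH32_update(st, chunk, chunkSize);
+ * XXH32_hash_t h = XXH32_digest(st);   // st stays valid for further updates
+ * XXH32_freeState(st);
+ * @endcode
+ *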
@ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) { + xxh_u32 h32; + + if (state->large_len) + { + h32 = XXH_rotl32(state->v[0], 1) + XXH_rotl32(state->v[1], 7) + XXH_rotl32(state->v[2], 12) + + XXH_rotl32(state->v[3], 18); + } + else + { + h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*) state->mem32, state->memsize, XXH_aligned); +} +#endif /* !XXH_NO_STREAM */ + +/******* Canonical representation *******/ + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) { + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) + hash = XXH_swap32(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); +} +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) { + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ +/*! + * @} + * @ingroup impl + * @{ + */ +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + + #ifdef XXH_OLD_NAMES + #define U64 xxh_u64 + #endif + + #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) + /* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE64 and XXH_readBE64. + */ + #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) { return *(const xxh_u64*) memPtr; } + + #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) + + /* + * __attribute__((aligned(1))) is supported by gcc and clang. Originally the + * documentation claimed that it only increased the alignment, but actually it + * can decrease it on gcc, clang, and icc: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, + * https://gcc.godbolt.org/z/xYez1j67Y. + */ + #ifdef XXH_OLD_NAMES +typedef union { + xxh_u32 u32; + xxh_u64 u64; +} __attribute__((packed)) unalign64; + #endif +static xxh_u64 XXH_read64(const void* ptr) { + typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64; + return *((const xxh_unalign64*) ptr); +} + + #else + +/* + * Portable and safe solution. Generally efficient. + * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html + */ +static xxh_u64 XXH_read64(const void* memPtr) { + xxh_u64 val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; +} + + #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + #if defined(_MSC_VER) /* Visual Studio */ + #define XXH_swap64 _byteswap_uint64 + #elif XXH_GCC_VERSION >= 403 + #define XXH_swap64 __builtin_bswap64 + #else +static xxh_u64 XXH_swap64(xxh_u64 x) { + return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) + | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) + | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) + | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); +} + #endif + + + /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. 
*/
+ #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3))
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr) {
+ const xxh_u8* bytePtr = (const xxh_u8*) memPtr;
+ return bytePtr[0] | ((xxh_u64) bytePtr[1] << 8) | ((xxh_u64) bytePtr[2] << 16)
+ | ((xxh_u64) bytePtr[3] << 24) | ((xxh_u64) bytePtr[4] << 32)
+ | ((xxh_u64) bytePtr[5] << 40) | ((xxh_u64) bytePtr[6] << 48)
+ | ((xxh_u64) bytePtr[7] << 56);
+}
+
+XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr) {
+ const xxh_u8* bytePtr = (const xxh_u8*) memPtr;
+ return bytePtr[7] | ((xxh_u64) bytePtr[6] << 8) | ((xxh_u64) bytePtr[5] << 16)
+ | ((xxh_u64) bytePtr[4] << 24) | ((xxh_u64) bytePtr[3] << 32)
+ | ((xxh_u64) bytePtr[2] << 40) | ((xxh_u64) bytePtr[1] << 48)
+ | ((xxh_u64) bytePtr[0] << 56);
+}
+
+ #else
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) {
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+}
+
+static xxh_u64 XXH_readBE64(const void* ptr) {
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+ #endif
+
+XXH_FORCE_INLINE xxh_u64 XXH_readLE64_align(const void* ptr, XXH_alignment align) {
+ if (align == XXH_unaligned)
+ return XXH_readLE64(ptr);
+ else
+ return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*) ptr : XXH_swap64(*(const xxh_u64*) ptr);
+}
+
+
+ /******* xxh64 *******/
+ /*!
+ * @}
+ * @defgroup XXH64_impl XXH64 implementation
+ * @ingroup impl
+ *
+ * Details on the XXH64 implementation.
+ * @{
+ */
+ /* #define rather than static const, to be used as initializers */
+ #define XXH_PRIME64_1 \
+ 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
+ #define XXH_PRIME64_2 \
+ 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
+ #define XXH_PRIME64_3 \
+ 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
+ #define XXH_PRIME64_4 \
+ 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
+ #define XXH_PRIME64_5 \
+ 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
+
+ #ifdef XXH_OLD_NAMES
+ #define PRIME64_1 XXH_PRIME64_1
+ #define PRIME64_2 XXH_PRIME64_2
+ #define PRIME64_3 XXH_PRIME64_3
+ #define PRIME64_4 XXH_PRIME64_4
+ #define PRIME64_5 XXH_PRIME64_5
+ #endif
+
+/*! @copydoc XXH32_round */
+static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) {
+ acc += input * XXH_PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= XXH_PRIME64_1;
+ #if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+ /*
+ * DISABLE AUTOVECTORIZATION:
+ * A compiler fence is used to prevent GCC and Clang from
+ * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
+ * reason) without globally disabling AVX512.
+ *
+ * Autovectorization of XXH64 tends to be detrimental,
+ * though the exact outcome may change depending on the exact cpu and compiler version.
+ * For information, it has been reported as detrimental for Skylake-X,
+ * but possibly beneficial for Zen4.
+ *
+ * The default is to disable auto-vectorization,
+ * but you can enable it instead by defining the `XXH_ENABLE_AUTOVECTORIZE` build macro.
+ */
+ XXH_COMPILER_GUARD(acc);
+ #endif
+ return acc;
+}
+
+static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) {
+ val = XXH64_round(0, val);
+ acc ^= val;
+ acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
+ return acc;
+}
+
+/*!
@copydoc XXH32_avalanche */ +static xxh_u64 XXH64_avalanche(xxh_u64 hash) { + hash ^= hash >> 33; + hash *= XXH_PRIME64_2; + hash ^= hash >> 29; + hash *= XXH_PRIME64_3; + hash ^= hash >> 32; + return hash; +} + + + #define XXH_get64bits(p) XXH_readLE64_align(p, align) + +/*! + * @internal + * @brief Processes the last 0-31 bytes of @p ptr. + * + * There may be up to 31 bytes remaining to consume from the input. + * This final stage will digest them to ensure that all input bytes are present + * in the final mix. + * + * @param hash The hash to finalize. + * @param ptr The pointer to the remaining input. + * @param len The remaining length, modulo 32. + * @param align Whether @p ptr is aligned. + * @return The finalized hash + * @see XXH32_finalize(). + */ +static XXH_PUREF xxh_u64 XXH64_finalize(xxh_u64 hash, + const xxh_u8* ptr, + size_t len, + XXH_alignment align) { + if (ptr == NULL) + XXH_ASSERT(len == 0); + len &= 31; + while (len >= 8) + { + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); + ptr += 8; + hash ^= k1; + hash = XXH_rotl64(hash, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; + } + if (len >= 4) + { + hash ^= (xxh_u64) (XXH_get32bits(ptr)) * XXH_PRIME64_1; + ptr += 4; + hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) + { + hash ^= (*ptr++) * XXH_PRIME64_5; + hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1; + --len; + } + return XXH64_avalanche(hash); +} + + #ifdef XXH_OLD_NAMES + #define PROCESS1_64 XXH_PROCESS1_64 + #define PROCESS4_64 XXH_PROCESS4_64 + #define PROCESS8_64 XXH_PROCESS8_64 + #else + #undef XXH_PROCESS1_64 + #undef XXH_PROCESS4_64 + #undef XXH_PROCESS8_64 + #endif + +/*! + * @internal + * @brief The implementation for @ref XXH64(). + * + * @param input , len , seed Directly passed from @ref XXH64(). + * @param align Whether @p input is aligned. + * @return The calculated hash. + */ +XXH_FORCE_INLINE XXH_PUREF xxh_u64 XXH64_endian_align(const xxh_u8* input, + size_t len, + xxh_u64 seed, + XXH_alignment align) { + xxh_u64 h64; + if (input == NULL) + XXH_ASSERT(len == 0); + + if (len >= 32) + { + const xxh_u8* const bEnd = input + len; + const xxh_u8* const limit = bEnd - 31; + xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + xxh_u64 v2 = seed + XXH_PRIME64_2; + xxh_u64 v3 = seed + 0; + xxh_u64 v4 = seed - XXH_PRIME64_1; + + do + { + v1 = XXH64_round(v1, XXH_get64bits(input)); + input += 8; + v2 = XXH64_round(v2, XXH_get64bits(input)); + input += 8; + v3 = XXH64_round(v3, XXH_get64bits(input)); + input += 8; + v4 = XXH64_round(v4, XXH_get64bits(input)); + input += 8; + } while (input < limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } + else + { + h64 = seed + XXH_PRIME64_5; + } + + h64 += (xxh_u64) len; + + return XXH64_finalize(h64, input, len, align); +} + + +/*! 
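+ * Minimal one-shot usage sketch (buf and bufSize stand in for caller data):
+ *
+ * XXH64_hash_t h = XXH64(buf, bufSize, 0);
+ *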
@ingroup XXH64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) { + #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8*) input, len); + return XXH64_digest(&state); + #else + if (XXH_FORCE_ALIGN_CHECK) + { + if ((((size_t) input) & 7) == 0) + { /* Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8*) input, len, seed, XXH_aligned); + } + } + + return XXH64_endian_align((const xxh_u8*) input, len, seed, XXH_unaligned); + + #endif +} + + /******* Hash Streaming *******/ + #ifndef XXH_NO_STREAM +/*! @ingroup XXH64_family*/ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { + return (XXH64_state_t*) XXH_malloc(sizeof(XXH64_state_t)); +} +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, + const XXH64_state_t* srcState) { + XXH_memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed) { + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + statePtr->v[1] = seed + XXH_PRIME64_2; + statePtr->v[2] = seed + 0; + statePtr->v[3] = seed - XXH_PRIME64_1; + return XXH_OK; +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH_NOESCAPE XXH64_state_t* state, + XXH_NOESCAPE const void* input, + size_t len) { + if (input == NULL) + { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + { + const xxh_u8* p = (const xxh_u8*) input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) + { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*) state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32) len; + return XXH_OK; + } + + if (state->memsize) + { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*) state->mem64) + state->memsize, input, 32 - state->memsize); + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64 + 0)); + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64 + 1)); + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64 + 2)); + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64 + 3)); + p += 32 - state->memsize; + state->memsize = 0; + } + + if (p + 32 <= bEnd) + { + const xxh_u8* const limit = bEnd - 32; + + do + { + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); + p += 8; + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); + p += 8; + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); + p += 8; + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); + p += 8; + } while (p <= limit); + } + + if (p < bEnd) + { + XXH_memcpy(state->mem64, p, (size_t) (bEnd - p)); + state->memsize = (unsigned) (bEnd - p); + } + } + + return XXH_OK; +} + + +/*! 
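+ * Streaming round-trip, sketched (chunk1/chunk2 and their sizes are
+ * placeholders; error checking omitted):
+ *
+ * XXH64_state_t* s = XXH64_createState();
+ * XXH64_reset(s, 0);
+ * XXH64_update(s, chunk1, size1);
+ * XXH64_update(s, chunk2, size2);
+ * XXH64_hash_t h = XXH64_digest(s);
+ * XXH64_freeState(s);
+ *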
@ingroup XXH64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) { + xxh_u64 h64; + + if (state->total_len >= 32) + { + h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + + XXH_rotl64(state->v[3], 18); + h64 = XXH64_mergeRound(h64, state->v[0]); + h64 = XXH64_mergeRound(h64, state->v[1]); + h64 = XXH64_mergeRound(h64, state->v[2]); + h64 = XXH64_mergeRound(h64, state->v[3]); + } + else + { + h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*) state->mem64, (size_t) state->total_len, + XXH_aligned); +} + #endif /* !XXH_NO_STREAM */ + +/******* Canonical representation *******/ + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, + XXH64_hash_t hash) { + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) + hash = XXH_swap64(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src) { + return XXH_readBE64(src); +} + + #ifndef XXH_NO_XXH3 + + /* ********************************************************************* +* XXH3 +* New generation hash designed for speed on small keys and vectorization +************************************************************************ */ + /*! + * @} + * @defgroup XXH3_impl XXH3 implementation + * @ingroup impl + * @{ + */ + + /* === Compiler specifics === */ + + #if ((defined(sun) || defined(__sun)) \ + && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ + #define XXH_RESTRICT /* disable */ + #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ + #define XXH_RESTRICT restrict + #elif (defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \ + || (defined(__clang__)) || (defined(_MSC_VER) && (_MSC_VER >= 1400)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)) + /* + * There are a LOT more compilers that recognize __restrict but this + * covers the major ones. 
+ */
+ #define XXH_RESTRICT __restrict
+ #else
+ #define XXH_RESTRICT /* disable */
+ #endif
+
+ #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
+ || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
+ #define XXH_likely(x) __builtin_expect(x, 1)
+ #define XXH_unlikely(x) __builtin_expect(x, 0)
+ #else
+ #define XXH_likely(x) (x)
+ #define XXH_unlikely(x) (x)
+ #endif
+
+ #ifndef XXH_HAS_INCLUDE
+ #ifdef __has_include
+ /*
+ * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
+ * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
+ */
+ #define XXH_HAS_INCLUDE __has_include
+ #else
+ #define XXH_HAS_INCLUDE(x) 0
+ #endif
+ #endif
+
+ #if defined(__GNUC__) || defined(__clang__)
+ #if defined(__ARM_FEATURE_SVE)
+ #include <arm_sve.h>
+ #endif
+ #if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(_M_ARM) && _M_ARM >= 7) \
+ || defined(_M_ARM64) || defined(_M_ARM64EC) \
+ || (defined(__wasm_simd128__) \
+ && XXH_HAS_INCLUDE(<simde/arm/neon.h>)) /* WASM SIMD128 via SIMDe */
+ #define inline __inline__ /* circumvent a clang bug */
+ #include <arm_neon.h>
+ #undef inline
+ #elif defined(__AVX2__)
+ #include <immintrin.h>
+ #elif defined(__SSE2__)
+ #include <emmintrin.h>
+ #endif
+ #endif
+
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+ #endif
+
+ /*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ *
+ * For example, these two lines seem similar, and run equally fast on 64-bit:
+ *
+ * xxh_u64 x;
+ * x ^= (x >> 47); // good
+ * x ^= (x >> 13); // bad
+ *
+ * However, to a 32-bit machine, there is a major difference.
+ *
+ * x ^= (x >> 47) looks like this:
+ *
+ * x.lo ^= (x.hi >> (47 - 32));
+ *
+ * while x ^= (x >> 13) looks like this:
+ *
+ * // note: funnel shifts are not usually cheap.
+ * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ * x.hi ^= (x.hi >> 13);
+ *
+ * The first one is significantly faster than the second, simply because the
+ * shift is larger than 32. This means:
+ * - All the bits we need are in the upper 32 bits, so we can ignore the lower
+ * 32 bits in the shift.
+ * - The shift result will always fit in the lower 32 bits, and therefore,
+ * we can ignore the upper 32 bits in the xor.
+ *
+ * Thanks to this optimization, XXH3 only requires these features to be efficient:
+ *
+ * - Usable unaligned access
+ * - A 32-bit or 64-bit ALU
+ * - If 32-bit, a decent ADC instruction
+ * - A 32 or 64-bit multiply with a 64-bit result
+ * - For the 128-bit variant, a decent byteswap helps short inputs.
+ *
+ * The first two are already required by XXH32, and almost all 32-bit and 64-bit
+ * platforms which can run XXH32 can run XXH3 efficiently.
+ *
+ * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
+ * notable exception.
+ *
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
+ *
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
+ * Lo registers, and this shuffling results in thousands more MOVs than A32.
+ *
+ * A32 and T32 don't have this limitation. They can access all 14 registers,
+ * do a 32->64 multiply with UMULL, and the flexible operand allowing free
+ * shifts is helpful, too.
+ *
+ * Therefore, we do a quick sanity check.
+ * + * If compiling Thumb-1 for a target which supports ARM instructions, we will + * emit a warning, as it is not a "sane" platform to compile for. + * + * Usually, if this happens, it is because of an accident and you probably need + * to specify -march, as you likely meant to compile for a newer architecture. + * + * Credit: large sections of the vectorial and asm source code paths + * have been contributed by @easyaspi314 + */ + #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) + #warning "XXH3 is highly inefficient without ARM or Thumb-2." + #endif + + /* ========================================== + * Vectorization detection + * ========================================== */ + + #ifdef XXH_DOXYGEN + /*! + * @ingroup tuning + * @brief Overrides the vectorization implementation chosen for XXH3. + * + * Can be defined to 0 to disable SIMD or any of the values mentioned in + * @ref XXH_VECTOR_TYPE. + * + * If this is not defined, it uses predefined macros to determine the best + * implementation. + */ + #define XXH_VECTOR XXH_SCALAR +/*! + * @ingroup tuning + * @brief Possible values for @ref XXH_VECTOR. + * + * Note that these are actually implemented as macros. + * + * If this is not defined, it is detected automatically. + * internal macro XXH_X86DISPATCH overrides this. + */ +enum XXH_VECTOR_TYPE /* fake enum */ { + XXH_SCALAR = 0, /*!< Portable scalar version */ + XXH_SSE2 = 1, /*!< + * SSE2 for Pentium 4, Opteron, all x86_64. + * + * @note SSE2 is also guaranteed on Windows 10, macOS, and + * Android x86. + */ + XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ + XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ + XXH_NEON = 4, /*!< + * NEON for most ARMv7-A, all AArch64, and WASM SIMD128 + * via the SIMDeverywhere polyfill provided with the + * Emscripten SDK. + */ + XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ + XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */ +}; + /*! + * @ingroup tuning + * @brief Selects the minimum alignment for XXH3's accumulators. + * + * When using SIMD, this should match the alignment required for said vector + * type, so, for example, 32 for AVX2. + * + * Default: Auto detected. 
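+ *
+ * For example, an AVX2 build (XXH_VECTOR == XXH_AVX2) loads its accumulators
+ * as __m256i, so the table below selects a 32-byte XXH_ACC_ALIGN for it.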
+ */
+ #define XXH_ACC_ALIGN 8
+ #endif
+
+ /* Actual definition */
+ #ifndef XXH_DOXYGEN
+ #define XXH_SCALAR 0
+ #define XXH_SSE2 1
+ #define XXH_AVX2 2
+ #define XXH_AVX512 3
+ #define XXH_NEON 4
+ #define XXH_VSX 5
+ #define XXH_SVE 6
+ #endif
+
+ #ifndef XXH_VECTOR /* can be defined on command line */
+ #if defined(__ARM_FEATURE_SVE)
+ #define XXH_VECTOR XXH_SVE
+ #elif (defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
+ || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+ || (defined(__wasm_simd128__) \
+ && XXH_HAS_INCLUDE(<simde/arm/neon.h>)) /* wasm simd128 via SIMDe */ \
+ ) \
+ && (defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+ #define XXH_VECTOR XXH_NEON
+ #elif defined(__AVX512F__)
+ #define XXH_VECTOR XXH_AVX512
+ #elif defined(__AVX2__)
+ #define XXH_VECTOR XXH_AVX2
+ #elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) \
+ || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+ #define XXH_VECTOR XXH_SSE2
+ #elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
+ || (defined(__s390x__) && defined(__VEC__)) && defined(__GNUC__) /* TODO: IBM XL */
+ #define XXH_VECTOR XXH_VSX
+ #else
+ #define XXH_VECTOR XXH_SCALAR
+ #endif
+ #endif
+
+ /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
+ #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+ #ifdef _MSC_VER
+ #pragma warning(once: 4606)
+ #else
+ #warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+ #endif
+ #undef XXH_VECTOR
+ #define XXH_VECTOR XXH_SCALAR
+ #endif
+
+ /*
+ * Controls the alignment of the accumulator,
+ * for compatibility with aligned vector loads, which are usually faster.
+ */
+ #ifndef XXH_ACC_ALIGN
+ #if defined(XXH_X86DISPATCH)
+ #define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
+ #elif XXH_VECTOR == XXH_SCALAR /* scalar */
+ #define XXH_ACC_ALIGN 8
+ #elif XXH_VECTOR == XXH_SSE2 /* sse2 */
+ #define XXH_ACC_ALIGN 16
+ #elif XXH_VECTOR == XXH_AVX2 /* avx2 */
+ #define XXH_ACC_ALIGN 32
+ #elif XXH_VECTOR == XXH_NEON /* neon */
+ #define XXH_ACC_ALIGN 16
+ #elif XXH_VECTOR == XXH_VSX /* vsx */
+ #define XXH_ACC_ALIGN 16
+ #elif XXH_VECTOR == XXH_AVX512 /* avx512 */
+ #define XXH_ACC_ALIGN 64
+ #elif XXH_VECTOR == XXH_SVE /* sve */
+ #define XXH_ACC_ALIGN 64
+ #endif
+ #endif
+
+ #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 || XXH_VECTOR == XXH_AVX2 \
+ || XXH_VECTOR == XXH_AVX512
+ #define XXH_SEC_ALIGN XXH_ACC_ALIGN
+ #elif XXH_VECTOR == XXH_SVE
+ #define XXH_SEC_ALIGN XXH_ACC_ALIGN
+ #else
+ #define XXH_SEC_ALIGN 8
+ #endif
+
+ #if defined(__GNUC__) || defined(__clang__)
+ #define XXH_ALIASING __attribute__((may_alias))
+ #else
+ #define XXH_ALIASING /* nothing */
+ #endif
+
+ /*
+ * UGLY HACK:
+ * GCC usually generates the best code with -O3 for xxHash.
+ *
+ * However, when targeting AVX2, it is overzealous in its unrolling resulting
+ * in code roughly 3/4 the speed of Clang.
+ *
+ * There are other issues, such as GCC splitting _mm256_loadu_si256 into
+ * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
+ * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
+ *
+ * That is why when compiling the AVX2 version, it is recommended to use either
+ * -O2 -mavx2 -march=haswell
+ * or
+ * -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
+ * + * Fortunately, we can control the first one with a pragma that forces GCC into + * -O2, but the other one we can't control without "failed to inline always + * inline function due to target mismatch" warnings. + */ + #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */ + #pragma GCC push_options + #pragma GCC optimize("-O2") + #endif + + #if XXH_VECTOR == XXH_NEON + +/* + * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3 + * optimizes out the entire hashLong loop because of the aliasing violation. + * + * However, GCC is also inefficient at load-store optimization with vld1q/vst1q, + * so the only option is to mark it as aliasing. + */ +typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING; + + /*! + * @internal + * @brief `vld1q_u64` but faster and alignment-safe. + * + * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only + * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86). + * + * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it + * prohibits load-store optimizations. Therefore, a direct dereference is used. + * + * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe + * unaligned load. + */ + #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */ +{ + return *(xxh_aliasing_uint64x2_t const*) ptr; +} + #else +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) { + return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*) ptr)); +} + #endif + + /*! + * @internal + * @brief `vmlal_u32` on low and high halves of a vector. + * + * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with + * inline assembly and were therefore incapable of merging the `vget_{low, high}_u32` + * with `vmlal_u32`. + */ + #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11 +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + /* Inline assembly is the only way */ + __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w"(acc) : "w"(lhs), "w"(rhs)); + return acc; +} +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + /* This intrinsic works as expected */ + return vmlal_high_u32(acc, lhs, rhs); +} + #else +/* Portable intrinsic versions */ +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs)); +} +/*! @copydoc XXH_vmlal_low_u32 + * Assume the compiler converts this to vmlal_high_u32 on aarch64 */ +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs)); +} + #endif + + /*! + * @ingroup tuning + * @brief Controls the NEON to scalar ratio for XXH3 + * + * This can be set to 2, 4, 6, or 8. + * + * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used. + * + * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those + * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU + * bandwidth. 
+ *
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
+ *
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
+ *
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
+ *
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
+ * most other CPUs:
+ *
+ * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
+ * |:----------------------|:--------------------|----------:|-----------:|------:|
+ * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
+ * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
+ * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
+ * | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |
+ *
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
+ *
+ * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes,
+ * meaning it effectively becomes a slower version of 4 lanes.
+ *
+ * @see XXH3_accumulate_512_neon()
+ */
+ #ifndef XXH3_NEON_LANES
+ #if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) \
+ || defined(_M_ARM64EC)) \
+ && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
+ #define XXH3_NEON_LANES 6
+ #else
+ #define XXH3_NEON_LANES XXH_ACC_NB
+ #endif
+ #endif
+ #endif /* XXH_VECTOR == XXH_NEON */
+
+ /*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+ #if XXH_VECTOR == XXH_VSX
+ /* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+ #pragma push_macro("bool")
+ #pragma push_macro("vector")
+ #pragma push_macro("pixel")
+ /* silence potential macro redefined warnings */
+ #undef bool
+ #undef vector
+ #undef pixel
+
+ #if defined(__s390x__)
+ #include <s390intrin.h>
+ #else
+ #include <altivec.h>
+ #endif
+
+ /* Restore the original macro values, if applicable. */
+ #pragma pop_macro("pixel")
+ #pragma pop_macro("vector")
+ #pragma pop_macro("bool")
+
+typedef __vector unsigned long long xxh_u64x2;
+typedef __vector unsigned char xxh_u8x16;
+typedef __vector unsigned xxh_u32x4;
+
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
+ #ifndef XXH_VSX_BE
+ #if defined(__BIG_ENDIAN__) \
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #define XXH_VSX_BE 1
+ #elif defined(__VEC_ELEMENT_REG_ORDER__) \
+ && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ #warning "-maltivec=be is not recommended. Please use native endianness."
+ #define XXH_VSX_BE 1
+ #else
+ #define XXH_VSX_BE 0
+ #endif
+ #endif /* !defined(XXH_VSX_BE) */
+
+ #if XXH_VSX_BE
+ #if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
+ #define XXH_vec_revb vec_revb
+ #else
+/*!
+ * A polyfill for POWER9's vec_revb().
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) {
+ xxh_u8x16 const vByteSwap = {0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+ 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08};
+ return vec_perm(val, val, vByteSwap);
+}
+ #endif
+ #endif /* XXH_VSX_BE */
+
+/*!
+ * Performs an unaligned vector load and byte swaps it on big endian.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void* ptr) {
+ xxh_u64x2 ret;
+ XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
+ #if XXH_VSX_BE
+ ret = XXH_vec_revb(ret);
+ #endif
+ return ret;
+}
+
+ /*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a while,
+ * and they are endian dependent. Also, their meanings swap depending on version.
+ * */
+ #if defined(__s390x__)
+ /* s390x is always big endian, no issue on this platform */
+ #define XXH_vec_mulo vec_mulo
+ #define XXH_vec_mule vec_mule
+ #elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) \
+ && !defined(__ibmxl__)
+ /* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+ /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
+ #define XXH_vec_mulo __builtin_altivec_vmulouw
+ #define XXH_vec_mule __builtin_altivec_vmuleuw
+ #else
+/* gcc needs inline assembly */
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) {
+ xxh_u64x2 result;
+ __asm__("vmulouw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b));
+ return result;
+}
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) {
+ xxh_u64x2 result;
+ __asm__("vmuleuw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b));
+ return result;
+}
+ #endif /* XXH_vec_mulo, XXH_vec_mule */
+ #endif /* XXH_VECTOR == XXH_VSX */
+
+ #if XXH_VECTOR == XXH_SVE
+ #define ACCRND(acc, offset) \
+ do \
+ { \
+ svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
+ svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
+ svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
+ svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
+ svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
+ svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
+ svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+ acc = svadd_u64_x(mask, acc, mul); \
+ } while (0)
+ #endif /* XXH_VECTOR == XXH_SVE */
+
+ /* prefetch
+ * can be disabled by declaring the XXH_NO_PREFETCH build macro */
+ #if defined(XXH_NO_PREFETCH)
+ #define XXH_PREFETCH(ptr) (void) (ptr) /* disabled */
+ #else
+ #if XXH_SIZE_OPT >= 1
+ #define XXH_PREFETCH(ptr) (void) (ptr)
+ #elif defined(_MSC_VER) \
+ && (defined(_M_X64) \
+ || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
+ #include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+ #define XXH_PREFETCH(ptr) _mm_prefetch((const char*) (ptr), _MM_HINT_T0)
+ #elif defined(__GNUC__) \
+ && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
+ #define XXH_PREFETCH(ptr) \
+ __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+ #else
+ #define XXH_PREFETCH(ptr) (void) (ptr) /* disabled */
+ #endif
+
#endif /* XXH_NO_PREFETCH */ + + + /* ========================================== + * XXH3 default settings + * ========================================== */ + + #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ + + #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) + #error "default keyset is not large enough" + #endif + +/*! Pseudorandom secret taken directly from FARSH. */ +XXH_ALIGN(64) +static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + +static const xxh_u64 PRIME_MX1 = + 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */ +static const xxh_u64 PRIME_MX2 = + 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */ + + #ifdef XXH_OLD_NAMES + #define kSecret XXH3_kSecret + #endif + + #ifdef XXH_DOXYGEN +/*! + * @brief Calculates a 32-bit to 64-bit long multiply. + * + * Implemented as a macro. + * + * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't + * need to (but it shouldn't need to anyways, it is about 7 instructions to do + * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we + * use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better option, + * you may also want to write your own long multiply routine here. + * + * @param x, y Numbers to be multiplied + * @return 64-bit product of the low 32 bits of @p x and @p y. + */ +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) { + return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); +} + #elif defined(_MSC_VER) && defined(_M_IX86) + #define XXH_mult32to64(x, y) __emulu((unsigned) (x), (unsigned) (y)) + #else + /* + * Downcast + upcast is usually better than masking on older compilers like + * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands + * and perform a full 64x64 multiply -- entirely redundant on 32-bit. + */ + #define XXH_mult32to64(x, y) ((xxh_u64) (xxh_u32) (x) * (xxh_u64) (xxh_u32) (y)) + #endif + +/*! + * @brief Calculates a 64->128-bit long multiply. + * + * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar + * version. 
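+ *
+ * Worked example: 0xFFFFFFFFFFFFFFFF * 3 equals 2 * 2^64 + 0xFFFFFFFFFFFFFFFD,
+ * so the result has high64 == 2 and low64 == 0xFFFFFFFFFFFFFFFD.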
+ * + * @param lhs , rhs The 64-bit integers to be multiplied + * @return The 128-bit result represented in an @ref XXH128_hash_t. + */ +static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this type + * despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ + #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \ + && defined(__SIZEOF_INT128__) \ + || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t const product = (__uint128_t) lhs * (__uint128_t) rhs; + XXH128_hash_t r128; + r128.low64 = (xxh_u64) (product); + r128.high64 = (xxh_u64) (product >> 64); + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. + */ + #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC) + + #ifndef _MSC_VER + #pragma intrinsic(_umul128) + #endif + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXH128_hash_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; + + /* + * MSVC for ARM64's __umulh method. + * + * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method. + */ + #elif defined(_M_ARM64) || defined(_M_ARM64EC) + + #ifndef _MSC_VER + #pragma intrinsic(__umulh) + #endif + XXH128_hash_t r128; + r128.low64 = lhs * rhs; + r128.high64 = __umulh(lhs, rhs); + return r128; + + #else + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown below + * with base 10 arithmetic instead of base 0x100000000. + * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 + * --------- + * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. + * This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARM's Digital Signal Processing extension + * in 32-bit ARMv6 and later, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, and + * allows this to be calculated in only 4 instructions at speeds + * comparable to some 64-bit ALUs. + * + * 3. 
It isn't terrible on other platforms. Usually this will be a couple + * of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. */ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; + #endif +} + +/*! + * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. + * + * The reason for the separate function is to prevent passing too many structs + * around by value. This will hopefully inline the multiply, but we don't force it. + * + * @param lhs , rhs The 64-bit integers to multiply + * @return The low 64 bits of the product XOR'd by the high 64 bits. + * @see XXH_mult64to128() + */ +static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) { + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + +/*! Seems to produce slightly better code on GCC for some reason. */ +XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) { + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); +} + +/* + * This is a fast avalanche stage, + * suitable when input bits are already partially mixed + */ +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) { + h64 = XXH_xorshift64(h64, 37); + h64 *= PRIME_MX1; + h64 = XXH_xorshift64(h64, 32); + return h64; +} + +/* + * This is a stronger avalanche, + * inspired by Pelle Evensen's rrmxmx + * preferable when input has not been previously mixed + */ +static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) { + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= PRIME_MX2; + h64 ^= (h64 >> 35) + len; + h64 *= PRIME_MX2; + return XXH_xorshift64(h64, 28); +} + + +/* ========================================== + * Short keys + * ========================================== + * One of the shortcomings of XXH32 and XXH64 was that their performance was + * sub-optimal on short lengths. It used an iterative algorithm which strongly + * favored lengths that were a multiple of 4 or 8. + * + * Instead of iterating over individual inputs, we use a set of single shot + * functions which piece together a range of lengths and operate in constant time. + * + * Additionally, the number of multiplies has been significantly reduced. This + * reduces latency, especially when emulating 64-bit multiplies on 32-bit. + * + * Depending on the platform, this may or may not be faster than XXH32, but it + * is almost guaranteed to be faster than XXH64. + */ + +/* + * At very short lengths, there isn't enough input to fully hide secrets, or use + * the entire secret. + * + * There is also only a limited amount of mixing we can do before significantly + * impacting performance. + * + * Therefore, we use different sections of the secret and always mix two secret + * samples with an XOR. This should have no effect on performance on the + * seedless or withSeed variants because everything _should_ be constant folded + * by modern compilers. 
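+ *
+ * As a concrete instance, XXH3_len_1to3_64b below builds its bitflip from two
+ * 32-bit secret words: (XXH_readLE32(secret) ^ XXH_readLE32(secret + 4)) + seed.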
+ * + * The XOR mixing hides individual parts of the secret and increases entropy. + * + * This adds an extra layer of strength for custom secrets. + */ +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_1to3_64b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { + xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = + ((xxh_u32) c1 << 16) | ((xxh_u32) c2 << 24) | ((xxh_u32) c3 << 0) | ((xxh_u32) len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret + 4)) + seed; + xxh_u64 const keyed = (xxh_u64) combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_4to8_64b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64) XXH_swap32((xxh_u32) seed) << 32; + { + xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret + 8) ^ XXH_readLE64(secret + 16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64) input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_9to16_64b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { + xxh_u64 const bitflip1 = (XXH_readLE64(secret + 24) ^ XXH_readLE64(secret + 32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret + 40) ^ XXH_readLE64(secret + 48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = + len + XXH_swap64(input_lo) + input_hi + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_0to16_64b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(len <= 16); + { + if (XXH_likely(len > 8)) + return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) + return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) + return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret + 56) ^ XXH_readLE64(secret + 64))); + } +} + +/* + * DISCLAIMER: There are known *seed-dependent* multicollisions here due to + * multiplication by zero, affecting hashes of lengths 17 to 240. + * + * However, they are very unlikely. + * + * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all + * unseeded non-cryptographic hashes, it does not attempt to defend itself + * against specially crafted inputs, only random inputs. 
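+ *
+ * Concretely, XXH3_mix16B below computes
+ * mul128_fold64(input_lo ^ (readLE64(secret) + seed), input_hi ^ (readLE64(secret + 8) - seed)),
+ * so an input word equal to its mixed secret word zeroes one multiplicand.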
+ * + * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes + * cancelling out the secret is taken an arbitrary number of times (addressed + * in XXH3_accumulate_512), this collision is very unlikely with random inputs + * and/or proper seeding: + * + * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a + * function that is only called up to 16 times per hash with up to 240 bytes of + * input. + * + * This is not too bad for a non-cryptographic hash function, especially with + * only 64 bit outputs. + * + * The 128-bit variant (which trades some speed for strength) is NOT affected + * by this, although it is always a good idea to use a proper seed if you care + * about strength. + */ +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + xxh_u64 seed64) { + #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ + /* + * UGLY HACK: + * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in + * slower code. + * + * By forcing seed64 into a register, we disrupt the cost model and + * cause it to scalarize. See `XXH32_round()` + * + * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, + * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on + * GCC 9.2, despite both emitting scalar code. + * + * GCC generates much better scalar code than Clang for the rest of XXH3, + * which is why finding a more optimal codepath is an interest. + */ + XXH_COMPILER_GUARD(seed64); + #endif + { + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input + 8); + return XXH3_mul128_fold64(input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret + 8) - seed64)); + } +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. */ +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, + size_t len, + const xxh_u8* XXH_RESTRICT secret, + size_t secretSize, + XXH64_hash_t seed) { + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + (void) secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { + xxh_u64 acc = len * XXH_PRIME64_1; + #if XXH_SIZE_OPT >= 1 + /* Smaller and cleaner, but slightly slower. */ + unsigned int i = (unsigned int) (len - 1) / 32; + do + { + acc += XXH3_mix16B(input + 16 * i, secret + 32 * i, seed); + acc += XXH3_mix16B(input + len - 16 * (i + 1), secret + 32 * i + 16, seed); + } while (i-- != 0); + #else + if (len > 32) + { + if (len > 64) + { + if (len > 96) + { + acc += XXH3_mix16B(input + 48, secret + 96, seed); + acc += XXH3_mix16B(input + len - 64, secret + 112, seed); + } + acc += XXH3_mix16B(input + 32, secret + 64, seed); + acc += XXH3_mix16B(input + len - 48, secret + 80, seed); + } + acc += XXH3_mix16B(input + 16, secret + 32, seed); + acc += XXH3_mix16B(input + len - 32, secret + 48, seed); + } + acc += XXH3_mix16B(input + 0, secret + 0, seed); + acc += XXH3_mix16B(input + len - 16, secret + 16, seed); + #endif + return XXH3_avalanche(acc); + } +} + + /*! + * @brief Maximum size of "short" key in bytes. 
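+ *
+ * Inputs of 129 to 240 bytes take the midsize path defined next; anything
+ * longer switches to the stripe-based long-key accumulators further below.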
+ */
+ #define XXH3_MIDSIZE_MAX 240
+
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input,
+ size_t len,
+ const xxh_u8* XXH_RESTRICT secret,
+ size_t secretSize,
+ XXH64_hash_t seed) {
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+ (void) secretSize;
+ XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+ #define XXH3_MIDSIZE_STARTOFFSET 3
+ #define XXH3_MIDSIZE_LASTOFFSET 17
+
+ {
+ xxh_u64 acc = len * XXH_PRIME64_1;
+ xxh_u64 acc_end;
+ unsigned int const nbRounds = (unsigned int) len / 16;
+ unsigned int i;
+ XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+ for (i = 0; i < 8; i++)
+ {
+ acc += XXH3_mix16B(input + (16 * i), secret + (16 * i), seed);
+ }
+ /* last bytes */
+ acc_end = XXH3_mix16B(input + len - 16,
+ secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+ XXH_ASSERT(nbRounds >= 8);
+ acc = XXH3_avalanche(acc);
+ #if defined(__clang__) /* Clang */ \
+ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
+ /*
+ * UGLY HACK:
+ * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
+ * Everywhere else, it uses scalar code.
+ *
+ * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
+ * would still be slower than UMAAL (see XXH_mult64to128).
+ *
+ * Unfortunately, Clang doesn't handle the long multiplies properly and
+ * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
+ * scalarized into an ugly mess of VMOV.32 instructions.
+ *
+ * This mess is difficult to avoid without turning autovectorization
+ * off completely, but they are usually relatively minor and/or not
+ * worth it to fix.
+ *
+ * This loop is the easiest to fix, as unlike XXH32, this pragma
+ * _actually works_ because it is a loop vectorization instead of an
+ * SLP vectorization.
+ */
+ #pragma clang loop vectorize(disable)
+ #endif
+ for (i = 8; i < nbRounds; i++)
+ {
+ /*
+ * Prevents clang from unrolling the acc loop and interleaving with this one.
+ */
+ XXH_COMPILER_GUARD(acc);
+ acc_end += XXH3_mix16B(input + (16 * i),
+ secret + (16 * (i - 8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+ }
+ return XXH3_avalanche(acc + acc_end);
+ }
+}
+
+
+ /* ======= Long Keys ======= */
+
+ #define XXH_STRIPE_LEN 64
+ #define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
+ #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
+
+ #ifdef XXH_OLD_NAMES
+ #define STRIPE_LEN XXH_STRIPE_LEN
+ #define ACC_NB XXH_ACC_NB
+ #endif
+
+ #ifndef XXH_PREFETCH_DIST
+ #ifdef __clang__
+ #define XXH_PREFETCH_DIST 320
+ #else
+ #if (XXH_VECTOR == XXH_AVX512)
+ #define XXH_PREFETCH_DIST 512
+ #else
+ #define XXH_PREFETCH_DIST 384
+ #endif
+ #endif /* __clang__ */
+ #endif /* XXH_PREFETCH_DIST */
+
+ /*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The two arguments select the name suffix and target attribute.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
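+ *
+ * For example, XXH3_ACCUMULATE_TEMPLATE(sse2) further below defines
+ * XXH3_accumulate_sse2(), which walks the input one 64-byte stripe at a time,
+ * prefetching XXH_PREFETCH_DIST bytes ahead and calling
+ * XXH3_accumulate_512_sse2() on each stripe.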
+ */ + #define XXH3_ACCUMULATE_TEMPLATE(name) \ + void XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \ + const xxh_u8* XXH_RESTRICT input, \ + const xxh_u8* XXH_RESTRICT secret, size_t nbStripes) { \ + size_t n; \ + for (n = 0; n < nbStripes; n++) \ + { \ + const xxh_u8* const in = input + n * XXH_STRIPE_LEN; \ + XXH_PREFETCH(in + XXH_PREFETCH_DIST); \ + XXH3_accumulate_512_##name(acc, in, secret + n * XXH_SECRET_CONSUME_RATE); \ + } \ + } + + +XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) { + if (!XXH_CPU_LITTLE_ENDIAN) + v64 = XXH_swap64(v64); + XXH_memcpy(dst, &v64, sizeof(v64)); +} + + /* Several intrinsic functions below are supposed to accept __int64 as argument, + * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . + * However, several environments do not define __int64 type, + * requiring a workaround. + */ + #if !defined(__VMS) \ + && (defined(__cplusplus) \ + || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) +typedef int64_t xxh_i64; + #else +/* the following type must have a width of 64-bit */ +typedef long long xxh_i64; + #endif + + + /* + * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. + * + * It is a hardened version of UMAC, based off of FARSH's implementation. + * + * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD + * implementations, and it is ridiculously fast. + * + * We harden it by mixing the original input to the accumulators as well as the product. + * + * This means that in the (relatively likely) case of a multiply by zero, the + * original input is preserved. + * + * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve + * cross-pollination, as otherwise the upper and lower halves would be + * essentially independent. + * + * This doesn't matter on 64-bit hashes since they all get merged together in + * the end, so we skip the extra step. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + + #if (XXH_VECTOR == XXH_AVX512) || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) + + #ifndef XXH_TARGET_AVX512 + #define XXH_TARGET_AVX512 /* disable attribute target */ + #endif + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_accumulate_512_avx512( + void* XXH_RESTRICT acc, const void* XXH_RESTRICT input, const void* XXH_RESTRICT secret) { + __m512i* const xacc = (__m512i*) acc; + XXH_ASSERT((((size_t) acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512(input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512(secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_srli_epi64(data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32(data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = + _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM) _MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = _mm512_add_epi64(product, sum); + } +} +XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) + + /* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. 
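+ *
+ * Per 64-bit lane, the scramble below computes:
+ * acc = (acc ^ (acc >> 47) ^ secret_word) * XXH_PRIME32_1;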
+ * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + + XXH_FORCE_INLINE XXH_TARGET_AVX512 + void XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { + __m512i* const xacc = (__m512i*) acc; + const __m512i prime32 = _mm512_set1_epi32((int) XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64(acc_vec, 47); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512(secret); + __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, + 0x96 /* key_vec ^ acc_vec ^ shifted */); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_srli_epi64(data_key, 32); + __m512i const prod_lo = _mm512_mul_epu32(data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32(data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) { + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t) customSecret & 63) == 0); + (void) (&XXH_writeLE64); + { + int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed_pos = _mm512_set1_epi64((xxh_i64) seed64); + __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); + + const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); + __m512i* const dest = (__m512i*) customSecret; + int i; + XXH_ASSERT(((size_t) src & 63) == 0); /* control alignment */ + XXH_ASSERT(((size_t) dest & 63) == 0); + for (i = 0; i < nbRounds; ++i) + { + dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); + } + } +} + + #endif + + #if (XXH_VECTOR == XXH_AVX2) || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) + + #ifndef XXH_TARGET_AVX2 + #define XXH_TARGET_AVX2 /* disable attribute target */ + #endif + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_accumulate_512_avx2(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 31) == 0); + { + __m256i* const xacc = (__m256i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xinput = (const __m256i*) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i* const xsecret = (const __m256i*) secret; + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) + { + /* data_vec = xinput[i]; */ + __m256i const data_vec = _mm256_loadu_si256(xinput + i); + /* key_vec = xsecret[i]; */ + __m256i const key_vec = _mm256_loadu_si256(xsecret + i); + /* data_key = data_vec ^ key_vec; */ + __m256i const data_key = _mm256_xor_si256(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m256i const data_key_lo = _mm256_srli_epi64(data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m256i const product = _mm256_mul_epu32(data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm256_add_epi64(product, sum); + } + } +} +XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2) + + XXH_FORCE_INLINE XXH_TARGET_AVX2 + void XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 31) == 0); + { + __m256i* const xacc = (__m256i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xsecret = (const __m256i*) secret; + const __m256i prime32 = _mm256_set1_epi32((int) XXH_PRIME32_1); + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) + { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64(acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256(acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256(xsecret + i); + __m256i const data_key = _mm256_xor_si256(data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m256i const data_key_hi = _mm256_srli_epi64(data_key, 32); + __m256i const prod_lo = _mm256_mul_epu32(data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32(data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); + } + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, + xxh_u64 seed64) { + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); + (void) (&XXH_writeLE64); + XXH_PREFETCH(customSecret); + { + __m256i const seed = _mm256_set_epi64x((xxh_i64) (0U - seed64), (xxh_i64) seed64, + (xxh_i64) (0U - seed64), (xxh_i64) seed64); + + const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret); + __m256i* dest = (__m256i*) customSecret; + + #if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + XXH_COMPILER_GUARD(dest); + #endif + XXH_ASSERT(((size_t) src & 31) == 0); /* control alignment */ + XXH_ASSERT(((size_t) dest & 31) == 0); + + /* GCC -O2 need unroll loop manually */ + dest[0] = _mm256_add_epi64(_mm256_load_si256(src + 0), seed); + dest[1] = _mm256_add_epi64(_mm256_load_si256(src + 1), seed); + dest[2] = _mm256_add_epi64(_mm256_load_si256(src + 2), seed); + dest[3] = _mm256_add_epi64(_mm256_load_si256(src + 3), seed); + dest[4] = 
_mm256_add_epi64(_mm256_load_si256(src + 4), seed); + dest[5] = _mm256_add_epi64(_mm256_load_si256(src + 5), seed); + } +} + + #endif + + /* x86dispatch always generates SSE2 */ + #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + + #ifndef XXH_TARGET_SSE2 + #define XXH_TARGET_SSE2 /* disable attribute target */ + #endif + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_accumulate_512_sse2(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) { + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t) acc) & 15) == 0); + { + __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xinput = (const __m128i*) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xsecret = (const __m128i*) secret; + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) + { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128(xinput + i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128(xsecret + i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32(data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } + } +} +XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) + + XXH_FORCE_INLINE XXH_TARGET_SSE2 + void XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 15) == 0); + { + __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
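 */
+        /*
+         * A note on the multiply below (an illustrative identity matching
+         * prod_lo/prod_hi): SSE2 has no 64-bit vector multiply, so with
+         * p = XXH_PRIME32_1 (a 32-bit constant) each 64-bit lane x is
+         * computed as
+         *
+         *   x * p == lo32(x) * p + ((hi32(x) * p) << 32)   (mod 2^64)
+         */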
+        const __m128i* const xsecret = (const __m128i*) secret;
+        const __m128i prime32 = _mm_set1_epi32((int) XXH_PRIME32_1);
+
+        size_t i;
+        for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++)
+        {
+            /* xacc[i] ^= (xacc[i] >> 47) */
+            __m128i const acc_vec = xacc[i];
+            __m128i const shifted = _mm_srli_epi64(acc_vec, 47);
+            __m128i const data_vec = _mm_xor_si128(acc_vec, shifted);
+            /* xacc[i] ^= xsecret[i]; */
+            __m128i const key_vec = _mm_loadu_si128(xsecret + i);
+            __m128i const data_key = _mm_xor_si128(data_vec, key_vec);
+
+            /* xacc[i] *= XXH_PRIME32_1; */
+            __m128i const data_key_hi = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m128i const prod_lo = _mm_mul_epu32(data_key, prime32);
+            __m128i const prod_hi = _mm_mul_epu32(data_key_hi, prime32);
+            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret,
+                                                                 xxh_u64 seed64) {
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+    (void) (&XXH_writeLE64);
+    {
+        int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
+
+    #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
+        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
+        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = {(xxh_i64) seed64, (xxh_i64) (0U - seed64)};
+        __m128i const seed = _mm_load_si128((__m128i const*) seed64x2);
+    #else
+        __m128i const seed = _mm_set_epi64x((xxh_i64) (0U - seed64), (xxh_i64) seed64);
+    #endif
+        int i;
+
+        const void* const src16 = XXH3_kSecret;
+        __m128i*          dst16 = (__m128i*) customSecret;
+    #if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dst16' as modified causes the compiler to:
+         *   - not extract the secret from sse registers in the internal loop
+         *   - use fewer common registers, and avoid pushing these regs onto the stack
+         */
+        XXH_COMPILER_GUARD(dst16);
+    #endif
+        XXH_ASSERT(((size_t) src16 & 15) == 0); /* control alignment */
+        XXH_ASSERT(((size_t) dst16 & 15) == 0);
+
+        for (i = 0; i < nbRounds; ++i)
+        {
+            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i*) src16 + i), seed);
+        }
+    }
+}
+
+    #endif
+
+    #if (XXH_VECTOR == XXH_NEON)
+
+/* forward declarations for the scalar routines */
+XXH_FORCE_INLINE void XXH3_scalarRound(void* XXH_RESTRICT acc,
+                                       void const* XXH_RESTRICT input,
+                                       void const* XXH_RESTRICT secret,
+                                       size_t lane);
+
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT secret, size_t lane);
+
+/*!
+ * @internal
+ * @brief The bulk processing loop for NEON and WASM SIMD128.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers instead of the other platforms, which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the optimal number of lanes for Cortexes is 6,
+ * there need to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ *
+ * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
+ * nearly perfectly.
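+ *
+ * As an illustrative sketch (assuming the default XXH3_NEON_LANES == 6 and
+ * XXH_ACC_NB == 8), the accumulator lanes are split as:
+ *
+ *   lanes 0..5: NEON, handled 4 at a time, then 2 at a time, below
+ *   lanes 6..7: scalar, via XXH3_scalarRound()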
+ */ + +XXH_FORCE_INLINE void XXH3_accumulate_512_neon(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 15) == 0); + XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB + && XXH3_NEON_LANES % 2 == 0); + { /* GCC for darwin arm64 does not like aliasing here */ + xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ + uint8_t const* xinput = (const uint8_t*) input; + uint8_t const* xsecret = (const uint8_t*) secret; + + size_t i; + #ifdef __wasm_simd128__ + /* + * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret + * is constant propagated, which results in it converting it to this + * inside the loop: + * + * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0) + * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0) + * ... + * + * This requires a full 32-bit address immediate (and therefore a 6 byte + * instruction) as well as an add for each offset. + * + * Putting an asm guard prevents it from folding (at the cost of losing + * the alignment hint), and uses the free offset in `v128.load` instead + * of adding secret_offset each time which overall reduces code size by + * about a kilobyte and improves performance. + */ + XXH_COMPILER_GUARD(xsecret); + #endif + /* Scalar lanes use the normal scalarRound routine */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) + { + XXH3_scalarRound(acc, input, secret, i); + } + i = 0; + /* 4 NEON lanes at a time. */ + for (; i + 1 < XXH3_NEON_LANES / 2; i += 2) + { + /* data_vec = xinput[i]; */ + uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16)); + uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i + 1) * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i + 1) * 16)); + /* data_swap = swap(data_vec) */ + uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); + uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1); + uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2); + + /* + * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a + * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to + * get one vector with the low 32 bits of each lane, and one vector + * with the high 32 bits of each lane. + * + * The intrinsic returns a double vector because the original ARMv7-a + * instruction modified both arguments in place. AArch64 and SIMD128 emit + * two instructions from this intrinsic. + * + * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] + * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] + */ + uint32x4x2_t unzipped = + vuzpq_u32(vreinterpretq_u32_u64(data_key_1), vreinterpretq_u32_u64(data_key_2)); + /* data_key_lo = data_key & 0xFFFFFFFF */ + uint32x4_t data_key_lo = unzipped.val[0]; + /* data_key_hi = data_key >> 32 */ + uint32x4_t data_key_hi = unzipped.val[1]; + /* + * Then, we can split the vectors horizontally and multiply which, as for most + * widening intrinsics, have a variant that works on both high half vectors + * for free on AArch64. A similar instruction is available on SIMD128. 
+ * + * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi + */ + uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); + uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); + /* + * Clang reorders + * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s + * c += a; // add acc.2d, acc.2d, swap.2d + * to + * c += a; // add acc.2d, acc.2d, swap.2d + * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s + * + * While it would make sense in theory since the addition is faster, + * for reasons likely related to umlal being limited to certain NEON + * pipelines, this is worse. A compiler guard fixes this. + */ + XXH_COMPILER_GUARD_CLANG_NEON(sum_1); + XXH_COMPILER_GUARD_CLANG_NEON(sum_2); + /* xacc[i] = acc_vec + sum; */ + xacc[i] = vaddq_u64(xacc[i], sum_1); + xacc[i + 1] = vaddq_u64(xacc[i + 1], sum_2); + } + /* Operate on the remaining NEON lanes 2 at a time. */ + for (; i < XXH3_NEON_LANES / 2; i++) + { + /* data_vec = xinput[i]; */ + uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + /* acc_vec_2 = swap(data_vec) */ + uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key = veorq_u64(data_vec, key_vec); + /* For two lanes, just use VMOVN and VSHRN. */ + /* data_key_lo = data_key & 0xFFFFFFFF; */ + uint32x2_t data_key_lo = vmovn_u64(data_key); + /* data_key_hi = data_key >> 32; */ + uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); + /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ + uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); + /* Same Clang workaround as before */ + XXH_COMPILER_GUARD_CLANG_NEON(sum); + /* xacc[i] = acc_vec + sum; */ + xacc[i] = vaddq_u64(xacc[i], sum); + } + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) + + XXH_FORCE_INLINE + void XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 15) == 0); + + { + xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc; + uint8_t const* xsecret = (uint8_t const*) secret; + + size_t i; + /* WASM uses operator overloads and doesn't need these. 
*/ + #ifndef __wasm_simd128__ + /* { prime32_1, prime32_1 } */ + uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1); + /* { 0, prime32_1, 0, prime32_1 } */ + uint32x4_t const kPrimeHi = + vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64) XXH_PRIME32_1 << 32)); + #endif + + /* AArch64 uses both scalar and neon at the same time */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) + { + XXH3_scalarScrambleRound(acc, secret, i); + } + for (i = 0; i < XXH3_NEON_LANES / 2; i++) + { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); + uint64x2_t data_vec = veorq_u64(acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64(data_vec, key_vec); + /* xacc[i] *= XXH_PRIME32_1 */ + #ifdef __wasm_simd128__ + /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */ + xacc[i] = data_key * XXH_PRIME32_1; + #else + /* + * Expanded version with portable NEON intrinsics + * + * lo(x) * lo(y) + (hi(x) * lo(y) << 32) + * + * prod_hi = hi(data_key) * lo(prime) << 32 + * + * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector + * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits + * and avoid the shift. + */ + uint32x4_t prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), kPrimeHi); + /* Extract low bits for vmlal_u32 */ + uint32x2_t data_key_lo = vmovn_u64(data_key); + /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo); + #endif + } + } +} + #endif + + #if (XXH_VECTOR == XXH_VSX) + +XXH_FORCE_INLINE void XXH3_accumulate_512_vsx(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) { + /* presumed aligned */ + xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; + xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */ + xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */ + xxh_u64x2 const v32 = {32, 32}; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) + { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16 * i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16 * i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4) vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4) data_key, shuffled); + /* acc_vec = xacc[i]; */ + xxh_u64x2 acc_vec = xacc[i]; + acc_vec += product; + + /* swap high and low halves */ + #ifdef __s390x__ + acc_vec += vec_permi(data_vec, data_vec, 2); + #else + acc_vec += vec_xxpermdi(data_vec, data_vec, 2); + #endif + xacc[i] = acc_vec; + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) + + XXH_FORCE_INLINE + void XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) { + XXH_ASSERT((((size_t) acc) & 15) == 0); + + { + xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; + const xxh_u8* const xsecret = (const xxh_u8*) secret; + /* constants */ + xxh_u64x2 const v32 = {32, 32}; + xxh_u64x2 const v47 = {47, 47}; + xxh_u32x4 const prime = {XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1}; + size_t i; + for (i = 0; i < 
XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) + { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16 * i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4) data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4) data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } + } +} + + #endif + + #if (XXH_VECTOR == XXH_SVE) + +XXH_FORCE_INLINE void XXH3_accumulate_512_sve(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) { + uint64_t* xacc = (uint64_t*) acc; + const uint64_t* xinput = (const uint64_t*) (const void*) input; + const uint64_t* xsecret = (const uint64_t*) (const void*) secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) + { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc); + ACCRND(vacc, 0); + svst1_u64(mask, xacc, vacc); + } + else if (element_count == 2) + { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); + } + else + { + svbool_t mask = svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } +} + +XXH_FORCE_INLINE void XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + size_t nbStripes) { + if (nbStripes != 0) + { + uint64_t* xacc = (uint64_t*) acc; + const uint64_t* xinput = (const uint64_t*) (const void*) input; + const uint64_t* xsecret = (const uint64_t*) (const void*) secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) + { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc + 0); + do + { + /* svprfd(svbool_t, void *, enum svfprop); */ + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(vacc, 0); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, vacc); + } + else if (element_count == 2) + { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + do + { + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 
6, acc3); + } + else + { + svbool_t mask = svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + do + { + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } + } +} + + #endif + + /* scalar variants - universal */ + + #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__)) +/* + * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they + * emit an excess mask and a full 64-bit multiply-add (MADD X-form). + * + * While this might not seem like much, as AArch64 is a 64-bit architecture, only + * big Cortex designs have a full 64-bit multiplier. + * + * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit + * multiplies expand to 2-3 multiplies in microcode. This has a major penalty + * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline. + * + * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does + * not have this penalty and does the mask automatically. + */ +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) { + xxh_u64 ret; + /* note: %x = 64-bit register, %w = 32-bit register */ + __asm__("umaddl %x0, %w1, %w2, %x3" : "=r"(ret) : "r"(lhs), "r"(rhs), "r"(acc)); + return ret; +} + #else +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) { + return XXH_mult32to64((xxh_u32) lhs, (xxh_u32) rhs) + acc; +} + #endif + +/*! + * @internal + * @brief Scalar round for @ref XXH3_accumulate_512_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. + */ +XXH_FORCE_INLINE void XXH3_scalarRound(void* XXH_RESTRICT acc, + void const* XXH_RESTRICT input, + void const* XXH_RESTRICT secret, + size_t lane) { + xxh_u64* xacc = (xxh_u64*) acc; + xxh_u8 const* xinput = (xxh_u8 const*) input; + xxh_u8 const* xsecret = (xxh_u8 const*) secret; + XXH_ASSERT(lane < XXH_ACC_NB); + XXH_ASSERT(((size_t) acc & (XXH_ACC_ALIGN - 1)) == 0); + { + xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); + xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ + xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]); + } +} + +/*! + * @internal + * @brief Processes a 64 byte block of data using the scalar path. + */ +XXH_FORCE_INLINE void XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) { + size_t i; + /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ + #if defined(__GNUC__) && !defined(__clang__) && (defined(__arm__) || defined(__thumb2__)) \ + && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ + && XXH_SIZE_OPT <= 0 + #pragma GCC unroll 8 + #endif + for (i = 0; i < XXH_ACC_NB; i++) + { + XXH3_scalarRound(acc, input, secret, i); + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) + + /*! + * @internal + * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. 
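+ *
+ * Per lane, the scramble amounts to (a sketch of the body below, in
+ * 64-bit arithmetic):
+ *
+ *   acc = (acc ^ (acc >> 47) ^ key) * XXH_PRIME32_1;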
+ */
+    XXH_FORCE_INLINE void XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                                                   void const* XXH_RESTRICT secret,
+                                                   size_t lane) {
+    xxh_u64* const      xacc    = (xxh_u64*) acc;          /* presumed aligned */
+    const xxh_u8* const xsecret = (const xxh_u8*) secret;  /* no alignment restriction */
+    XXH_ASSERT((((size_t) acc) & (XXH_ACC_ALIGN - 1)) == 0);
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    {
+        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
+        xxh_u64       acc64 = xacc[lane];
+        acc64 = XXH_xorshift64(acc64, 47);
+        acc64 ^= key64;
+        acc64 *= XXH_PRIME32_1;
+        xacc[lane] = acc64;
+    }
+}
+
+/*!
+ * @internal
+ * @brief Scrambles the accumulators after a large chunk has been read
+ */
+XXH_FORCE_INLINE void XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc,
+                                              const void* XXH_RESTRICT secret) {
+    size_t i;
+    for (i = 0; i < XXH_ACC_NB; i++)
+    {
+        XXH3_scalarScrambleRound(acc, secret, i);
+    }
+}
+
+XXH_FORCE_INLINE void XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret,
+                                                   xxh_u64 seed64) {
+    /*
+     * We need a separate pointer for the hack below,
+     * which requires a non-const pointer.
+     * Any decent compiler will optimize this out otherwise.
+     */
+    const xxh_u8* kSecretPtr = XXH3_kSecret;
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+
+    #if defined(__GNUC__) && defined(__aarch64__)
+    /*
+     * UGLY HACK:
+     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
+     * placed sequentially, in order, at the top of the unrolled loop.
+     *
+     * While MOVK is great for generating constants (2 cycles for a 64-bit
+     * constant compared to 4 cycles for LDR), it fights for bandwidth with
+     * the arithmetic instructions.
+     *
+     *   I   L   S
+     *       MOVK
+     *       MOVK
+     *       MOVK
+     *       MOVK
+     *   ADD
+     *   SUB      STR
+     *            STR
+     * By forcing loads from memory (as the asm line causes the compiler to assume
+     * that kSecretPtr has been changed), the pipelines are used more
+     * efficiently:
+     *   I   L   S
+     *       LDR
+     *   ADD LDR
+     *   SUB      STR
+     *            STR
+     *
+     * See XXH3_NEON_LANES for details on the pipelines.
+     *
+     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
+     *   without hack: 2654.4 MB/s
+     *   with hack:    3202.9 MB/s
+     */
+    XXH_COMPILER_GUARD(kSecretPtr);
+    #endif
+    {
+        int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
+        int       i;
+        for (i = 0; i < nbRounds; i++)
+        {
+            /*
+             * The asm hack causes the compiler to assume that kSecretPtr aliases with
+             * customSecret, and on aarch64, this prevented LDP from merging two
+             * loads together for free. Putting the loads together before the stores
+             * properly generates LDP.
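+             *
+             * The net effect of each 16-byte round is (sketch): the two
+             * 64-bit halves of the secret block become
+             *
+             *   lo' = lo + seed64;   hi' = hi - seed64;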
+ */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16 * i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16 * i + 8) - seed64; + XXH_writeLE64((xxh_u8*) customSecret + 16 * i, lo); + XXH_writeLE64((xxh_u8*) customSecret + 16 * i + 8, hi); + } + } +} + + +typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, + const xxh_u8* XXH_RESTRICT, + const xxh_u8* XXH_RESTRICT, + size_t); +typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); +typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); + + + #if (XXH_VECTOR == XXH_AVX512) + + #define XXH3_accumulate_512 XXH3_accumulate_512_avx512 + #define XXH3_accumulate XXH3_accumulate_avx512 + #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 + #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + + #elif (XXH_VECTOR == XXH_AVX2) + + #define XXH3_accumulate_512 XXH3_accumulate_512_avx2 + #define XXH3_accumulate XXH3_accumulate_avx2 + #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 + #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + + #elif (XXH_VECTOR == XXH_SSE2) + + #define XXH3_accumulate_512 XXH3_accumulate_512_sse2 + #define XXH3_accumulate XXH3_accumulate_sse2 + #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 + #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + + #elif (XXH_VECTOR == XXH_NEON) + + #define XXH3_accumulate_512 XXH3_accumulate_512_neon + #define XXH3_accumulate XXH3_accumulate_neon + #define XXH3_scrambleAcc XXH3_scrambleAcc_neon + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + + #elif (XXH_VECTOR == XXH_VSX) + + #define XXH3_accumulate_512 XXH3_accumulate_512_vsx + #define XXH3_accumulate XXH3_accumulate_vsx + #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + + #elif (XXH_VECTOR == XXH_SVE) + #define XXH3_accumulate_512 XXH3_accumulate_512_sve + #define XXH3_accumulate XXH3_accumulate_sve + #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + + #else /* scalar */ + + #define XXH3_accumulate_512 XXH3_accumulate_512_scalar + #define XXH3_accumulate XXH3_accumulate_scalar + #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + + #endif + + #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */ + #undef XXH3_initCustomSecret + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + #endif + +XXH_FORCE_INLINE void XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + size_t len, + const xxh_u8* XXH_RESTRICT secret, + size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) { + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) + { + f_acc(acc, input + n * block_len, secret, nbStripesPerBlock); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { + size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + f_acc(acc, input + nb_blocks * block_len, secret, nbStripes); + + /* last stripe */ + { + const xxh_u8* const p = input + len - XXH_STRIPE_LEN; + #define XXH_SECRET_LASTACC_START \ + 7 /* not aligned on 8, last 
secret is different from acc & scrambler */
+            XXH3_accumulate_512(acc, p,
+                                secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+        }
+    }
+}
+
+XXH_FORCE_INLINE xxh_u64 XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc,
+                                       const xxh_u8* XXH_RESTRICT secret) {
+    return XXH3_mul128_fold64(acc[0] ^ XXH_readLE64(secret), acc[1] ^ XXH_readLE64(secret + 8));
+}
+
+static XXH64_hash_t
+XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) {
+    xxh_u64 result64 = start;
+    size_t  i        = 0;
+
+    for (i = 0; i < 4; i++)
+    {
+        result64 += XXH3_mix2Accs(acc + 2 * i, secret + 16 * i);
+    #if defined(__clang__)                              /* Clang */            \
+      && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */            \
+      && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */             \
+      && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
+        /*
+         * UGLY HACK:
+         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
+         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
+         * XXH3_64bits, len == 256, Snapdragon 835:
+         *   without hack: 2063.7 MB/s
+         *   with hack:    2560.7 MB/s
+         */
+        XXH_COMPILER_GUARD(result64);
+    #endif
+    }
+
+    return XXH3_avalanche(result64);
+}
+
+    #define XXH3_INIT_ACC                                            \
+        {XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
+         XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1}
+
+XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input,
+                                                         size_t len,
+                                                         const void* XXH_RESTRICT secret,
+                                                         size_t secretSize,
+                                                         XXH3_f_accumulate f_acc,
+                                                         XXH3_f_scrambleAcc f_scramble) {
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*) input, len, (const xxh_u8*) secret, secretSize,
+                                f_acc, f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    /* do not align on 8, so that the secret is different from the accumulator */
+    #define XXH_SECRET_MERGEACCS_START 11
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    return XXH3_mergeAccs(acc, (const xxh_u8*) secret + XXH_SECRET_MERGEACCS_START,
+                          (xxh_u64) len * XXH_PRIME64_1);
+}
+
+/*
+ * It's important for performance to transmit the secret's size (when it's static)
+ * so that the compiler can properly optimize the vectorized loop.
+ * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
+ */
+XXH3_WITH_SECRET_INLINE XXH64_hash_t XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input,
+                                                                  size_t len,
+                                                                  XXH64_hash_t seed64,
+                                                                  const xxh_u8* XXH_RESTRICT secret,
+                                                                  size_t secretLen) {
+    (void) seed64;
+    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate,
+                                      XXH3_scrambleAcc);
+}
+
+/*
+ * It's preferable for performance that XXH3_hashLong is not inlined,
+ * as it results in a smaller function for small data, easier on the instruction cache.
+ * Note that inside this no_inline function, we do inline the internal loop,
+ * and provide a statically defined secret size to allow optimization of the vector loop.
+ */ +XXH_NO_INLINE XXH_PUREF XXH64_hash_t XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const xxh_u8* XXH_RESTRICT secret, + size_t secretLen) { + (void) seed64; + (void) secret; + (void) secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * XXH3_hashLong_64b_withSeed(): + * Generate a custom key based on alteration of default XXH3_kSecret with the seed, + * and then use this key for long mode hashing. + * + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + * + * It's important for performance that XXH3_hashLong is not inlined. Not sure + * why (uop cache maybe?), but the difference is large and easily measurable. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed_internal(const void* input, + size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) { + #if XXH_SIZE_OPT <= 0 + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), f_acc, + f_scramble); + #endif + { + XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), f_acc, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed, + const xxh_u8* XXH_RESTRICT secret, + size_t secretLen) { + (void) secret; + (void) secretLen; + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, XXH3_accumulate, XXH3_scrambleAcc, + XXH3_initCustomSecret); +} + + +typedef XXH64_hash_t (*XXH3_hashLong64_f)( + const void* XXH_RESTRICT, size_t, XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t XXH3_64bits_internal(const void* XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, + size_t secretLen, + XXH3_hashLong64_f f_hashLong) { + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that function signature doesn't offer room to return an error. + */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8*) input, len, (const xxh_u8*) secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8*) input, len, (const xxh_u8*) secret, secretLen, + seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8*) input, len, (const xxh_u8*) secret, secretLen, + seed64); + return f_hashLong(input, len, seed64, (const xxh_u8*) secret, secretLen); +} + + +/* === Public entry point === */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) { + return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_64b_default); +} + +/*! 
@ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* input,
+                                                   size_t length,
+                                                   XXH_NOESCAPE const void* secret,
+                                                   size_t secretSize) {
+    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input,
+                                                 size_t length,
+                                                 XXH64_hash_t seed) {
+    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret),
+                                XXH3_hashLong_64b_withSeed);
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input,
+                                                          size_t length,
+                                                          XXH_NOESCAPE const void* secret,
+                                                          size_t secretSize,
+                                                          XXH64_hash_t seed) {
+    if (length <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*) secret, secretSize);
+}
+
+
+    /* ===   XXH3 streaming   === */
+    #ifndef XXH_NO_STREAM
+/*
+ * Mallocs a pointer that is always aligned to align.
+ *
+ * This must be freed with `XXH_alignedFree()`.
+ *
+ * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
+ * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
+ * or, on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
+ *
+ * This underalignment previously caused a rather obvious crash which went
+ * completely unnoticed due to XXH3_createState() not actually being tested.
+ * Credit to RedSpah for noticing this bug.
+ *
+ * The alignment is done manually: Functions like posix_memalign or _mm_malloc
+ * are avoided: To maintain portability, we would have to write a fallback
+ * like this anyways, and besides, testing for the existence of library
+ * functions without relying on external build tools is impossible.
+ *
+ * The method is simple: Overallocate, manually align, and store the offset
+ * to the original behind the returned pointer.
+ *
+ * Align must be a power of 2 and 8 <= align <= 128.
+ */
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align) {
+    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
+    XXH_ASSERT((align & (align - 1)) == 0); /* power of 2 */
+    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
+    {   /* Overallocate to make room for manual realignment and an offset byte */
+        xxh_u8* base = (xxh_u8*) XXH_malloc(s + align);
+        if (base != NULL)
+        {
+            /*
+             * Get the offset needed to align this pointer.
+             *
+             * Even if the returned pointer is aligned, there will always be
+             * at least one byte to store the offset to the original pointer.
+             */
+            size_t offset = align - ((size_t) base & (align - 1)); /* base % align */
+            /* Add the offset for the now-aligned pointer */
+            xxh_u8* ptr = base + offset;
+
+            XXH_ASSERT((size_t) ptr % align == 0);
+
+            /* Store the offset immediately before the returned pointer. */
+            ptr[-1] = (xxh_u8) offset;
+            return ptr;
+        }
+        return NULL;
+    }
+}
+/*
+ * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
+ * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
+ */
+static void XXH_alignedFree(void* p) {
+    if (p != NULL)
+    {
+        xxh_u8* ptr = (xxh_u8*) p;
+        /* Get the offset byte we added in XXH_alignedMalloc(). */
+        xxh_u8 offset = ptr[-1];
+        /* Free the original malloc'd pointer */
+        xxh_u8* base = ptr - offset;
+        XXH_free(base);
+    }
+}
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Allocate an @ref XXH3_state_t.
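+ *
+ * A minimal streaming sketch using the state management and update/digest
+ * functions below (`data`/`size` stand in for the caller's buffer):
+ * @code{.c}
+ *   XXH3_state_t* st = XXH3_createState();
+ *   if (st != NULL) {
+ *       XXH3_64bits_reset(st);
+ *       XXH3_64bits_update(st, data, size);
+ *       XXH64_hash_t const h = XXH3_64bits_digest(st); // == XXH3_64bits(data, size)
+ *       XXH3_freeState(st);
+ *   }
+ * @endcode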
+ * + * @return An allocated pointer of @ref XXH3_state_t on success. + * @return `NULL` on failure. + * + * @note Must be freed with XXH3_freeState(). + */ +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) { + XXH3_state_t* const state = (XXH3_state_t*) XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state == NULL) + return NULL; + XXH3_INITSTATE(state); + return state; +} + +/*! @ingroup XXH3_family */ +/*! + * @brief Frees an @ref XXH3_state_t. + * + * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState(). + * + * @return @ref XXH_OK. + * + * @note Must be allocated with XXH3_createState(). + */ +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) { + XXH_alignedFree(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, + XXH_NOESCAPE const XXH3_state_t* src_state) { + XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void XXH3_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, + size_t secretSize) { + size_t const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char*) statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->useSeed = (seed != 0); + statePtr->extSecret = (const unsigned char*) secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) { + if (statePtr == NULL) + return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, + XXH_NOESCAPE const void* secret, + size_t secretSize) { + if (statePtr == NULL) + return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) + return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) + return XXH_ERROR; + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, + XXH64_hash_t seed) { + if (statePtr == NULL) + return XXH_ERROR; + if (seed == 0) + return XXH3_64bits_reset(statePtr); + if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) + XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! 
@ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                    XXH_NOESCAPE const void* secret,
+                                    size_t secretSize,
+                                    XXH64_hash_t seed64) {
+    if (statePtr == NULL)
+        return XXH_ERROR;
+    if (secret == NULL)
+        return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN)
+        return XXH_ERROR;
+    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
+    statePtr->useSeed = 1; /* always, even if seed64==0 */
+    return XXH_OK;
+}
+
+/*!
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
+ *
+ * @param acc                Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr  In/out pointer to the number of leftover stripes in the block
+ * @param nbStripesPerBlock  Number of stripes in a block
+ * @param input              Input pointer
+ * @param nbStripes          Number of stripes to process
+ * @param secret             Secret pointer
+ * @param secretLimit        Offset of the last block in @p secret
+ * @param f_acc              Pointer to an XXH3_accumulate implementation
+ * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
+ * @return                   Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8* XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
+                                                   size_t* XXH_RESTRICT nbStripesSoFarPtr,
+                                                   size_t nbStripesPerBlock,
+                                                   const xxh_u8* XXH_RESTRICT input,
+                                                   size_t nbStripes,
+                                                   const xxh_u8* XXH_RESTRICT secret,
+                                                   size_t secretLimit,
+                                                   XXH3_f_accumulate f_acc,
+                                                   XXH3_f_scrambleAcc f_scramble) {
+    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+    /* Process full blocks */
+    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr))
+    {
+        /* Process the initial partial block... */
+        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+        do
+        {
+            /* Accumulate and scramble */
+            f_acc(acc, input, initialSecret, nbStripesThisIter);
+            f_scramble(acc, secret + secretLimit);
+            input += nbStripesThisIter * XXH_STRIPE_LEN;
+            nbStripes -= nbStripesThisIter;
+            /* Then continue the loop with the full block size */
+            nbStripesThisIter = nbStripesPerBlock;
+            initialSecret     = secret;
+        } while (nbStripes >= nbStripesPerBlock);
+        *nbStripesSoFarPtr = 0;
+    }
+    /* Process a partial block */
+    if (nbStripes > 0)
+    {
+        f_acc(acc, input, initialSecret, nbStripes);
+        input += nbStripes * XXH_STRIPE_LEN;
+        *nbStripesSoFarPtr += nbStripes;
+    }
+    /* Return end pointer */
+    return input;
+}
+
+    #ifndef XXH3_STREAM_USE_STACK
+        #if XXH_SIZE_OPT <= 0 \
+          && !defined(__clang__) /* clang doesn't need additional stack space */
+            #define XXH3_STREAM_USE_STACK 1
+        #endif
+    #endif
+/*
+ * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+ */
+XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+                                           const xxh_u8* XXH_RESTRICT input,
+                                           size_t len,
+                                           XXH3_f_accumulate f_acc,
+                                           XXH3_f_scrambleAcc f_scramble) {
+    if (input == NULL)
+    {
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+
+    XXH_ASSERT(state != NULL);
+    {
+        const xxh_u8* const        bEnd = input + len;
+        const unsigned char* const secret =
+            (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+    #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* For some reason, gcc and MSVC seem to suffer greatly
+         * when operating on the accumulators directly in the state.
+         * Operating in stack space seems to enable proper optimization.
+ * clang, on the other hand, doesn't seem to need this trick */ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; + XXH_memcpy(acc, state->acc, sizeof(acc)); + #else + xxh_u64* XXH_RESTRICT const acc = state->acc; + #endif + state->totalLen += len; + XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); + + /* small input : just fill in tmp buffer */ + if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) + { + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t) len; + return XXH_OK; + } + + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ + + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. + */ + if (state->bufferedSize) + { + size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; + XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); + input += loadSize; + XXH3_consumeStripes(acc, &state->nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, XXH3_INTERNALBUFFER_STRIPES, secret, + state->secretLimit, f_acc, f_scramble); + state->bufferedSize = 0; + } + XXH_ASSERT(input < bEnd); + if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) + { + size_t nbStripes = (size_t) (bEnd - 1 - input) / XXH_STRIPE_LEN; + input = + XXH3_consumeStripes(acc, &state->nbStripesSoFar, state->nbStripesPerBlock, input, + nbStripes, secret, state->secretLimit, f_acc, f_scramble); + XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, + input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + } + /* Some remaining input (always) : buffer it */ + XXH_ASSERT(input < bEnd); + XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); + XXH_ASSERT(state->bufferedSize == 0); + XXH_memcpy(state->buffer, input, (size_t) (bEnd - input)); + state->bufferedSize = (XXH32_hash_t) (bEnd - input); + #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 + /* save stack accumulators into state */ + XXH_memcpy(state->acc, acc, sizeof(acc)); + #endif + } + + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, + XXH_NOESCAPE const void* input, + size_t len) { + return XXH3_update(state, (const xxh_u8*) input, len, XXH3_accumulate, XXH3_scrambleAcc); +} + + +XXH_FORCE_INLINE void +XXH3_digest_long(XXH64_hash_t* acc, const XXH3_state_t* state, const unsigned char* secret) { + xxh_u8 lastStripe[XXH_STRIPE_LEN]; + const xxh_u8* lastStripePtr; + + /* + * Digest on a local copy. This way, the state remains unaltered, and it can + * continue ingesting more input afterwards. 
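+     *
+     * (An illustrative consequence: after update(A), digest() returns the
+     * hash of A; after a further update(B), digest() returns the hash of
+     * A||B. digest() never consumes the state.)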
+ */ + XXH_memcpy(acc, state->acc, sizeof(state->acc)); + if (state->bufferedSize >= XXH_STRIPE_LEN) + { + /* Consume remaining stripes then point to remaining data in buffer */ + size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; + size_t nbStripesSoFar = state->nbStripesSoFar; + XXH3_consumeStripes(acc, &nbStripesSoFar, state->nbStripesPerBlock, state->buffer, + nbStripes, secret, state->secretLimit, XXH3_accumulate, + XXH3_scrambleAcc); + lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN; + } + else + { /* bufferedSize < XXH_STRIPE_LEN */ + /* Copy to temp buffer */ + size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; + XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ + XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); + lastStripePtr = lastStripe; + } + /* Last stripe */ + XXH3_accumulate_512(acc, lastStripePtr, secret + state->secretLimit - XXH_SECRET_LASTACC_START); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t* state) { + const unsigned char* const secret = + (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) + { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64) state->totalLen * XXH_PRIME64_1); + } + /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ + if (state->useSeed) + return XXH3_64bits_withSeed(state->buffer, (size_t) state->totalLen, state->seed); + return XXH3_64bits_withSecret(state->buffer, (size_t) (state->totalLen), secret, + state->secretLimit + XXH_STRIPE_LEN); +} + #endif /* !XXH_NO_STREAM */ + + +/* ========================================== + * XXH3 128 bits (a.k.a XXH128) + * ========================================== + * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, + * even without counting the significantly larger output size. + * + * For example, extra steps are taken to avoid the seed-dependent collisions + * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). + * + * This strength naturally comes at the cost of some speed, especially on short + * lengths. Note that longer hashes are about as fast as the 64-bit version + * due to it using only a slight modification of the 64-bit loop. + * + * XXH128 is also more oriented towards 64-bit machines. It is still extremely + * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). + */ + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_1to3_128b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + /* A doubled version of 1to3_64b with different constants. 
*/ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { + xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = + ((xxh_u32) c1 << 16) | ((xxh_u32) c2 << 24) | ((xxh_u32) c3 << 0) | ((xxh_u32) len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret + 4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret + 8) ^ XXH_readLE32(secret + 12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64) combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64) combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_4to8_128b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64) XXH_swap32((xxh_u32) seed) << 32; + { + xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64) input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret + 16) ^ XXH_readLE64(secret + 24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. */ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= PRIME_MX2; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_9to16_128b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { + xxh_u64 const bitflipl = (XXH_readLE64(secret + 32) ^ XXH_readLE64(secret + 40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret + 48) ^ XXH_readLE64(secret + 56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + m128.low64 += (xxh_u64) (len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void*) < sizeof(xxh_u64)) + { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. 
+ */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + + XXH_mult32to64((xxh_u32) input_hi, XXH_PRIME32_2); + } + else + { + /* + * 64-bit optimized (albeit more confusing) version. + * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32) input_hi, XXH_PRIME32_2 - 1); + } + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); + + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ + XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; + + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } + } +} + +/* + * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN + */ +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_0to16_128b(const xxh_u8* input, + size_t len, + const xxh_u8* secret, + XXH64_hash_t seed) { + XXH_ASSERT(len <= 16); + { + if (len > 8) + return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) + return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) + return XXH3_len_1to3_128b(input, len, secret, seed); + { + XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret + 64) ^ XXH_readLE64(secret + 72); + xxh_u64 const bitfliph = XXH_readLE64(secret + 80) ^ XXH_readLE64(secret + 88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 = XXH64_avalanche(seed ^ bitfliph); + return h128; + } + } +} + +/* + * A bit slower than XXH3_mix16B, but handles multiply by zero better. + */ +XXH_FORCE_INLINE XXH128_hash_t XXH128_mix32B(XXH128_hash_t acc, + const xxh_u8* input_1, + const xxh_u8* input_2, + const xxh_u8* secret, + XXH64_hash_t seed) { + acc.low64 += XXH3_mix16B(input_1, secret + 0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B(input_2, secret + 16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, + size_t len, + const xxh_u8* XXH_RESTRICT secret, + size_t secretSize, + XXH64_hash_t seed) { + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + (void) secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { + XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + + #if XXH_SIZE_OPT >= 1 + { + /* Smaller, but slightly slower. 
*/
+        unsigned int i = (unsigned int) (len - 1) / 32;
+        do
+        {
+            acc = XXH128_mix32B(acc, input + 16 * i, input + len - 16 * (i + 1),
+                                secret + 32 * i, seed);
+        } while (i-- != 0);
+    }
+    #else
+    if (len > 32)
+    {
+        if (len > 64)
+        {
+            if (len > 96)
+            {
+                acc = XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed);
+            }
+            acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed);
+        }
+        acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed);
+    }
+    acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed);
+    #endif
+    {
+        XXH128_hash_t h128;
+        h128.low64 = acc.low64 + acc.high64;
+        h128.high64 = (acc.low64 * XXH_PRIME64_1) + (acc.high64 * XXH_PRIME64_4)
+                    + ((len - seed) * XXH_PRIME64_2);
+        h128.low64 = XXH3_avalanche(h128.low64);
+        h128.high64 = (XXH64_hash_t) 0 - XXH3_avalanche(h128.high64);
+        return h128;
+    }
+    }
+}
+
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input,
+                                                             size_t len,
+                                                             const xxh_u8* XXH_RESTRICT secret,
+                                                             size_t secretSize,
+                                                             XXH64_hash_t seed) {
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+    (void) secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    {
+        XXH128_hash_t acc;
+        unsigned i;
+        acc.low64 = len * XXH_PRIME64_1;
+        acc.high64 = 0;
+        /*
+         * We set `i` as offset + 32. We do this so that unchanged
+         * `len` can be used as upper bound. This reaches a sweet spot
+         * where both x86 and aarch64 get simple agen and good codegen
+         * for the loop.
+         */
+        for (i = 32; i < 160; i += 32)
+        {
+            acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32, seed);
+        }
+        acc.low64 = XXH3_avalanche(acc.low64);
+        acc.high64 = XXH3_avalanche(acc.high64);
+        /*
+         * NB: `i <= len` will duplicate the last 32-bytes if
+         * len % 32 was zero. This is an unfortunate necessity to keep
+         * the hash result stable.
+         */
+        for (i = 160; i <= len; i += 32)
+        {
+            acc = XXH128_mix32B(acc, input + i - 32, input + i - 16,
+                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, seed);
+        }
+        /* last bytes */
+        acc = XXH128_mix32B(acc, input + len - 16, input + len - 32,
+                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
+                            (XXH64_hash_t) 0 - seed);
+
+        {
+            XXH128_hash_t h128;
+            h128.low64 = acc.low64 + acc.high64;
+            h128.high64 = (acc.low64 * XXH_PRIME64_1) + (acc.high64 * XXH_PRIME64_4)
+                        + ((len - seed) * XXH_PRIME64_2);
+            h128.low64 = XXH3_avalanche(h128.low64);
+            h128.high64 = (XXH64_hash_t) 0 - XXH3_avalanche(h128.high64);
+            return h128;
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input,
+                                                           size_t len,
+                                                           const xxh_u8* XXH_RESTRICT secret,
+                                                           size_t secretSize,
+                                                           XXH3_f_accumulate f_acc,
+                                                           XXH3_f_scrambleAcc f_scramble) {
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*) input, len, secret, secretSize, f_acc,
+                                f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    {
+        XXH128_hash_t h128;
+        h128.low64 =
+          XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, (xxh_u64) len * XXH_PRIME64_1);
+        h128.high64 =
+          XXH3_mergeAccs(acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                         ~((xxh_u64) len * XXH_PRIME64_2));
+        return h128;
+    }
+}
+
+/*
+ * It's important for performance that XXH3_hashLong() is not inlined.
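+ * One likely reason: kept out of line, the large vectorized loop is not
+ * duplicated into every caller, and only inputs longer than
+ * XXH3_MIDSIZE_MAX ever reach it.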
+ */ +XXH_NO_INLINE XXH_PUREF XXH128_hash_t XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, + size_t secretLen) { + (void) seed64; + (void) secret; + (void) secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * It's important for performance to pass @p secretLen (when it's static) + * to the compiler, so that it can properly optimize the vectorized loop. + * + * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE + * breaks -Og, this is XXH_NO_INLINE. + */ +XXH3_WITH_SECRET_INLINE XXH128_hash_t XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, + size_t secretLen) { + (void) seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*) secret, secretLen, + XXH3_accumulate, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) { + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), f_acc, + f_scramble); + { + XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*) secret, sizeof(secret), + f_acc, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t XXH3_hashLong_128b_withSeed(const void* input, + size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, + size_t secretLen) { + (void) secret; + (void) secretLen; + return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, XXH3_accumulate, + XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)( + const void* XXH_RESTRICT, size_t, XXH64_hash_t, const void* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t XXH3_128bits_internal(const void* input, + size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, + size_t secretLen, + XXH3_hashLong128_f f_hl128) { + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8*) input, len, (const xxh_u8*) secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8*) input, len, (const xxh_u8*) secret, secretLen, + seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8*) input, len, (const xxh_u8*) secret, secretLen, + seed64); + return f_hl128(input, len, seed64, secret, secretLen); +} + + +/* === Public XXH128 API === */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len) { + return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_default); +} + +/*! 
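+ * `secret` must supply at least XXH3_SECRET_SIZE_MIN bytes
+ * (XXH3_128bits_internal() asserts as much); XXH3_generateSecret() further
+ * below is one conforming way to build such a buffer.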
@ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, + size_t len, + XXH_NOESCAPE const void* secret, + size_t secretSize) { + return XXH3_128bits_internal(input, len, 0, (const xxh_u8*) secret, secretSize, + XXH3_hashLong_128b_withSecret); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, + size_t len, + XXH64_hash_t seed) { + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_withSeed); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, + size_t len, + XXH_NOESCAPE const void* secret, + size_t secretSize, + XXH64_hash_t seed) { + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); + return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) { + return XXH3_128bits_withSeed(input, len, seed); +} + + + /* === XXH3 128-bit streaming === */ + #ifndef XXH_NO_STREAM +/* + * All initialization and update functions are identical to 64-bit streaming variant. + * The only difference is the finalization routine. + */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) { + return XXH3_64bits_reset(statePtr); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, + XXH_NOESCAPE const void* secret, + size_t secretSize) { + return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, + XXH64_hash_t seed) { + return XXH3_64bits_reset_withSeed(statePtr, seed); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, + XXH_NOESCAPE const void* secret, + size_t secretSize, + XXH64_hash_t seed) { + return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, + XXH_NOESCAPE const void* input, + size_t len) { + return XXH3_64bits_update(state, input, len); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t* state) { + const unsigned char* const secret = + (state->extSecret == NULL) ? 
state->customSecret : state->extSecret;
+    if (state->totalLen > XXH3_MIDSIZE_MAX)
+    {
+        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, secret);
+        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+        {
+            XXH128_hash_t h128;
+            h128.low64 = XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
+                                        (xxh_u64) state->totalLen * XXH_PRIME64_1);
+            h128.high64 = XXH3_mergeAccs(acc,
+                                         secret + state->secretLimit + XXH_STRIPE_LEN - sizeof(acc)
+                                           - XXH_SECRET_MERGEACCS_START,
+                                         ~((xxh_u64) state->totalLen * XXH_PRIME64_2));
+            return h128;
+        }
+    }
+    /* len <= XXH3_MIDSIZE_MAX : short code */
+    if (state->seed)
+        return XXH3_128bits_withSeed(state->buffer, (size_t) state->totalLen, state->seed);
+    return XXH3_128bits_withSecret(state->buffer, (size_t) (state->totalLen), secret,
+                                   state->secretLimit + XXH_STRIPE_LEN);
+}
+    #endif /* !XXH_NO_STREAM */
+    /* 128-bit utility functions */
+
+    #include <string.h> /* memcmp, memcpy */
+
+/* return : 1 if equal, 0 if different */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) {
+    /* note : XXH128_hash_t is compact, it has no padding byte */
+    return !(memcmp(&h1, &h2, sizeof(h1)));
+}
+
+/* This prototype is compatible with stdlib's qsort().
+ * @return : >0 if *h128_1 > *h128_2
+ *           <0 if *h128_1 < *h128_2
+ *           =0 if *h128_1 == *h128_2 */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2) {
+    XXH128_hash_t const h1 = *(const XXH128_hash_t*) h128_1;
+    XXH128_hash_t const h2 = *(const XXH128_hash_t*) h128_2;
+    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
+    /* note : bets that, in most cases, hash values are different */
+    if (hcmp)
+        return hcmp;
+    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
+}
+
+
+/*====== Canonical representation ======*/
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst,
+                                             XXH128_hash_t hash) {
+    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN)
+    {
+        hash.high64 = XXH_swap64(hash.high64);
+        hash.low64 = XXH_swap64(hash.low64);
+    }
+    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
+    XXH_memcpy((char*) dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src) {
+    XXH128_hash_t h;
+    h.high64 = XXH_readBE64(src);
+    h.low64 = XXH_readBE64(src->digest + 8);
+    return h;
+}
+
+
+    /* ==========================================
+     * Secret generators
+     * ==========================================
+     */
+    #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128) {
+    XXH_writeLE64(dst, XXH_readLE64(dst) ^ h128.low64);
+    XXH_writeLE64((char*) dst + 8, XXH_readLE64((char*) dst + 8) ^ h128.high64);
+}
+
+/*!
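+ * Typical use, as a sketch (buffer and seed names are illustrative only):
+ *
+ *   unsigned char secret[XXH3_SECRET_SIZE_MIN];
+ *   if (XXH3_generateSecret(secret, sizeof(secret), seedData, seedSize) == XXH_OK)
+ *       hash = XXH3_128bits_withSecret(msg, msgSize, secret, sizeof(secret));
+ *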
@ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, + size_t secretSize, + XXH_NOESCAPE const void* customSeed, + size_t customSeedSize) { + #if (XXH_DEBUGLEVEL >= 1) + XXH_ASSERT(secretBuffer != NULL); + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + #else + /* production mode, assert() are disabled */ + if (secretBuffer == NULL) + return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) + return XXH_ERROR; + #endif + + if (customSeedSize == 0) + { + customSeed = XXH3_kSecret; + customSeedSize = XXH_SECRET_DEFAULT_SIZE; + } + #if (XXH_DEBUGLEVEL >= 1) + XXH_ASSERT(customSeed != NULL); + #else + if (customSeed == NULL) + return XXH_ERROR; + #endif + + /* Fill secretBuffer with a copy of customSeed - repeat as needed */ + { + size_t pos = 0; + while (pos < secretSize) + { + size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize); + memcpy((char*) secretBuffer + pos, customSeed, toCopy); + pos += toCopy; + } + } + + { + size_t const nbSeg16 = secretSize / 16; + size_t n; + XXH128_canonical_t scrambler; + XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); + for (n = 0; n < nbSeg16; n++) + { + XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n); + XXH3_combine16((char*) secretBuffer + n * 16, h128); + } + /* last segment */ + XXH3_combine16((char*) secretBuffer + secretSize - 16, + XXH128_hashFromCanonical(&scrambler)); + } + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, + XXH64_hash_t seed) { + XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + XXH3_initCustomSecret(secret, seed); + XXH_ASSERT(secretBuffer != NULL); + memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE); +} + + + /* Pop our optimization override from above */ + #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */ + #pragma GCC pop_options + #endif + + #endif /* XXH_NO_LONG_LONG */ + +#endif /* XXH_NO_XXH3 */ + +/*! + * @} + */ +#endif /* XXH_IMPLEMENTATION */ + + +#if defined(__cplusplus) +} /* extern "C" */ +#endif diff --git a/src/external/common/zstd_common.cpp b/src/external/common/zstd_common.cpp new file mode 100644 index 00000000..759452e5 --- /dev/null +++ b/src/external/common/zstd_common.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +#include "error_private.h" +#include "zstd_internal.h" + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! 
ZSTD_isError() :
+ *  tells if a return value is an error code
+ *  symbol is required for external callers */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName() :
+ *  provides error code string from function result (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+/*! ZSTD_getError() :
+ *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+
+/*! ZSTD_getErrorString() :
+ *  provides error code string from enum */
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
diff --git a/src/external/common/zstd_deps.h b/src/external/common/zstd_deps.h
new file mode 100644
index 00000000..34aba498
--- /dev/null
+++ b/src/external/common/zstd_deps.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This file provides common libc dependencies that zstd requires.
+ * The purpose is to allow replacing this file with a custom implementation
+ * to compile zstd without libc support.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+/* Even though we use qsort_r only for the dictionary builder, the macro
+ * _GNU_SOURCE has to be declared *before* the inclusion of any standard
+ * header and the script 'combine.sh' combines the whole zstd source code
+ * in a single file.
+ */
+#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) \
+    || defined(__CYGWIN__) || defined(__MSYS__)
+    #if !defined(_GNU_SOURCE) && !defined(__ANDROID__) /* NDK doesn't ship qsort_r(). */
+        #define _GNU_SOURCE
+    #endif
+#endif
+
+#include <limits.h>
+#include <stddef.h>
+#include <string.h>
+
+#if defined(__GNUC__) && __GNUC__ >= 4
+    #define ZSTD_memcpy(d, s, l) __builtin_memcpy((d), (s), (l))
+    #define ZSTD_memmove(d, s, l) __builtin_memmove((d), (s), (l))
+    #define ZSTD_memset(p, v, l) __builtin_memset((p), (v), (l))
+#else
+    #define ZSTD_memcpy(d, s, l) memcpy((d), (s), (l))
+    #define ZSTD_memmove(d, s, l) memmove((d), (s), (l))
+    #define ZSTD_memset(p, v, l) memset((p), (v), (l))
+#endif
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/* Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+    #define ZSTD_DEPS_MALLOC
+
+    #include <stdlib.h>
+
+    #define ZSTD_malloc(s) malloc(s)
+    #define ZSTD_calloc(n, s) calloc((n), (s))
+    #define ZSTD_free(p) free((p))
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
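+ * By default ZSTD_div64() is plain C division; a port targeting a platform
+ * without native 64-bit divide can replace this file and supply its own.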
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+    #define ZSTD_DEPS_MATH64
+
+    #define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/* Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+    #define ZSTD_DEPS_ASSERT
+
+    #include <assert.h>
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/* Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+    #define ZSTD_DEPS_IO
+
+    #include <stdio.h>
+    #define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/* Only requested when <stdint.h> is known to be present.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+    #define ZSTD_DEPS_STDINT
+
+    #include <stdint.h>
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/src/external/common/zstd_internal.h b/src/external/common/zstd_internal.h
new file mode 100644
index 00000000..b0589857
--- /dev/null
+++ b/src/external/common/zstd_internal.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "compiler.h"
+#include "cpu.h"
+#include "mem.h"
+#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include "../zstd.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "huf.h"
+#ifndef XXH_STATIC_LINKING_ONLY
+    #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
+#endif
+#include "xxhash.h" /* XXH_reset, update, digest */
+#ifndef ZSTD_NO_TRACE
+    #include "zstd_trace.h"
+#else
+    #define ZSTD_TRACE 0
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define ZSTD_isError ERR_isError /* for inlining */
+#define FSE_isError ERR_isError
+#define HUF_isError ERR_isError
+
+
+/*-*************************************
+* shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ?
(a) : (b)) +#define BOUNDED(min, val, max) (MAX(min, MIN(val, max))) + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1 << 12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8}; + +#define KB *(1 << 10) +#define MB *(1 << 20) +#define GB *(1U << 30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8}; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4}; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE \ + 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { + bt_raw, + bt_rle, + bt_compressed, + bt_reserved +} blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ +#define MIN_LITERALS_FOR_4_STREAMS 6 + +typedef enum { + set_basic, + set_rle, + set_compressed, + set_repeat +} symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define LitHufLog 11 +#define MaxLit ((1 << Litbits) - 1) +#define MaxML 52 +#define MaxLL 35 +#define DefaultMaxOff 28 +#define MaxOff 31 +#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ +#define MLFSELog 9 +#define LLFSELog 9 +#define OffFSELog 8 +#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) +#define MaxMLBits 16 +#define MaxLLBits 16 + +#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ +/* Each table cannot take more than #symbols * FSELog bits */ +#define ZSTD_MAX_FSE_HEADERS_SIZE \ + (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) + +static UNUSED_ATTR const U8 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, + 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; +static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1}; +#define LL_DEFAULTNORMLOG 6 /* for static allocation */ +static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; + +static UNUSED_ATTR const U8 ML_bits[MaxML + 1] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; +static UNUSED_ATTR const S16 ML_defaultNorm[MaxML + 1] = { + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; +#define ML_DEFAULTNORMLOG 6 /* for static allocation */ +static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; + +static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff + 1] = { + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}; +#define OF_DEFAULTNORMLOG 5 /* for static allocation */ +static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; + + +/*-******************************************* +* Shared functions to include for inlining 
+*********************************************/ +static void ZSTD_copy8(void* dst, const void* src) { +#if defined(ZSTD_ARCH_ARM_NEON) + vst1_u8((uint8_t*) dst, vld1_u8((const uint8_t*) src)); +#else + ZSTD_memcpy(dst, src, 8); +#endif +} +#define COPY8(d, s) \ + do \ + { \ + ZSTD_copy8(d, s); \ + d += 8; \ + s += 8; \ + } while (0) + +/* Need to use memmove here since the literal buffer can now be located within + the dst buffer. In circumstances where the op "catches up" to where the + literal buffer is, there can be partial overlaps in this call on the final + copy if the literal is being shifted by less than 16 bytes. */ +static void ZSTD_copy16(void* dst, const void* src) { +#if defined(ZSTD_ARCH_ARM_NEON) + vst1q_u8((uint8_t*) dst, vld1q_u8((const uint8_t*) src)); +#elif defined(ZSTD_ARCH_X86_SSE2) + _mm_storeu_si128((__m128i*) dst, _mm_loadu_si128((const __m128i*) src)); +#elif defined(__clang__) + ZSTD_memmove(dst, src, 16); +#else + /* ZSTD_memmove is not inlined properly by gcc */ + BYTE copy16_buf[16]; + ZSTD_memcpy(copy16_buf, src, 16); + ZSTD_memcpy(dst, copy16_buf, 16); +#endif +} +#define COPY16(d, s) \ + do \ + { \ + ZSTD_copy16(d, s); \ + d += 16; \ + s += 16; \ + } while (0) + +#define WILDCOPY_OVERLENGTH 32 +#define WILDCOPY_VECLEN 16 + +typedef enum { + ZSTD_no_overlap, + ZSTD_overlap_src_before_dst + /* ZSTD_overlap_dst_before_src, */ +} ZSTD_overlap_e; + +/*! ZSTD_wildcopy() : + * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. + * The src buffer must be before the dst buffer. + */ +MEM_STATIC FORCE_INLINE_ATTR void +ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) { + ptrdiff_t diff = (BYTE*) dst - (const BYTE*) src; + const BYTE* ip = (const BYTE*) src; + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + length; + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) + { + /* Handle short offset copies. */ + do + { + COPY8(op, ip); + } while (op < oend); + } + else + { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ + ZSTD_copy16(op, ip); + if (16 >= length) + return; + op += 16; + ip += 16; + do + { + COPY16(op, ip); + COPY16(op, ip); + } while (op < oend); + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) + { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. 
+ * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. */ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ + U16 litLength; + U16 mlBase; /* mlBase == matchLength - MINMATCH */ +} seqDef; + +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, + seqDef const* seq) { + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->mlBase + MINMATCH; + if (seqStore->longLengthPos == (U32) (seq - seqStore->sequencesStart)) + { + if (seqStore->longLengthType == ZSTD_llt_literalLength) + { + seqLen.litLength += 0x10000; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) + { + seqLen.matchLength += 0x10000; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t nbBlocks; + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +int ZSTD_seqToCodes( + const seqStore_t* + seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! 
*/
+void ZSTD_invalidateRepCodes(
+  ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+    blockType_e blockType;
+    U32 lastBlock;
+    U32 origSize;
+} blockProperties_t; /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+/* Used by: decompress, fullbench */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  decode sequence header from src */
+/* Used by: zstd_decompress_block, fullbench */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize);
+
+/**
+ * @returns true iff the CPU supports dynamic BMI2 dispatch.
+ */
+MEM_STATIC int ZSTD_cpuSupportsBmi2(void) {
+    ZSTD_cpuid_t cpuid = ZSTD_cpuid();
+    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_CCOMMON_H_MODULE */
diff --git a/src/external/common/zstd_trace.h b/src/external/common/zstd_trace.h
new file mode 100644
index 00000000..d9b53904
--- /dev/null
+++ b/src/external/common/zstd_trace.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_TRACE_H
+#define ZSTD_TRACE_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* weak symbol support
+ * For now, enable conservatively:
+ * - Only GNUC
+ * - Only ELF
+ * - Only x86-64, i386, aarch64 and risc-v.
+ * Also, explicitly disable on platforms known not to work so they aren't
+ * forgotten in the future.
+ */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && defined(__ELF__) \
+    && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) \
+        || defined(__aarch64__) || defined(__riscv)) \
+    && !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__) \
+    && !defined(_AIX)
+    #define ZSTD_HAVE_WEAK_SYMBOLS 1
+#else
+    #define ZSTD_HAVE_WEAK_SYMBOLS 0
+#endif
+#if ZSTD_HAVE_WEAK_SYMBOLS
+    #define ZSTD_WEAK_ATTR __attribute__((__weak__))
+#else
+    #define ZSTD_WEAK_ATTR
+#endif
+
+/* Only enable tracing when weak symbols are available. */
+#ifndef ZSTD_TRACE
+    #define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
+#endif
+
+#if ZSTD_TRACE
+
+struct ZSTD_CCtx_s;
+struct ZSTD_DCtx_s;
+struct ZSTD_CCtx_params_s;
+
+typedef struct {
+    /**
+     * ZSTD_VERSION_NUMBER
+     *
+     * This is guaranteed to be the first member of ZSTD_trace.
+     * Otherwise, this struct is not stable between versions. If
+     * the version number does not match your expectation, you
+     * should not interpret the rest of the struct.
+     */
+    unsigned version;
+    /**
+     * Non-zero if streaming (de)compression is used.
+     */
+    unsigned streaming;
+    /**
+     * The dictionary ID.
+     */
+    unsigned dictionaryID;
+    /**
+     * Is the dictionary cold?
+     * Only set on decompression.
+     */
+    unsigned dictionaryIsCold;
+    /**
+     * The dictionary size or zero if no dictionary.
+     */
+    size_t dictionarySize;
+    /**
+     * The uncompressed size of the data.
+     */
+    size_t uncompressedSize;
+    /**
+     * The compressed size of the data.
+     */
+    size_t compressedSize;
+    /**
+     * The fully resolved CCtx parameters (NULL on decompression).
+     */
+    struct ZSTD_CCtx_params_s const* params;
+    /**
+     * The ZSTD_CCtx pointer (NULL on decompression).
+     */
+    struct ZSTD_CCtx_s const* cctx;
+    /**
+     * The ZSTD_DCtx pointer (NULL on compression).
+     */
+    struct ZSTD_DCtx_s const* dctx;
+} ZSTD_Trace;
+
+/**
+ * A tracing context. It must be 0 when tracing is disabled.
+ * Otherwise, any non-zero value returned by a tracing begin()
+ * function is presented to any subsequent calls to end().
+ *
+ * Any non-zero value is treated as tracing is enabled and not
+ * interpreted by the library.
+ *
+ * Two possible uses are:
+ * * A timestamp for when the begin() function was called.
+ * * A unique key identifying the (de)compression, like the
+ *   address of the [dc]ctx pointer if you need to track
+ *   more information than just a timestamp.
+ */
+typedef unsigned long long ZSTD_TraceCtx;
+
+/**
+ * Trace the beginning of a compression call.
+ * @param cctx The cctx pointer for the compression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_compress_end().
+ */
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
+
+/**
+ * Trace the end of a compression call.
+ * @param ctx The return value of ZSTD_trace_compress_begin().
+ * @param trace The zstd tracing info.
+ */
+ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace);
+
+/**
+ * Trace the beginning of a decompression call.
+ * @param dctx The dctx pointer for the decompression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_decompress_end().
+ */
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
+
+/**
+ * Trace the end of a decompression call.
+ * @param ctx The return value of ZSTD_trace_decompress_begin().
+ * @param trace The zstd tracing info.
+ */
+ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace);
+
+#endif /* ZSTD_TRACE */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_TRACE_H */
diff --git a/src/external/decompress/huf_decompress.cpp b/src/external/decompress/huf_decompress.cpp
new file mode 100644
index 00000000..31eb5ace
--- /dev/null
+++ b/src/external/decompress/huf_decompress.cpp
@@ -0,0 +1,2184 @@
+/* ******************************************************************
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" /* BIT_* */ +#include "../common/fse.h" /* to compress headers */ +#include "../common/huf.h" +#include "../common/error_private.h" +#include "../common/zstd_internal.h" +#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ + +/* ************************************************************** +* Constants +****************************************************************/ + +#define HUF_DECODER_FAST_TABLELOG 11 + +/* ************************************************************** +* Macros +****************************************************************/ + +#ifdef HUF_DISABLE_FAST_DECODE + #define HUF_ENABLE_FAST_DECODE 0 +#else + #define HUF_ENABLE_FAST_DECODE 1 +#endif + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && defined(HUF_FORCE_DECOMPRESS_X2) + #error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + +/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is + * supported at runtime, so we can add the BMI2 target attribute. + * When it is disabled, we will still get BMI2 if it is enabled statically. + */ +#if DYNAMIC_BMI2 + #define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE +#else + #define HUF_FAST_BMI2_ATTRS +#endif + +#ifdef __cplusplus + #define HUF_EXTERN_C extern "C" +#else + #define HUF_EXTERN_C +#endif +#define HUF_ASM_DECL HUF_EXTERN_C + +#if DYNAMIC_BMI2 + #define HUF_NEED_BMI2_FUNCTION 1 +#else + #define HUF_NEED_BMI2_FUNCTION 0 +#endif + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +typedef size_t (*HUF_DecompressUsingDTableFn)( + void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + +#if DYNAMIC_BMI2 + + #define HUF_DGEN(fn) \ +\ + static size_t fn##_default(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ +\ + static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(void* dst, size_t dstSize, const void* cSrc, \ + size_t cSrcSize, const HUF_DTable* DTable) { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ +\ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, \ + HUF_DTable const* DTable, int flags) { \ + if (flags & HUF_flags_bmi2) \ + { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + + #define HUF_DGEN(fn) \ + 
static size_t fn(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, \
+                     HUF_DTable const* DTable, int flags) { \
+        (void) flags; \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+    }
+
+#endif
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+typedef struct {
+    BYTE maxTableLog;
+    BYTE tableType;
+    BYTE tableLog;
+    BYTE reserved;
+} DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) {
+    DTableDesc dtd;
+    ZSTD_memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+static size_t HUF_initFastDStream(BYTE const* ip) {
+    BYTE const lastByte = ip[7];
+    size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
+    size_t const value = MEM_readLEST(ip) | 1;
+    assert(bitsConsumed <= 8);
+    assert(sizeof(size_t) == 8);
+    return value << bitsConsumed;
+}
+
+
+/**
+ * The input/output arguments to the Huffman fast decoding loop:
+ *
+ * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
+ * op [in/out] - The output pointers, must be updated to reflect what is written.
+ * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
+ * dt [in] - The decoding table.
+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read
+ *                down to this pointer. It may be below iend[0].
+ * oend [in] - The end of the output stream. op[3] must not cross oend.
+ * iend [in] - The end of each input stream. ip[i] may cross iend[i],
+ *             as long as it is above ilowest, but that indicates corruption.
+ */
+typedef struct {
+    BYTE const* ip[4];
+    BYTE* op[4];
+    U64 bits[4];
+    void const* dt;
+    BYTE const* ilowest;
+    BYTE* oend;
+    BYTE const* iend[4];
+} HUF_DecompressFastArgs;
+
+typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
+
+/**
+ * Initializes args for the fast decoding loop.
+ * @returns 1 on success
+ *          0 if the fallback implementation should be used.
+ *          Or an error code on failure.
+ */
+static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args,
+                                          void* dst,
+                                          size_t dstSize,
+                                          void const* src,
+                                          size_t srcSize,
+                                          const HUF_DTable* DTable) {
+    void const* dt = DTable + 1;
+    U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
+
+    const BYTE* const istart = (const BYTE*) src;
+
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*) dst, dstSize);
+
+    /* The fast decoding loop assumes 64-bit little-endian.
+     * This condition is false on x32.
+     */
+    if (!MEM_isLittleEndian() || MEM_32bits())
+        return 0;
+
+    /* Avoid nullptr addition */
+    if (dstSize == 0)
+        return 0;
+    assert(dst != NULL);
+
+    /* strict minimum : jump table + 1 byte per stream */
+    if (srcSize < 10)
+        return ERROR(corruption_detected);
+
+    /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
+     * If table log is not correct at this point, fall back to the old decoder.
+     * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
+     */
+    if (dtLog != HUF_DECODER_FAST_TABLELOG)
+        return 0;
+
+    /* Read the jump table.
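+     * The first 6 bytes are three little-endian 16-bit lengths for streams
+     * 1-3; stream 4's length is implied. For example, with srcSize == 100
+     * and lengths 20, 25 and 30, stream 4 gets 100 - (20 + 25 + 30 + 6) == 19
+     * bytes.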
*/
+    {
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart + 2);
+        size_t const length3 = MEM_readLE16(istart + 4);
+        size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
+        args->iend[0] = istart + 6; /* jumpTable */
+        args->iend[1] = args->iend[0] + length1;
+        args->iend[2] = args->iend[1] + length2;
+        args->iend[3] = args->iend[2] + length3;
+
+        /* HUF_initFastDStream() requires this, and an input this small
+         * won't benefit from the ASM loop anyway.
+         */
+        if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
+            return 0;
+        if (length4 > srcSize)
+            return ERROR(corruption_detected); /* overflow */
+    }
+    /* ip[] contains the position that is currently loaded into bits[]. */
+    args->ip[0] = args->iend[1] - sizeof(U64);
+    args->ip[1] = args->iend[2] - sizeof(U64);
+    args->ip[2] = args->iend[3] - sizeof(U64);
+    args->ip[3] = (BYTE const*) src + srcSize - sizeof(U64);
+
+    /* op[] contains the output pointers. */
+    args->op[0] = (BYTE*) dst;
+    args->op[1] = args->op[0] + (dstSize + 3) / 4;
+    args->op[2] = args->op[1] + (dstSize + 3) / 4;
+    args->op[3] = args->op[2] + (dstSize + 3) / 4;
+
+    /* No point in calling the ASM loop for tiny outputs. */
+    if (args->op[3] >= oend)
+        return 0;
+
+    /* bits[] is the bit container.
+     * It is read from the MSB down to the LSB.
+     * It is shifted left as it is read, and zeros are
+     * shifted in. After the lowest valid bit a 1 is
+     * set, so that CountTrailingZeros(bits[]) can be used
+     * to count how many bits we've consumed.
+     */
+    args->bits[0] = HUF_initFastDStream(args->ip[0]);
+    args->bits[1] = HUF_initFastDStream(args->ip[1]);
+    args->bits[2] = HUF_initFastDStream(args->ip[2]);
+    args->bits[3] = HUF_initFastDStream(args->ip[3]);
+
+    /* The decoders must be sure to never read beyond ilowest.
+     * This is lower than iend[0], but allowing decoders to read
+     * down to ilowest can allow an extra iteration or two in the
+     * fast loop.
+     */
+    args->ilowest = istart;
+
+    args->oend = oend;
+    args->dt = dt;
+
+    return 1;
+}
+
+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit,
+                                       HUF_DecompressFastArgs const* args,
+                                       int stream,
+                                       BYTE* segmentEnd) {
+    /* Validate that we haven't overwritten. */
+    if (args->op[stream] > segmentEnd)
+        return ERROR(corruption_detected);
+    /* Validate that we haven't read beyond iend[].
+     * Note that ip[] may be < iend[] because the MSB is
+     * the next bit to read, and we may have consumed 100%
+     * of the stream, so down to iend[i] - 8 is valid.
+     */
+    if (args->ip[stream] < args->iend[stream] - 8)
+        return ERROR(corruption_detected);
+
+    /* Construct the BIT_DStream_t. */
+    assert(sizeof(size_t) == 8);
+    bit->bitContainer = MEM_readLEST(args->ip[stream]);
+    bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
+    bit->start = (const char*) args->ilowest;
+    bit->limitPtr = bit->start + sizeof(size_t);
+    bit->ptr = (const char*) args->ip[stream];
+
+    return 0;
+}
+
+/* Calls X(N) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM(X) \
+    do \
+    { \
+        X(0); \
+        X(1); \
+        X(2); \
+        X(3); \
+    } while (0)
+
+/* Calls X(N, var) for each stream 0, 1, 2, 3.
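+ * e.g. HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, 2) expands to
+ * X(0, (2)); X(1, (2)); X(2, (2)); X(3, (2)); inside a do/while(0).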
*/
+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
+    do \
+    { \
+        X(0, (var)); \
+        X(1, (var)); \
+        X(2, (var)); \
+        X(3, (var)); \
+    } while (0)
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+typedef struct {
+    BYTE nbBits;
+    BYTE byte;
+} HUF_DEltX1; /* single-symbol decoding */
+
+/**
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+ * a time.
+ */
+static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+    U64 D4;
+    if (MEM_isLittleEndian())
+    {
+        D4 = (U64) ((symbol << 8) + nbBits);
+    }
+    else
+    {
+        D4 = (U64) (symbol + (nbBits << 8));
+    }
+    assert(D4 < (1U << 16));
+    D4 *= 0x0001000100010001ULL;
+    return D4;
+}
+
+/**
+ * Increases the tableLog to targetTableLog and rescales the stats.
+ * If tableLog > targetTableLog this is a no-op.
+ * @returns New tableLog
+ */
+static U32
+HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog) {
+    if (tableLog > targetTableLog)
+        return tableLog;
+    if (tableLog < targetTableLog)
+    {
+        U32 const scale = targetTableLog - tableLog;
+        U32 s;
+        /* Increase the weight for all non-zero probability symbols by scale. */
+        for (s = 0; s < nbSymbols; ++s)
+        {
+            huffWeight[s] += (BYTE) ((huffWeight[s] == 0) ? 0 : scale);
+        }
+        /* Update rankVal to reflect the new weights.
+         * All weights except 0 get moved to weight + scale.
+         * Weights [1, scale] are empty.
+         */
+        for (s = targetTableLog; s > scale; --s)
+        {
+            rankVal[s] = rankVal[s - scale];
+        }
+        for (s = scale; s > 0; --s)
+        {
+            rankVal[s] = 0;
+        }
+    }
+    return targetTableLog;
+}
+
+typedef struct {
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+    U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+    BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+} HUF_ReadDTableX1_Workspace;
+
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable,
+                             const void* src,
+                             size_t srcSize,
+                             void* workSpace,
+                             size_t wkspSize,
+                             int flags) {
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX1* const dt = (HUF_DEltX1*) dtPtr;
+    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*) workSpace;
+
+    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+    if (sizeof(*wksp) > wkspSize)
+        return ERROR(tableLog_tooLarge);
+
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
+
+    iSize =
+      HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols,
+                         &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags);
+    if (HUF_isError(iSize))
+        return iSize;
+
+
+    /* Table header */
+    {
+        DTableDesc dtd = HUF_getDTableDesc(DTable);
+        U32 const maxTableLog = dtd.maxTableLog + 1;
+        U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
+        tableLog =
+          HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
+        if (tableLog > (U32) (dtd.maxTableLog + 1))
+            return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
+        dtd.tableType = 0;
+        dtd.tableLog = (BYTE) tableLog;
+        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Compute symbols and rankStart given rankVal:
+     *
+     * rankVal already contains the number of values of each weight.
+     *
+     * symbols contains the symbols ordered by weight.
First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + U32 nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int) nbSymbols - unroll + 1; + for (n = 0; n < (int) tableLog + 1; n++) + { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n = 0; n < nLimit; n += unroll) + { + int u; + for (u = 0; u < unroll; ++u) + { + size_t const w = wksp->huffWeight[n + u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE) (n + u); + } + } + for (; n < (int) nbSymbols; ++n) + { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE) n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outer loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. + */ + { + U32 w; + int symbol = wksp->rankVal[0]; + int rankStart = 0; + for (w = 1; w < tableLog + 1; ++w) + { + int const symbolCount = wksp->rankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE) (tableLog + 1 - w); + int s; + int u; + switch (length) + { + case 1 : + for (s = 0; s < symbolCount; ++s) + { + HUF_DEltX1 D; + D.byte = wksp->symbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2 : + for (s = 0; s < symbolCount; ++s) + { + HUF_DEltX1 D; + D.byte = wksp->symbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart + 0] = D; + dt[uStart + 1] = D; + uStart += 2; + } + break; + case 4 : + for (s = 0; s < symbolCount; ++s) + { + U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8 : + for (s = 0; s < symbolCount; ++s) + { + U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default : + for (s = 0; s < symbolCount; ++s) + { + U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); + for (u = 0; u < length; u += 16) + { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } + return iSize; +} + +FORCE_INLINE_TEMPLATE BYTE HUF_decodeSymbolX1(BIT_DStream_t* Dstream, + const HUF_DEltX1* dt, + const U32 dtLog) { + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + + #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + do \ + { \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); \ + } while (0) + + #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + do \ + { \ + if (MEM_64bits() || (HUF_TABLELOG_MAX <= 12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ + } while (0) + + #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + do \ + { \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ + } while (0) + +HINT_INLINE size_t HUF_decodeStreamX1(BYTE* p, + BIT_DStream_t* const bitDPtr, + 
BYTE* const pEnd, + const HUF_DEltX1* const dt, + const U32 dtLog) { + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + if ((pEnd - p) > 3) + { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - 3)) + { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + } + else + { + BIT_reloadDStream(bitDPtr); + } + + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return (size_t) (pEnd - pStart); +} + +FORCE_INLINE_TEMPLATE size_t HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { + BYTE* op = (BYTE*) dst; + BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize); + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*) dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); + + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + + if (!BIT_endOfDStream(&bitD)) + return ERROR(corruption_detected); + + return dstSize; +} + +/* HUF_decompress4X1_usingDTable_internal_body(): + * Conditions : + * @dstSize >= 6 + */ +FORCE_INLINE_TEMPLATE size_t HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { + /* Check */ + if (cSrcSize < 10) + return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (dstSize < 6) + return ERROR(corruption_detected); /* stream 4-split doesn't work */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*) dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart + 2); + size_t const length3 = MEM_readLE16(istart + 4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize + 3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) + return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) + return ERROR(corruption_detected); /* overflow */ + assert(dstSize >= 6); /* validated above */ + CHECK_F(BIT_initDStream(&bitD1, istart1, length1)); + CHECK_F(BIT_initDStream(&bitD2, istart2, length2)); + CHECK_F(BIT_initDStream(&bitD3, istart3, length3)); + CHECK_F(BIT_initDStream(&bitD4, istart4, length4)); + + /* up to 16 symbols per loop (4 symbols 
per stream) in 64-bit mode */ + if ((size_t) (oend - op4) >= sizeof(size_t)) + { + for (; (endSignal) & (op4 < olimit);) + { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) + return ERROR(corruption_detected); + if (op2 > opStart3) + return ERROR(corruption_detected); + if (op3 > opStart4) + return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { + U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) + & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) + return ERROR(corruption_detected); + } + + /* decoded size */ + return dstSize; + } +} + + #if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE size_t HUF_decompress4X1_usingDTable_internal_bmi2( + void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} + #endif + +static size_t HUF_decompress4X1_usingDTable_internal_default( + void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} + + #if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void +HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; + + #endif + +static HUF_FAST_BMI2_ATTRS void +HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) { + U64 bits[4]; + BYTE const* ip[4]; + BYTE* op[4]; + U16 const* const dtable = (U16 const*) args->dt; + BYTE* const oend = args->oend; + BYTE const* const ilowest = args->ilowest; + + /* Copy the arguments to local variables */ + ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); + ZSTD_memcpy((void*) (&ip), &args->ip, sizeof(ip)); + ZSTD_memcpy(&op, &args->op, sizeof(op)); + + assert(MEM_isLittleEndian()); + assert(!MEM_32bits()); + + for (;;) + { + BYTE* olimit; + int stream; + + /* Assert loop preconditions */ + #ifndef NDEBUG + for (stream = 0; stream < 4; ++stream) + { + assert(op[stream] <= (stream == 3 ? 
oend : op[stream + 1])); + assert(ip[stream] >= ilowest); + } + #endif + /* Compute olimit */ + { + /* Each iteration produces 5 output symbols per stream */ + size_t const oiters = (size_t) (oend - op[3]) / 5; + /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes + * per stream. + */ + size_t const iiters = (size_t) (ip[0] - ilowest) / 7; + /* We can safely run iters iterations before running bounds checks */ + size_t const iters = MIN(oiters, iiters); + size_t const symbols = iters * 5; + + /* We can simply check that op[3] < olimit, instead of checking all + * of our bounds, since we can't hit the other bounds until we've run + * iters iterations, which only happens when op[3] == olimit. + */ + olimit = op[3] + symbols; + + /* Exit fast decoding loop once we reach the end. */ + if (op[3] == olimit) + break; + + /* Exit the decoding loop if any input pointer has crossed the + * previous one. This indicates corruption, and a precondition + * to our loop is that ip[i] >= ip[0]. + */ + for (stream = 1; stream < 4; ++stream) + { + if (ip[stream] < ip[stream - 1]) + goto _out; + } + } + + #ifndef NDEBUG + for (stream = 1; stream < 4; ++stream) + { + assert(ip[stream] >= ip[stream - 1]); + } + #endif + + #define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \ + do \ + { \ + int const index = (int) (bits[(_stream)] >> 53); \ + int const entry = (int) dtable[index]; \ + bits[(_stream)] <<= (entry & 0x3F); \ + op[(_stream)][(_symbol)] = (BYTE) ((entry >> 8) & 0xFF); \ + } while (0) + + #define HUF_4X1_RELOAD_STREAM(_stream) \ + do \ + { \ + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ + int const nbBits = ctz & 7; \ + int const nbBytes = ctz >> 3; \ + op[(_stream)] += 5; \ + ip[(_stream)] -= nbBytes; \ + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ + bits[(_stream)] <<= nbBits; \ + } while (0) + + /* Manually unroll the loop because compilers don't consistently + * unroll the inner loops, which destroys performance. + */ + do + { + /* Decode 5 symbols in each of the 4 streams */ + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4); + + /* Reload each of the 4 bitstreams */ + HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM); + } while (op[3] < olimit); + + #undef HUF_4X1_DECODE_SYMBOL + #undef HUF_4X1_RELOAD_STREAM + } + +_out: + + /* Save the final values of each of the state variables back to args.
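
For readers new to this refill scheme: HUF_4X1_RELOAD_STREAM never tracks a bit count. Each container is primed with a low sentinel bit (the `| 1`), so after the left shifts of five decode steps, counting trailing zeros recovers exactly how many bits were consumed. A minimal hedged sketch of the trick in portable C (the name refill_backward and the use of __builtin_ctzll are illustrative assumptions, not the zstd API):

    #include <stdint.h>
    #include <string.h>

    /* Sketch of one sentinel-based refill: `bits` was loaded as (read64 | 1)
     * and then left-shifted once per decoded symbol, so its trailing zeros
     * equal the number of bits consumed since the last refill. `bits` is
     * never zero because the sentinel survives the shifts. */
    static uint64_t refill_backward(const uint8_t** ip, uint64_t bits) {
        int const consumed = __builtin_ctzll(bits); /* GCC/Clang intrinsic */
        int const nbBits   = consumed & 7;  /* partial byte stays as a shift */
        int const nbBytes  = consumed >> 3; /* whole bytes rewind the input */
        uint64_t next;
        *ip -= nbBytes;                     /* streams are read backwards */
        memcpy(&next, *ip, sizeof(next));   /* little-endian load, as asserted */
        return (next | 1) << nbBits;        /* re-plant the sentinel */
    }

This is what lets the loop decode five symbols per stream between refills without keeping a bitsConsumed counter in a register.
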
*/ + ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); + ZSTD_memcpy((void*) (&args->ip), &ip, sizeof(ip)); + ZSTD_memcpy(&args->op, &op, sizeof(op)); +} + +/** + * @returns @p dstSize on success (>= 6) + * 0 if the fallback implementation should be used + * An error if an error occurred + */ +static HUF_FAST_BMI2_ATTRS size_t +HUF_decompress4X1_usingDTable_internal_fast(void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + const HUF_DTable* DTable, + HUF_DecompressFastLoopFn loopFn) { + void const* dt = DTable + 1; + BYTE const* const ilowest = (BYTE const*) cSrc; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*) dst, dstSize); + HUF_DecompressFastArgs args; + { + size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); + if (ret == 0) + return 0; + } + + assert(args.ip[0] >= args.ilowest); + loopFn(&args); + + /* Our loop guarantees that ip[] >= ilowest and that we haven't + * overwritten any op[]. + */ + assert(args.ip[0] >= ilowest); + assert(args.ip[1] >= ilowest); + assert(args.ip[2] >= ilowest); + assert(args.ip[3] >= ilowest); + assert(args.op[3] <= oend); + + assert(ilowest == args.ilowest); + assert(ilowest + 6 == args.iend[0]); + (void) ilowest; + + /* finish bit streams one by one. */ + { + size_t const segmentSize = (dstSize + 3) / 4; + BYTE* segmentEnd = (BYTE*) dst; + int i; + for (i = 0; i < 4; ++i) + { + BIT_DStream_t bit; + if (segmentSize <= (size_t) (oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + /* Decompress and validate that we've produced exactly the expected length. */ + args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*) dt, + HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) + return ERROR(corruption_detected); + } + } + + /* decoded size */ + assert(dstSize != 0); + return dstSize; +} + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) + +static size_t HUF_decompress4X1_usingDTable_internal(void* dst, + size_t dstSize, + void const* cSrc, + size_t cSrcSize, + HUF_DTable const* DTable, + int flags) { + HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default; + HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop; + + #if DYNAMIC_BMI2 + if (flags & HUF_flags_bmi2) + { + fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2; + #if ZSTD_ENABLE_ASM_X86_64_BMI2 + if (!(flags & HUF_flags_disableAsm)) + { + loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; + } + #endif + } + else + { + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); + } + #endif + + #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + if (!(flags & HUF_flags_disableAsm)) + { + loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; + } + #endif + + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) + { + size_t const ret = + HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); + if (ret != 0) + return ret; + } + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); +} + +static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags) { + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize,
workSpace, wkspSize, flags); + if (HUF_isError(hSize)) + return hSize; + if (hSize >= cSrcSize) + return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); +} + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { + U16 sequence; + BYTE nbBits; + BYTE length; +} HUF_DEltX2; /* double-symbols decoding */ +typedef struct { + BYTE symbol; +} sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + +/** + * Constructs a HUF_DEltX2 in a U32. + */ +static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level) { + U32 seq; + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3); + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32)); + if (MEM_isLittleEndian()) + { + seq = level == 1 ? symbol : (baseSeq + (symbol << 8)); + return seq + (nbBits << 16) + ((U32) level << 24); + } + else + { + seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol); + return (seq << 16) + (nbBits << 8) + (U32) level; + } +} + +/** + * Constructs a HUF_DEltX2. + */ +static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level) { + HUF_DEltX2 DElt; + U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val)); + ZSTD_memcpy(&DElt, &val, sizeof(val)); + return DElt; +} + +/** + * Constructs 2 HUF_DEltX2s and packs them into a U64. + */ +static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level) { + U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + return (U64) DElt + ((U64) DElt << 32); +} + +/** + * Fills the DTable rank with all the symbols from [begin, end) that are each + * nbBits long. + * + * @param DTableRank The start of the rank in the DTable. + * @param begin The first symbol to fill (inclusive). + * @param end The last symbol to fill (exclusive). + * @param nbBits Each symbol is nbBits long. + * @param tableLog The table log. + * @param baseSeq If level == 1 { 0 } else { the first level symbol } + * @param level The level in the table. Must be 1 or 2. 
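
As a quick aside on HUF_buildDEltX2U32 above: on little-endian targets the arithmetic lines the packed value up with the struct layout pinned by the DEBUG_STATIC_ASSERTs, i.e. bytes 0-1 carry the (up to) two-symbol sequence, byte 2 nbBits, byte 3 the sequence length; HUF_buildDEltX2U64 then duplicates that U32 into both halves so two adjacent slots can be stored with one 8-byte write. A hedged standalone check of the layout (illustrative only, not part of the patch):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DEltX2;

    int main(void) {
        /* level 2: baseSeq 'a' in the low byte, symbol 'b' in the high byte,
         * 11 bits consumed, 2 output bytes -- mirrors the little-endian branch */
        uint32_t const v = ((uint32_t) 'a' + ((uint32_t) 'b' << 8))
                         + (11u << 16) + (2u << 24);
        DEltX2 e;
        memcpy(&e, &v, sizeof(e)); /* little-endian assumption */
        assert(e.sequence == (uint16_t) ('a' | ('b' << 8)));
        assert(e.nbBits == 11 && e.length == 2);
        return 0;
    }
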
+ */ +static void HUF_fillDTableX2ForWeight(HUF_DEltX2* DTableRank, + sortedSymbol_t const* begin, + sortedSymbol_t const* end, + U32 nbBits, + U32 tableLog, + U16 baseSeq, + int const level) { + U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */); + const sortedSymbol_t* ptr; + assert(level >= 1 && level <= 2); + switch (length) + { + case 1 : + for (ptr = begin; ptr != end; ++ptr) + { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + *DTableRank++ = DElt; + } + break; + case 2 : + for (ptr = begin; ptr != end; ++ptr) + { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + DTableRank[0] = DElt; + DTableRank[1] = DElt; + DTableRank += 2; + } + break; + case 4 : + for (ptr = begin; ptr != end; ++ptr) + { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + DTableRank += 4; + } + break; + case 8 : + for (ptr = begin; ptr != end; ++ptr) + { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + DTableRank += 8; + } + break; + default : + for (ptr = begin; ptr != end; ++ptr) + { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + HUF_DEltX2* const DTableRankEnd = DTableRank + length; + for (; DTableRank != DTableRankEnd; DTableRank += 8) + { + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + } + } + break; + } +} + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, + U32 targetLog, + const U32 consumedBits, + const U32* rankVal, + const int minWeight, + const int maxWeight1, + const sortedSymbol_t* sortedSymbols, + U32 const* rankStart, + U32 nbBitsBaseline, + U16 baseSeq) { + /* Fill skipped values (all positions up to rankVal[minWeight]). + * These positions only get a single symbol because the combined weight + * is too large. + */ + if (minWeight > 1) + { + U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */); + U64 const DEltX2 = + HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1); + int const skipSize = rankVal[minWeight]; + assert(length > 1); + assert((U32) skipSize < length); + switch (length) + { + case 2 : + assert(skipSize == 1); + ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2)); + break; + case 4 : + assert(skipSize <= 4); + ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2)); + break; + default : { + int i; + for (i = 0; i < skipSize; i += 8) + { + ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2)); + } + } + } + } + + /* Fill each of the second level symbols by weight.
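
The switch in HUF_fillDTableX2ForWeight above is an unrolled form of a simple invariant: a code of nbBits bits owns 2^(tableLog - nbBits) consecutive slots, so any tableLog-bit lookup beginning with that code finds its entry. A hedged scalar sketch of the same fill (the memcpy cases above are just this loop, specialized per run length; the DEltX2 typedef repeats the sketch type from earlier):

    #include <stdint.h>

    typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DEltX2;

    /* Illustrative only: replicate one decoded entry across its whole run. */
    static void fill_run(DEltX2* slot, uint32_t tableLog, uint32_t nbBits,
                         DEltX2 const entry) {
        uint32_t const run = 1u << (tableLog - nbBits); /* slots owned by code */
        for (uint32_t i = 0; i < run; ++i)
            slot[i] = entry;
    }
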
*/ + { + int w; + for (w = minWeight; w < maxWeight1; ++w) + { + int const begin = rankStart[w]; + int const end = rankStart[w + 1]; + U32 const nbBits = nbBitsBaseline - w; + U32 const totalBits = nbBits + consumedBits; + HUF_fillDTableX2ForWeight(DTable + rankVal[w], sortedSymbols + begin, + sortedSymbols + end, totalBits, targetLog, baseSeq, + /* level */ 2); + } + } +} + +static void HUF_fillDTableX2(HUF_DEltX2* DTable, + const U32 targetLog, + const sortedSymbol_t* sortedList, + const U32* rankStart, + rankValCol_t* rankValOrigin, + const U32 maxWeight, + const U32 nbBitsBaseline) { + U32* const rankVal = rankValOrigin[0]; + const int scaleLog = + nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + int w; + int const wEnd = (int) maxWeight + 1; + + /* Fill DTable in order of weight. */ + for (w = 1; w < wEnd; ++w) + { + int const begin = (int) rankStart[w]; + int const end = (int) rankStart[w + 1]; + U32 const nbBits = nbBitsBaseline - w; + + if (targetLog - nbBits >= minBits) + { + /* Enough room for a second symbol. */ + int start = rankVal[w]; + U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */); + int minWeight = nbBits + scaleLog; + int s; + if (minWeight < 1) + minWeight = 1; + /* Fill the DTable for every symbol of weight w. + * These symbols get at least 1 second symbol. + */ + for (s = begin; s != end; ++s) + { + HUF_fillDTableX2Level2(DTable + start, targetLog, nbBits, rankValOrigin[nbBits], + minWeight, wEnd, sortedList, rankStart, nbBitsBaseline, + sortedList[s].symbol); + start += length; + } + } + else + { + /* Only a single symbol. */ + HUF_fillDTableX2ForWeight(DTable + rankVal[w], sortedList + begin, sortedList + end, + nbBits, targetLog, + /* baseSeq */ 0, /* level */ 1); + } + } +} + +typedef struct { + rankValCol_t rankVal[HUF_TABLELOG_MAX]; + U32 rankStats[HUF_TABLELOG_MAX + 1]; + U32 rankStart0[HUF_TABLELOG_MAX + 3]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; +} HUF_ReadDTableX2_Workspace; + +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, + size_t srcSize, + void* workSpace, + size_t wkspSize, + int flags) { + U32 tableLog, maxW, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*) dtPtr; + U32* rankStart; + + HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*) workSpace; + + if (sizeof(*wksp) > wkspSize) + return ERROR(GENERIC); + + rankStart = wksp->rankStart0 + 1; + ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats)); + ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) + == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) + return ERROR(tableLog_tooLarge); + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
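
The only sizing contract here is the sizeof(*wksp) > wkspSize check earlier in this function, so within this translation unit a caller can simply place the workspace on the stack. A hedged sketch of the call shape (the wrapper name is illustrative; the workspace struct is file-local, so this shape only works inside this file):

    /* Illustrative wrapper around HUF_readDTableX2_wksp. */
    static size_t readX2_example(HUF_DTable* dt, const void* src,
                                 size_t srcSize, int flags) {
        HUF_ReadDTableX2_Workspace wksp; /* a few KB of scratch */
        return HUF_readDTableX2_wksp(dt, src, srcSize, &wksp, sizeof(wksp), flags);
    }
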
*/ + + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, + &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, + sizeof(wksp->calleeWksp), flags); + if (HUF_isError(iSize)) + return iSize; + + /* check result */ + if (tableLog > maxTableLog) + return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) + maxTableLog = HUF_DECODER_FAST_TABLELOG; + + /* find maxWeight */ + for (maxW = tableLog; wksp->rankStats[maxW] == 0; maxW--) + {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { + U32 w, nextRankStart = 0; + for (w = 1; w < maxW + 1; w++) + { + U32 curr = nextRankStart; + nextRankStart += wksp->rankStats[w]; + rankStart[w] = curr; + } + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + rankStart[maxW + 1] = nextRankStart; + } + + /* sort symbols by weight */ + { + U32 s; + for (s = 0; s < nbSymbols; s++) + { + U32 const w = wksp->weightList[s]; + U32 const r = rankStart[w]++; + wksp->sortedSymbol[r].symbol = (BYTE) s; + } + rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ + } + + /* Build rankVal */ + { + U32* const rankVal0 = wksp->rankVal[0]; + { + int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */ + U32 nextRankVal = 0; + U32 w; + for (w = 1; w < maxW + 1; w++) + { + U32 curr = nextRankVal; + nextRankVal += wksp->rankStats[w] << (w + rescale); + rankVal0[w] = curr; + } + } + { + U32 const minBits = tableLog + 1 - maxW; + U32 consumed; + for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) + { + U32* const rankValPtr = wksp->rankVal[consumed]; + U32 w; + for (w = 1; w < maxW + 1; w++) + { + rankValPtr[w] = rankVal0[w] >> consumed; + } + } + } + } + + HUF_fillDTableX2(dt, maxTableLog, wksp->sortedSymbol, wksp->rankStart0, wksp->rankVal, maxW, + tableLog + 1); + + dtd.tableLog = (BYTE) maxTableLog; + dtd.tableType = 1; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +FORCE_INLINE_TEMPLATE U32 HUF_decodeSymbolX2(void* op, + BIT_DStream_t* DStream, + const HUF_DEltX2* dt, + const U32 dtLog) { + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, &dt[val].sequence, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 HUF_decodeLastSymbolX2(void* op, + BIT_DStream_t* DStream, + const HUF_DEltX2* dt, + const U32 dtLog) { + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, &dt[val].sequence, 1); + if (dt[val].length == 1) + { + BIT_skipBits(DStream, dt[val].nbBits); + } + else + { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) + { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8)) + /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8); + } + } + return 1; +} + + #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + do \ + { \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) + + #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + do \ + { \ + if (MEM_64bits() || (HUF_TABLELOG_MAX <= 12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) + + #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + do \ + { \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) + +HINT_INLINE size_t HUF_decodeStreamX2( + BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) { + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + if ((size_t) (pEnd - p) >= sizeof(bitDPtr->bitContainer)) + { + if (dtLog <= 11 && MEM_64bits()) + { + /* up to 10 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - 9)) + { + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } + else + { + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) + & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) + { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } + } + else + { + BIT_reloadDStream(bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + if ((size_t) (pEnd - p) >= 2) + { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd - 2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + } + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p - pStart; +} + +FORCE_INLINE_TEMPLATE size_t HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { + BIT_DStream_t bitD; + + /* Init */ + CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); + + /* decode */ + { + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize); + const void* const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*) dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) + return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +/* HUF_decompress4X2_usingDTable_internal_body(): + * Conditions: + * @dstSize >= 6 + */ +FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable) { + if (cSrcSize < 10) + return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (dstSize < 6) + return ERROR(corruption_detected); /* stream 4-split doesn't work */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t) - 1); + const void* const dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*) 
dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart + 2); + size_t const length3 = MEM_readLE16(istart + 4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize + 3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) + return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) + return ERROR(corruption_detected); /* overflow */ + assert(dstSize >= 6 /* validated above */); + CHECK_F(BIT_initDStream(&bitD1, istart1, length1)); + CHECK_F(BIT_initDStream(&bitD2, istart2, length2)); + CHECK_F(BIT_initDStream(&bitD3, istart3, length3)); + CHECK_F(BIT_initDStream(&bitD4, istart4, length4)); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + if ((size_t) (oend - op4) >= sizeof(size_t)) + { + for (; (endSignal) & (op4 < olimit);) + { + #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + #else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = + (U32) LIKELY((U32) (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); + #endif + } + } + + /* check corruption */ + if (op1 > opStart2) + return ERROR(corruption_detected); + if (op2 > opStart3) + return ERROR(corruption_detected); + if (op3 > opStart4) + 
return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { + U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) + & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) + return ERROR(corruption_detected); + } + + /* decoded size */ + return dstSize; + } +} + + #if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE size_t HUF_decompress4X2_usingDTable_internal_bmi2( + void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} + #endif + +static size_t HUF_decompress4X2_usingDTable_internal_default( + void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} + + #if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void +HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; + + #endif + +static HUF_FAST_BMI2_ATTRS void +HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) { + U64 bits[4]; + BYTE const* ip[4]; + BYTE* op[4]; + BYTE* oend[4]; + HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*) args->dt; + BYTE const* const ilowest = args->ilowest; + + /* Copy the arguments to local registers. */ + ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); + ZSTD_memcpy((void*) (&ip), &args->ip, sizeof(ip)); + ZSTD_memcpy(&op, &args->op, sizeof(op)); + + oend[0] = op[1]; + oend[1] = op[2]; + oend[2] = op[3]; + oend[3] = args->oend; + + assert(MEM_isLittleEndian()); + assert(!MEM_32bits()); + + for (;;) + { + BYTE* olimit; + int stream; + + /* Assert loop preconditions */ + #ifndef NDEBUG + for (stream = 0; stream < 4; ++stream) + { + assert(op[stream] <= oend[stream]); + assert(ip[stream] >= ilowest); + } + #endif + /* Compute olimit */ + { + /* Each loop does 5 table lookups for each of the 4 streams. + * Each table lookup consumes up to 11 bits of input, and produces + * up to 2 bytes of output. + */ + /* We can consume up to 7 bytes of input per iteration per stream. + * We also know that each input pointer is >= ip[0]. So we can run + * iters loops before running out of input. + */ + size_t iters = (size_t) (ip[0] - ilowest) / 7; + /* Each iteration can produce up to 10 bytes of output per stream. + * Each output stream may advance at different rates. So take the + * minimum number of safe iterations among all the output streams. + */ + for (stream = 0; stream < 4; ++stream) + { + size_t const oiters = (size_t) (oend[stream] - op[stream]) / 10; + iters = MIN(iters, oiters); + } + + /* Each iteration produces at least 5 output symbols. So until + * op[3] crosses olimit, we know we haven't executed iters + * iterations yet. This saves us maintaining an iters counter, + * at the expense of computing the remaining # of iterations + * more frequently. + */ + olimit = op[3] + (iters * 5); + + /* Exit the fast decoding loop once we reach the end. */ + if (op[3] == olimit) + break; + + /* Exit the decoding loop if any input pointer has crossed the + * previous one. This indicates corruption, and a precondition + * to our loop is that ip[i] >= ip[0].
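
Restated plainly, the budget above says: each iteration consumes at most 7 bytes from the lowest input pointer and produces at most 10 bytes per output stream, so the minimum of those budgets bounds how many iterations can run without per-symbol checks. A hedged standalone restatement (not the zstd API):

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative restatement of the X2 olimit budget. */
    static size_t safe_iters(const uint8_t* ip0, const uint8_t* ilowest,
                             uint8_t* const op[4], uint8_t* const oend[4]) {
        size_t iters = (size_t) (ip0 - ilowest) / 7;              /* input budget */
        for (int s = 0; s < 4; ++s) {
            size_t const oiters = (size_t) (oend[s] - op[s]) / 10; /* output budget */
            if (oiters < iters)
                iters = oiters;
        }
        return iters; /* safe unchecked iterations; olimit = op[3] + 5 * iters */
    }
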
+ */ + for (stream = 1; stream < 4; ++stream) + { + if (ip[stream] < ip[stream - 1]) + goto _out; + } + } + + #ifndef NDEBUG + for (stream = 1; stream < 4; ++stream) + { + assert(ip[stream] >= ip[stream - 1]); + } + #endif + + #define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \ + do \ + { \ + if ((_decode3) || (_stream) != 3) \ + { \ + int const index = (int) (bits[(_stream)] >> 53); \ + HUF_DEltX2 const entry = dtable[index]; \ + MEM_write16(op[(_stream)], entry.sequence); \ + bits[(_stream)] <<= (entry.nbBits) & 0x3F; \ + op[(_stream)] += (entry.length); \ + } \ + } while (0) + + #define HUF_4X2_RELOAD_STREAM(_stream) \ + do \ + { \ + HUF_4X2_DECODE_SYMBOL(3, 1); \ + { \ + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ + int const nbBits = ctz & 7; \ + int const nbBytes = ctz >> 3; \ + ip[(_stream)] -= nbBytes; \ + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ + bits[(_stream)] <<= nbBits; \ + } \ + } while (0) + + /* Manually unroll the loop because compilers don't consistently + * unroll the inner loops, which destroys performance. + */ + do + { + /* Decode 5 symbols from each of the first 3 streams. + * The final stream will be decoded during the reload phase + * to reduce register pressure. + */ + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + + /* Decode one symbol from the final stream */ + HUF_4X2_DECODE_SYMBOL(3, 1); + + /* Decode 4 symbols from the final stream & reload bitstreams. + * The final stream is reloaded last, meaning that all 5 symbols + * are decoded from the final stream before it is reloaded. + */ + HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM); + } while (op[3] < olimit); + } + + #undef HUF_4X2_DECODE_SYMBOL + #undef HUF_4X2_RELOAD_STREAM + +_out: + + /* Save the final values of each of the state variables back to args. 
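
One detail worth spelling out from HUF_4X2_DECODE_SYMBOL: both sequence bytes are always written, but the output pointer advances only by entry.length (1 or 2), which keeps the hot path branch-free. A hedged scalar restatement of one step (the typedef repeats the sketch type used earlier; this is not the zstd API):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DEltX2;

    /* Illustrative only: one X2 decode step for a single stream. */
    static void decode_one_x2(uint64_t* bits, uint8_t** op, DEltX2 const* dtable) {
        DEltX2 const e = dtable[*bits >> 53]; /* top 11 bits index the table */
        memcpy(*op, &e.sequence, 2);          /* unconditionally write 2 bytes */
        *bits <<= (e.nbBits & 0x3F);          /* consume the code's bits */
        *op += e.length;                      /* claim only 1 or 2 of them */
    }
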
*/ + ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); + ZSTD_memcpy((void*) (&args->ip), &ip, sizeof(ip)); + ZSTD_memcpy(&args->op, &op, sizeof(op)); +} + + +static HUF_FAST_BMI2_ATTRS size_t +HUF_decompress4X2_usingDTable_internal_fast(void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + const HUF_DTable* DTable, + HUF_DecompressFastLoopFn loopFn) { + void const* dt = DTable + 1; + const BYTE* const ilowest = (const BYTE*) cSrc; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*) dst, dstSize); + HUF_DecompressFastArgs args; + { + size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init asm args"); + if (ret == 0) + return 0; + } + + assert(args.ip[0] >= args.ilowest); + loopFn(&args); + + /* note : op4 already verified within main loop */ + assert(args.ip[0] >= ilowest); + assert(args.ip[1] >= ilowest); + assert(args.ip[2] >= ilowest); + assert(args.ip[3] >= ilowest); + assert(args.op[3] <= oend); + + assert(ilowest == args.ilowest); + assert(ilowest + 6 == args.iend[0]); + (void) ilowest; + + /* finish bitStreams one by one */ + { + size_t const segmentSize = (dstSize + 3) / 4; + BYTE* segmentEnd = (BYTE*) dst; + int i; + for (i = 0; i < 4; ++i) + { + BIT_DStream_t bit; + if (segmentSize <= (size_t) (oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*) dt, + HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) + return ERROR(corruption_detected); + } + } + + /* decoded size */ + return dstSize; +} + +static size_t HUF_decompress4X2_usingDTable_internal(void* dst, + size_t dstSize, + void const* cSrc, + size_t cSrcSize, + HUF_DTable const* DTable, + int flags) { + HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default; + HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop; + + #if DYNAMIC_BMI2 + if (flags & HUF_flags_bmi2) + { + fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2; + #if ZSTD_ENABLE_ASM_X86_64_BMI2 + if (!(flags & HUF_flags_disableAsm)) + { + loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; + } + #endif + } + else + { + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); + } + #endif + + #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + if (!(flags & HUF_flags_disableAsm)) + { + loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; + } + #endif + + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) + { + size_t const ret = + HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); + if (ret != 0) + return ret; + } + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags) { + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize, flags); + if (HUF_isError(hSize)) + return hSize; + if (hSize >= cSrcSize) + return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags); +} + +static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, + void* dst, + 
size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags) { + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); + if (HUF_isError(hSize)) + return hSize; + if (hSize >= cSrcSize) + return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); +} + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { + U32 tableTime; + U32 decode256Time; +} algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] = { + /* single, double, quad */ + {{0, 0}, {1, 1}}, /* Q==0 : impossible */ + {{0, 0}, {1, 1}}, /* Q==1 : impossible */ + {{150, 216}, {381, 119}}, /* Q == 2 : 12-18% */ + {{170, 205}, {514, 112}}, /* Q == 3 : 18-25% */ + {{177, 199}, {539, 110}}, /* Q == 4 : 25-32% */ + {{197, 194}, {644, 107}}, /* Q == 5 : 32-38% */ + {{221, 192}, {735, 107}}, /* Q == 6 : 38-44% */ + {{256, 189}, {881, 106}}, /* Q == 7 : 44-50% */ + {{359, 188}, {1167, 109}}, /* Q == 8 : 50-56% */ + {{582, 187}, {1570, 114}}, /* Q == 9 : 56-62% */ + {{688, 187}, {1712, 122}}, /* Q ==10 : 62-69% */ + {{825, 186}, {1965, 136}}, /* Q ==11 : 69-75% */ + {{976, 185}, {2131, 150}}, /* Q ==12 : 75-81% */ + {{1180, 186}, {2070, 175}}, /* Q ==13 : 81-87% */ + {{1377, 185}, {1731, 202}}, /* Q ==14 : 87-93% */ + {{1412, 185}, {1695, 202}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize) { + assert(dstSize > 0); + assert(dstSize <= 128 * 1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void) dstSize; + (void) cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void) dstSize; + (void) cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { + U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32) (cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32) (dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 + >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags) { + /* validation checks */ + if (dstSize == 0) + return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) + return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) + { + ZSTD_memcpy(dst, cSrc, dstSize); + return dstSize; + } /* not compressed */ + if (cSrcSize == 1) + { + ZSTD_memset(dst, *(const BYTE*) cSrc, dstSize); + return dstSize; + } /* RLE */ + + { + U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void) algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, + flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void) algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, + flags); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, + wkspSize, flags) + : HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, + wkspSize, flags); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable(void* dst, + size_t maxDstSize, + const void* cSrc, + size_t cSrcSize, + const HUF_DTable* DTable, + int flags) { + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void) dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void) dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#else + return dtd.tableType + ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) + : HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags) { + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); + if (HUF_isError(hSize)) + return hSize; + if (hSize >= cSrcSize) + return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); +} +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, + size_t maxDstSize, + const void* cSrc, + size_t cSrcSize, + const HUF_DTable* DTable, + int flags) { + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void) dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void) dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#else + return dtd.tableType + ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) + : HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, + void* dst, + size_t dstSize, + const void* cSrc, + size_t cSrcSize, + void* workSpace, + size_t wkspSize, + int flags) { + /* validation checks */ + if (dstSize == 0) + return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) + return ERROR(corruption_detected); + + { + U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void) algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, + flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void) algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, + flags); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, + wkspSize, flags) + : HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, + wkspSize, flags); +#endif + } +} diff --git a/src/external/decompress/huf_decompress_amd64.S b/src/external/decompress/huf_decompress_amd64.S new file mode 100644 index 00000000..78da291e --- /dev/null +++ b/src/external/decompress/huf_decompress_amd64.S @@ -0,0 +1,595 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "../common/portability_macros.h" + +#if defined(__ELF__) && defined(__GNUC__) +/* Stack marking + * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart + */ +.section .note.GNU-stack,"",%progbits + +#if defined(__aarch64__) +/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64. 
+ * See: https://github.com/facebook/zstd/issues/3841 + * See: https://gcc.godbolt.org/z/sqr5T4ffK + * See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/ + * See: https://reviews.llvm.org/D62609 + */ +.pushsection .note.gnu.property, "a" +.p2align 3 +.long 4 /* size of the name - "GNU\0" */ +.long 0x10 /* size of descriptor */ +.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */ +.asciz "GNU" +.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */ +.long 4 /* pr_datasz - 4 bytes */ +.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */ +.p2align 3 /* pr_padding - bring everything to 8 byte alignment */ +.popsection +#endif + +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +/* Calling convention: + * + * %rdi contains the first argument: HUF_DecompressAsmArgs*. + * %rbp isn't maintained (no frame pointer). + * %rsp contains the stack pointer that grows down. + * No red-zone is assumed, only addresses >= %rsp are used. + * All register contents are preserved. + * + * TODO: Support Windows calling convention. + */ + +ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop) +ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_fast_asm_loop) +ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_fast_asm_loop) +ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop) +.global HUF_decompress4X1_usingDTable_internal_fast_asm_loop +.global HUF_decompress4X2_usingDTable_internal_fast_asm_loop +.global _HUF_decompress4X1_usingDTable_internal_fast_asm_loop +.global _HUF_decompress4X2_usingDTable_internal_fast_asm_loop +.text + +/* Sets up register mappings for clarity. + * op[], bits[], dtable & ip[0] each get their own register. + * ip[1,2,3] & olimit alias var[]. + * %rax is a scratch register. + */ + +#define op0 rsi +#define op1 rbx +#define op2 rcx +#define op3 rdi + +#define ip0 r8 +#define ip1 r9 +#define ip2 r10 +#define ip3 r11 + +#define bits0 rbp +#define bits1 rdx +#define bits2 r12 +#define bits3 r13 +#define dtable r14 +#define olimit r15 + +/* var[] aliases ip[1,2,3] & olimit + * ip[1,2,3] are saved every iteration. + * olimit is only used in compute_olimit. + */ +#define var0 r15 +#define var1 r9 +#define var2 r10 +#define var3 r11 + +/* 32-bit var registers */ +#define vard0 r15d +#define vard1 r9d +#define vard2 r10d +#define vard3 r11d + +/* Calls X(N) for each stream 0, 1, 2, 3. */ +#define FOR_EACH_STREAM(X) \ + X(0); \ + X(1); \ + X(2); \ + X(3) + +/* Calls X(N, idx) for each stream 0, 1, 2, 3. */ +#define FOR_EACH_STREAM_WITH_INDEX(X, idx) \ + X(0, idx); \ + X(1, idx); \ + X(2, idx); \ + X(3, idx) + +/* Define both _HUF_* & HUF_* symbols because MacOS + * C symbols are prefixed with '_' & Linux symbols aren't. + */ +_HUF_decompress4X1_usingDTable_internal_fast_asm_loop: +HUF_decompress4X1_usingDTable_internal_fast_asm_loop: + ZSTD_CET_ENDBRANCH + /* Save all registers - even if they are callee saved for simplicity. 
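
The olimit computations below avoid div instructions: $-3689348814741910323 is 0xCCCCCCCCCCCCCCCD, whose 128-bit product's high half shifted right by 2 gives n/5 (and by 3, n/10 in the 4X2 loop), while $2635249153387078803 (0x2492492492492493) needs the classic subtract/halve/add fixup for n/7. A hedged C check of both identities (__uint128_t is a GCC/Clang extension; the function names are illustrative):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t div5(uint64_t n) { /* hi64(n * 0xCCCC...CD) >> 2 */
        return (uint64_t) (((__uint128_t) n * 0xCCCCCCCCCCCCCCCDull) >> 64) >> 2;
    }
    static uint64_t div7(uint64_t n) { /* fixup form used by the asm below */
        uint64_t const hi = (uint64_t) (((__uint128_t) n * 0x2492492492492493ull) >> 64);
        return (((n - hi) >> 1) + hi) >> 2;
    }
    int main(void) {
        for (uint64_t n = 0; n < 1000000; ++n)
            assert(div5(n) == n / 5 && div7(n) == n / 7);
        return 0;
    }
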
*/ + push %rax + push %rbx + push %rcx + push %rdx + push %rbp + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + push %r12 + push %r13 + push %r14 + push %r15 + + /* Read HUF_DecompressAsmArgs* args from %rax */ + movq %rdi, %rax + movq 0(%rax), %ip0 + movq 8(%rax), %ip1 + movq 16(%rax), %ip2 + movq 24(%rax), %ip3 + movq 32(%rax), %op0 + movq 40(%rax), %op1 + movq 48(%rax), %op2 + movq 56(%rax), %op3 + movq 64(%rax), %bits0 + movq 72(%rax), %bits1 + movq 80(%rax), %bits2 + movq 88(%rax), %bits3 + movq 96(%rax), %dtable + push %rax /* argument */ + push 104(%rax) /* ilowest */ + push 112(%rax) /* oend */ + push %olimit /* olimit space */ + + subq $24, %rsp + +.L_4X1_compute_olimit: + /* Computes how many iterations we can do safely + * %r15, %rax may be clobbered + * rbx, rdx must be saved + * op3 & ip0 mustn't be clobbered + */ + movq %rbx, 0(%rsp) + movq %rdx, 8(%rsp) + + movq 32(%rsp), %rax /* rax = oend */ + subq %op3, %rax /* rax = oend - op3 */ + + /* r15 = (oend - op3) / 5 */ + movabsq $-3689348814741910323, %rdx + mulq %rdx + movq %rdx, %r15 + shrq $2, %r15 + + movq %ip0, %rax /* rax = ip0 */ + movq 40(%rsp), %rdx /* rdx = ilowest */ + subq %rdx, %rax /* rax = ip0 - ilowest */ + movq %rax, %rbx /* rbx = ip0 - ilowest */ + + /* rdx = (ip0 - ilowest) / 7 */ + movabsq $2635249153387078803, %rdx + mulq %rdx + subq %rdx, %rbx + shrq %rbx + addq %rbx, %rdx + shrq $2, %rdx + + /* r15 = min(%rdx, %r15) */ + cmpq %rdx, %r15 + cmova %rdx, %r15 + + /* r15 = r15 * 5 */ + leaq (%r15, %r15, 4), %r15 + + /* olimit = op3 + r15 */ + addq %op3, %olimit + + movq 8(%rsp), %rdx + movq 0(%rsp), %rbx + + /* If (op3 + 20 > olimit) */ + movq %op3, %rax /* rax = op3 */ + cmpq %rax, %olimit /* op3 == olimit */ + je .L_4X1_exit + + /* If (ip1 < ip0) go to exit */ + cmpq %ip0, %ip1 + jb .L_4X1_exit + + /* If (ip2 < ip1) go to exit */ + cmpq %ip1, %ip2 + jb .L_4X1_exit + + /* If (ip3 < ip2) go to exit */ + cmpq %ip2, %ip3 + jb .L_4X1_exit + +/* Reads top 11 bits from bits[n] + * Loads dt[bits[n]] into var[n] + */ +#define GET_NEXT_DELT(n) \ + movq $53, %var##n; \ + shrxq %var##n, %bits##n, %var##n; \ + movzwl (%dtable,%var##n,2),%vard##n + +/* var[n] must contain the DTable entry computed with GET_NEXT_DELT + * Moves var[n] to %rax + * bits[n] <<= var[n] & 63 + * op[n][idx] = %rax >> 8 + * %ah is a way to access bits [8, 16) of %rax + */ +#define DECODE_FROM_DELT(n, idx) \ + movq %var##n, %rax; \ + shlxq %var##n, %bits##n, %bits##n; \ + movb %ah, idx(%op##n) + +/* Assumes GET_NEXT_DELT has been called. 
+ * Calls DECODE_FROM_DELT then GET_NEXT_DELT + */ +#define DECODE_AND_GET_NEXT(n, idx) \ + DECODE_FROM_DELT(n, idx); \ + GET_NEXT_DELT(n) \ + +/* // ctz & nbBytes is stored in bits[n] + * // nbBits is stored in %rax + * ctz = CTZ[bits[n]] + * nbBits = ctz & 7 + * nbBytes = ctz >> 3 + * op[n] += 5 + * ip[n] -= nbBytes + * // Note: x86-64 is little-endian ==> no bswap + * bits[n] = MEM_readST(ip[n]) | 1 + * bits[n] <<= nbBits + */ +#define RELOAD_BITS(n) \ + bsfq %bits##n, %bits##n; \ + movq %bits##n, %rax; \ + andq $7, %rax; \ + shrq $3, %bits##n; \ + leaq 5(%op##n), %op##n; \ + subq %bits##n, %ip##n; \ + movq (%ip##n), %bits##n; \ + orq $1, %bits##n; \ + shlx %rax, %bits##n, %bits##n + + /* Store clobbered variables on the stack */ + movq %olimit, 24(%rsp) + movq %ip1, 0(%rsp) + movq %ip2, 8(%rsp) + movq %ip3, 16(%rsp) + + /* Call GET_NEXT_DELT for each stream */ + FOR_EACH_STREAM(GET_NEXT_DELT) + + .p2align 6 + +.L_4X1_loop_body: + /* Decode 5 symbols in each of the 4 streams (20 total) + * Must have called GET_NEXT_DELT for each stream + */ + FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0) + FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1) + FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2) + FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3) + FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4) + + /* Load ip[1,2,3] from stack (var[] aliases them) + * ip[] is needed for RELOAD_BITS + * Each will be stored back to the stack after RELOAD + */ + movq 0(%rsp), %ip1 + movq 8(%rsp), %ip2 + movq 16(%rsp), %ip3 + + /* Reload each stream & fetch the next table entry + * to prepare for the next iteration + */ + RELOAD_BITS(0) + GET_NEXT_DELT(0) + + RELOAD_BITS(1) + movq %ip1, 0(%rsp) + GET_NEXT_DELT(1) + + RELOAD_BITS(2) + movq %ip2, 8(%rsp) + GET_NEXT_DELT(2) + + RELOAD_BITS(3) + movq %ip3, 16(%rsp) + GET_NEXT_DELT(3) + + /* If op3 < olimit: continue the loop */ + cmp %op3, 24(%rsp) + ja .L_4X1_loop_body + + /* Reload ip[1,2,3] from stack */ + movq 0(%rsp), %ip1 + movq 8(%rsp), %ip2 + movq 16(%rsp), %ip3 + + /* Re-compute olimit */ + jmp .L_4X1_compute_olimit + +#undef GET_NEXT_DELT +#undef DECODE_FROM_DELT +#undef DECODE +#undef RELOAD_BITS +.L_4X1_exit: + addq $24, %rsp + + /* Restore stack (oend & olimit) */ + pop %rax /* olimit */ + pop %rax /* oend */ + pop %rax /* ilowest */ + pop %rax /* arg */ + + /* Save ip / op / bits */ + movq %ip0, 0(%rax) + movq %ip1, 8(%rax) + movq %ip2, 16(%rax) + movq %ip3, 24(%rax) + movq %op0, 32(%rax) + movq %op1, 40(%rax) + movq %op2, 48(%rax) + movq %op3, 56(%rax) + movq %bits0, 64(%rax) + movq %bits1, 72(%rax) + movq %bits2, 80(%rax) + movq %bits3, 88(%rax) + + /* Restore registers */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rbp + pop %rdx + pop %rcx + pop %rbx + pop %rax + ret + +_HUF_decompress4X2_usingDTable_internal_fast_asm_loop: +HUF_decompress4X2_usingDTable_internal_fast_asm_loop: + ZSTD_CET_ENDBRANCH + /* Save all registers - even if they are callee saved for simplicity. 
*/ + push %rax + push %rbx + push %rcx + push %rdx + push %rbp + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + push %r12 + push %r13 + push %r14 + push %r15 + + movq %rdi, %rax + movq 0(%rax), %ip0 + movq 8(%rax), %ip1 + movq 16(%rax), %ip2 + movq 24(%rax), %ip3 + movq 32(%rax), %op0 + movq 40(%rax), %op1 + movq 48(%rax), %op2 + movq 56(%rax), %op3 + movq 64(%rax), %bits0 + movq 72(%rax), %bits1 + movq 80(%rax), %bits2 + movq 88(%rax), %bits3 + movq 96(%rax), %dtable + push %rax /* argument */ + push %rax /* olimit */ + push 104(%rax) /* ilowest */ + + movq 112(%rax), %rax + push %rax /* oend3 */ + + movq %op3, %rax + push %rax /* oend2 */ + + movq %op2, %rax + push %rax /* oend1 */ + + movq %op1, %rax + push %rax /* oend0 */ + + /* Scratch space */ + subq $8, %rsp + +.L_4X2_compute_olimit: + /* Computes how many iterations we can do safely + * %r15, %rax may be clobbered + * rdx must be saved + * op[1,2,3,4] & ip0 mustn't be clobbered + */ + movq %rdx, 0(%rsp) + + /* We can consume up to 7 input bytes each iteration. */ + movq %ip0, %rax /* rax = ip0 */ + movq 40(%rsp), %rdx /* rdx = ilowest */ + subq %rdx, %rax /* rax = ip0 - ilowest */ + movq %rax, %r15 /* r15 = ip0 - ilowest */ + + /* rdx = rax / 7 */ + movabsq $2635249153387078803, %rdx + mulq %rdx + subq %rdx, %r15 + shrq %r15 + addq %r15, %rdx + shrq $2, %rdx + + /* r15 = (ip0 - ilowest) / 7 */ + movq %rdx, %r15 + + /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */ + movq 8(%rsp), %rax /* rax = oend0 */ + subq %op0, %rax /* rax = oend0 - op0 */ + movq 16(%rsp), %rdx /* rdx = oend1 */ + subq %op1, %rdx /* rdx = oend1 - op1 */ + + cmpq %rax, %rdx + cmova %rax, %rdx /* rdx = min(%rdx, %rax) */ + + movq 24(%rsp), %rax /* rax = oend2 */ + subq %op2, %rax /* rax = oend2 - op2 */ + + cmpq %rax, %rdx + cmova %rax, %rdx /* rdx = min(%rdx, %rax) */ + + movq 32(%rsp), %rax /* rax = oend3 */ + subq %op3, %rax /* rax = oend3 - op3 */ + + cmpq %rax, %rdx + cmova %rax, %rdx /* rdx = min(%rdx, %rax) */ + + movabsq $-3689348814741910323, %rax + mulq %rdx + shrq $3, %rdx /* rdx = rdx / 10 */ + + /* r15 = min(%rdx, %r15) */ + cmpq %rdx, %r15 + cmova %rdx, %r15 + + /* olimit = op3 + 5 * r15 */ + movq %r15, %rax + leaq (%op3, %rax, 4), %olimit + addq %rax, %olimit + + movq 0(%rsp), %rdx + + /* If (op3 + 10 > olimit) */ + movq %op3, %rax /* rax = op3 */ + cmpq %rax, %olimit /* op3 == olimit */ + je .L_4X2_exit + + /* If (ip1 < ip0) go to exit */ + cmpq %ip0, %ip1 + jb .L_4X2_exit + + /* If (ip2 < ip1) go to exit */ + cmpq %ip1, %ip2 + jb .L_4X2_exit + + /* If (ip3 < ip2) go to exit */ + cmpq %ip2, %ip3 + jb .L_4X2_exit + +#define DECODE(n, idx) \ + movq %bits##n, %rax; \ + shrq $53, %rax; \ + movzwl 0(%dtable,%rax,4),%r8d; \ + movzbl 2(%dtable,%rax,4),%r15d; \ + movzbl 3(%dtable,%rax,4),%eax; \ + movw %r8w, (%op##n); \ + shlxq %r15, %bits##n, %bits##n; \ + addq %rax, %op##n + +#define RELOAD_BITS(n) \ + bsfq %bits##n, %bits##n; \ + movq %bits##n, %rax; \ + shrq $3, %bits##n; \ + andq $7, %rax; \ + subq %bits##n, %ip##n; \ + movq (%ip##n), %bits##n; \ + orq $1, %bits##n; \ + shlxq %rax, %bits##n, %bits##n + + + movq %olimit, 48(%rsp) + + .p2align 6 + +.L_4X2_loop_body: + /* We clobber r8, so store it on the stack */ + movq %r8, 0(%rsp) + + /* Decode 5 symbols from each of the 4 streams (20 symbols total). 
*/ + FOR_EACH_STREAM_WITH_INDEX(DECODE, 0) + FOR_EACH_STREAM_WITH_INDEX(DECODE, 1) + FOR_EACH_STREAM_WITH_INDEX(DECODE, 2) + FOR_EACH_STREAM_WITH_INDEX(DECODE, 3) + FOR_EACH_STREAM_WITH_INDEX(DECODE, 4) + + /* Reload r8 */ + movq 0(%rsp), %r8 + + FOR_EACH_STREAM(RELOAD_BITS) + + cmp %op3, 48(%rsp) + ja .L_4X2_loop_body + jmp .L_4X2_compute_olimit + +#undef DECODE +#undef RELOAD_BITS +.L_4X2_exit: + addq $8, %rsp + /* Restore stack (oend & olimit) */ + pop %rax /* oend0 */ + pop %rax /* oend1 */ + pop %rax /* oend2 */ + pop %rax /* oend3 */ + pop %rax /* ilowest */ + pop %rax /* olimit */ + pop %rax /* arg */ + + /* Save ip / op / bits */ + movq %ip0, 0(%rax) + movq %ip1, 8(%rax) + movq %ip2, 16(%rax) + movq %ip3, 24(%rax) + movq %op0, 32(%rax) + movq %op1, 40(%rax) + movq %op2, 48(%rax) + movq %op3, 56(%rax) + movq %bits0, 64(%rax) + movq %bits1, 72(%rax) + movq %bits2, 80(%rax) + movq %bits3, 88(%rax) + + /* Restore registers */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rbp + pop %rdx + pop %rcx + pop %rbx + pop %rax + ret + +#endif diff --git a/src/external/decompress/zstd_ddict.cpp b/src/external/decompress/zstd_ddict.cpp new file mode 100644 index 00000000..f1332fc5 --- /dev/null +++ b/src/external/decompress/zstd_ddict.cpp @@ -0,0 +1,257 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/huf.h" +#include "zstd_decompress_internal.h" +#include "zstd_ddict.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + #include "../legacy/zstd_legacy.h" +#endif + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) { + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) { + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*) ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + 
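+    /* Fuzzing-only bookkeeping: remember the dictionary bounds so that
+     * instrumented builds can check reads against them. */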
dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) + { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } + else + { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) { + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) + return 0; + + if (ddict->dictSize < 8) + { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { + U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) + { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*) ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_loadDEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, + size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) { + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) + { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) + dictSize = 0; + } + else + { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) + return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable) ((ZSTD_HUFFDTABLE_CAPACITY_LOG) + * 0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR(ZSTD_loadEntropy_intoDDict(ddict, dictContentType), ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, + size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) { + if ((!customMem.customAlloc) ^ (!customMem.customFree)) + return NULL; + + { + ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) + return NULL; + ddict->cMem = customMem; + { + size_t const initResult = + ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) + { + ZSTD_freeDDict(ddict); + return NULL; + } + } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) { + ZSTD_customMem const allocator = {NULL, NULL, NULL}; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! 
ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) { + ZSTD_customMem const allocator = {NULL, NULL, NULL}; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, + allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict(void* sBuffer, + size_t sBufferSize, + const void* dict, + size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) { + size_t const neededSpace = + sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*) sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t) sBuffer & 7) + return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) + return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) + { + ZSTD_memcpy(ddict + 1, dict, dictSize); /* local copy */ + dict = ddict + 1; + } + if (ZSTD_isError( + ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType))) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) { + if (ddict == NULL) + return 0; /* support free on NULL */ + { + ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) { + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) { + if (ddict == NULL) + return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0); +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) { + if (ddict == NULL) + return 0; + return ddict->dictID; +} diff --git a/src/external/decompress/zstd_ddict.h b/src/external/decompress/zstd_ddict.h new file mode 100644 index 00000000..f50f2936 --- /dev/null +++ b/src/external/decompress/zstd_ddict.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/zstd_deps.h" /* size_t */ +#include "../zstd.h" /* ZSTD_DDict, and several public functions */ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + +#endif /* ZSTD_DDICT_H */ diff --git a/src/external/decompress/zstd_decompress.cpp b/src/external/decompress/zstd_decompress.cpp new file mode 100644 index 00000000..1116410f --- /dev/null +++ b/src/external/decompress/zstd_decompress.cpp @@ -0,0 +1,2664 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE + #define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT + #define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT + #define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32) 1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. 
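+ *
+ * For example, with the default of 16 below, a streaming caller that keeps
+ * calling ZSTD_decompressStream() with an empty input and an already-full
+ * output buffer gets an error after 16 consecutive no-progress calls,
+ * rather than spinning forever.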
+ */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX + #define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ +#include "../common/error_private.h" +#include "../common/zstd_internal.h" /* blockProperties_t */ +#include "../common/mem.h" /* low level memory routines */ +#include "../common/bits.h" /* ZSTD_highbit32 */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/huf.h" +#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */ +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + #include "../legacy/zstd_legacy.h" +#endif + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT \ + 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. + */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, + "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) + { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) + { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. 
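+ *
+ * Note on the trigger used by ZSTD_DDictHashSet_addDDict() below: the
+ * integer condition count * COUNT_MULT / size * SIZE_MULT != 0 groups as
+ * ((count * 4) / size) * 3 under C precedence, so with the base table size
+ * of 64 the first expansion appears to fire at count == 16 (load 0.25),
+ * earlier (more eager) than the nominal 0.75; that is conservative rather
+ * than harmful.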
+ */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = + (const ZSTD_DDict**) ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, + newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) + { + if (oldTable[i] != NULL) + { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*) oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) + { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) + { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } + else + { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. + */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = + (ZSTD_DDictHashSet*) ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + if (!ret) + return NULL; + ret->ddictPtrTable = (const ZSTD_DDict**) ZSTD_customCalloc( + DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + if (!ret->ddictPtrTable) + { + ZSTD_customFree(ret, customMem); + return NULL; + } + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) + { + ZSTD_customFree((void*) hashSet->ddictPtrTable, customMem); + } + if (hashSet) + { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. 
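+ *
+ * Caller-side sketch of the feature this set serves (API names as published
+ * in zstd.h; the surrounding handling is illustrative only):
+ *
+ *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts,
+ *                            ZSTD_rmd_refMultipleDDicts);
+ *     ZSTD_DCtx_refDDict(dctx, ddict1); // each referenced DDict lands here
+ *     ZSTD_DCtx_refDDict(dctx, ddict2);
+ *     // at decode time, the frame's dictID selects the matching DDict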
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, + const ZSTD_DDict* ddict, + ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", + ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT + / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT + != 0) + { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx) { + if (dctx == NULL) + return 0; /* support sizeof NULL */ + return sizeof(*dctx) + ZSTD_sizeof_DDict(dctx->ddictLocal) + dctx->inBuffSize + + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) { + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert((format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless)); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) { + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; + dctx->disableHufAsm = 0; + dctx->maxBlockSizeParam = 0; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; +#endif + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->isFrameDecompression = 1; +#if DYNAMIC_BMI2 + dctx->bmi2 = ZSTD_cpuSupportsBmi2(); +#endif + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize) { + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t) workspace & 7) + return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) + return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*) (dctx + 1); + return dctx; +} + +static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) { + if ((!customMem.customAlloc) ^ (!customMem.customFree)) + return NULL; + + { + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) + return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) { + return ZSTD_createDCtx_internal(customMem); +} + +ZSTD_DCtx* ZSTD_createDCtx(void) { + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); +} + +static void 
ZSTD_clearDict(ZSTD_DCtx* dctx) { + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) { + if (dctx == NULL) + return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { + ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + if (dctx->ddictSet) + { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { + size_t const toCopy = (size_t) ((char*) (&dstDCtx->inBuff) - (char*) dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) + { + const ZSTD_DDict* frameDDict = + ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) + { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) { + if (size < ZSTD_FRAMEIDSIZE) + return 0; + { + U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) + return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) + return 1; +#endif + return 0; +} + +/*! ZSTD_isSkippableFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + */ +unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size) { + if (size < ZSTD_FRAMEIDSIZE) + return 0; + { + U32 const magic = MEM_readLE32(buffer); + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + return 1; + } + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. 
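+ * Worked example: a ZSTD_f_zstd1 frame with singleSegment=0, a 4-byte
+ * dictID (code 3) and an 8-byte frame content size (code 3) needs
+ * 5 (magic + fhd) + 1 (window byte) + 4 + 8 = 18 bytes, the format maximum;
+ * the minimum (singleSegment=1, no dictID, fcs code 0) is
+ * 5 + 0 + 0 + 0 + 1 = 6 bytes.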
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { + BYTE const fhd = ((const BYTE*) src)[minInputSize - 1]; + U32 const dictID = fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + ZSTD_did_fieldSize[dictID] + + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) { + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, +** or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, + const void* src, + size_t srcSize, + ZSTD_format_e format) { + const BYTE* ip = (const BYTE*) src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, + srcSize); + + if (srcSize > 0) + { + /* note : technically could be considered an assert(), since it's an invalid entry */ + RETURN_ERROR_IF(src == NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0"); + } + if (srcSize < minInputSize) + { + if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) + { + /* when receiving less than @minInputSize bytes, + * control these bytes at least correspond to a supported magic number + * in order to error out early if they don't. 
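+ * The hbuf sequence below implements this: it pre-fills a 4-byte buffer
+ * with the expected magic, overlays however many prefix bytes are actually
+ * available, and compares; a short-but-matching prefix then requests more
+ * input instead of failing with prefix_unknown.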
+ **/ + size_t const toCopy = MIN(4, srcSize); + unsigned char hbuf[4]; + MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER); + assert(src != NULL); + ZSTD_memcpy(hbuf, src, toCopy); + if (MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER) + { + /* not a zstd frame : let's check if it's a skippable frame */ + MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START); + ZSTD_memcpy(hbuf, src, toCopy); + if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) + { + RETURN_ERROR(prefix_unknown, + "first bytes don't correspond to any supported magic number"); + } + } + } + return minInputSize; + } + + ZSTD_memset( + zfhPtr, 0, + sizeof( + *zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */ + if ((format != ZSTD_f_zstd1_magicless) && (MEM_readLE32(src) != ZSTD_MAGICNUMBER)) + { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char*) src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { + size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) + return fhsize; + zfhPtr->headerSize = (U32) fhsize; + } + + { + BYTE const fhdByte = ip[minInputSize - 1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte & 3; + U32 const checksumFlag = (fhdByte >> 2) & 1; + U32 const singleSegment = (fhdByte >> 5) & 1; + U32 const fcsID = fhdByte >> 6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + + if (!singleSegment) + { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte & 7); + } + switch (dictIDSizeCode) + { + default : + assert(0); /* impossible */ + ZSTD_FALLTHROUGH; + case 0 : + break; + case 1 : + dictID = ip[pos]; + pos++; + break; + case 2 : + dictID = MEM_readLE16(ip + pos); + pos += 2; + break; + case 3 : + dictID = MEM_readLE32(ip + pos); + pos += 4; + break; + } + switch (fcsID) + { + default : + assert(0); /* impossible */ + ZSTD_FALLTHROUGH; + case 0 : + if (singleSegment) + frameContentSize = ip[pos]; + break; + case 1 : + frameContentSize = MEM_readLE16(ip + pos) + 256; + break; + case 2 : + frameContentSize = MEM_readLE32(ip + pos); + break; + case 3 : + frameContentSize = MEM_readLE64(ip + pos); + break; + } + if (singleSegment) + windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. 
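+ * Typical caller pattern (a sketch; the helper names are placeholders):
+ *
+ *     ZSTD_frameHeader zfh;
+ *     size_t const r = ZSTD_getFrameHeader(&zfh, buf, have);
+ *     if (ZSTD_isError(r)) handle_bad_header();
+ *     else if (r > 0) read_more_input(r); // gather >= r bytes, then retry
+ *     else use_header(&zfh);              // header fully decoded
+ *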
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) {
+    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+}
+
+/** ZSTD_getFrameContentSize() :
+ * compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void* src, size_t srcSize) {
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize))
+    {
+        unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
+        return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
+    }
+#endif
+    {
+        ZSTD_frameHeader zfh;
+        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+            return ZSTD_CONTENTSIZE_ERROR;
+        if (zfh.frameType == ZSTD_skippableFrame)
+        {
+            return 0;
+        }
+        else
+        {
+            return zfh.frameContentSize;
+        }
+    }
+}
+
+static size_t readSkippableFrameSize(void const* src, size_t srcSize) {
+    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
+    U32 sizeU32;
+
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+    sizeU32 = MEM_readLE32((BYTE const*) src + ZSTD_FRAMEIDSIZE);
+    RETURN_ERROR_IF((U32) (sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported, "");
+    {
+        size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+        return skippableSize;
+    }
+}
+
+/*! ZSTD_readSkippableFrame() :
+ * Retrieves content of a skippable frame, and writes it to dst buffer.
+ *
+ * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
+ * in the magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
+ *
+ * @return : number of bytes written or a ZSTD error.
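+ *
+ * Layout reminder: a skippable frame is
+ *   4 bytes : little-endian magic, ZSTD_MAGIC_SKIPPABLE_START + magicVariant
+ *             (the low nibble carries the variant, 0-15)
+ *   4 bytes : little-endian payload size
+ *   payload
+ * so the value returned here is frameSize - ZSTD_SKIPPABLEHEADERSIZE (8).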
+ */ +size_t ZSTD_readSkippableFrame(void* dst, + size_t dstCapacity, + unsigned* magicVariant, /* optional, can be NULL */ + const void* src, + size_t srcSize) { + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + { + U32 const magicNumber = MEM_readLE32(src); + size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); + size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; + + /* check input validity */ + RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); + RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE + || skippableFrameSize > srcSize, + srcSize_wrong, ""); + RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); + + /* deliver payload */ + if (skippableContentSize > 0 && dst != NULL) + ZSTD_memcpy(dst, (const BYTE*) src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); + if (magicVariant != NULL) + *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; + return skippableContentSize; + } +} + +/** ZSTD_findDecompressedSize() : + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * note: compatible with legacy mode + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) { + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) + { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) + return ZSTD_CONTENTSIZE_ERROR; + assert(skippableSize <= srcSize); + + src = (const BYTE*) src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { + unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize); + if (fcs >= ZSTD_CONTENTSIZE_ERROR) + return fcs; + + if (totalDstSize + fcs < totalDstSize) + return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */ + totalDstSize += fcs; + } + /* skip to next frame */ + { + size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) + return ZSTD_CONTENTSIZE_ERROR; + assert(frameSrcSize <= srcSize); + + src = (const BYTE*) src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) + return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) { + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. 
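+ * Also (re)arms checksum tracking: when the header's checksumFlag is set and
+ * validation has not been disabled, the XXH64 state is reset here, before
+ * any block of the frame is decoded.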
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { + size_t const result = + ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) + return result; /* invalid header */ + RETURN_ERROR_IF(result > 0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) + { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) + XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) { + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo +ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) { + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) + || frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } + else + { + const BYTE* ip = (const BYTE*) src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { + size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) + { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) + break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) + { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.nbBlocks = nbBlocks; + frameSizeInfo.compressedSize = (size_t) (ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
zfh.frameContentSize + : (unsigned long long) nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +static size_t +ZSTD_findFrameCompressedSize_advanced(const void* src, size_t srcSize, ZSTD_format_e format) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format); + return frameSizeInfo.compressedSize; +} + +/** ZSTD_findFrameCompressedSize() : + * See docs in zstd.h + * Note: compatible with legacy mode */ +size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) { + return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1); +} + +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) { + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) + { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*) src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + +size_t ZSTD_decompressionMargin(void const* src, size_t srcSize) { + size_t margin = 0; + unsigned maxBlockSize = 0; + + /* Iterate over each frame */ + while (srcSize > 0) + { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + ZSTD_frameHeader zfh; + + FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), ""); + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ERROR(corruption_detected); + + if (zfh.frameType == ZSTD_frame) + { + /* Add the frame header to our margin */ + margin += zfh.headerSize; + /* Add the checksum to our margin */ + margin += zfh.checksumFlag ? 4 : 0; + /* Add 3 bytes per block */ + margin += 3 * frameSizeInfo.nbBlocks; + + /* Compute the max block size */ + maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax); + } + else + { + assert(zfh.frameType == ZSTD_skippableFrame); + /* Add the entire skippable frame size to our margin. */ + margin += compressedSize; + } + + assert(srcSize >= compressedSize); + src = (const BYTE*) src + compressedSize; + srcSize -= compressedSize; + } + + /* Add the max block size back to the margin. */ + margin += maxBlockSize; + + return margin; +} + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
*/ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned) blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*) blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) + { + if (srcSize == 0) + return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memmove(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, BYTE b, size_t regenSize) { + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) + { + if (regenSize == 0) + return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, + U64 uncompressedSize, + U64 compressedSize, + unsigned streaming) { +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) + { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) + { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t) uncompressedSize; + trace.compressedSize = (size_t) compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void) dctx; + (void) uncompressedSize; + (void) compressedSize; + (void) streaming; +#endif +} + + +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void** srcPtr, size_t* srcSizePtr) { + const BYTE* const istart = (const BYTE*) (*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = dstCapacity != 0 ? 
ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int) *srcSizePtr); + + /* check */ + RETURN_ERROR_IF(remainingSrcSize + < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format) + ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { + size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) + return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize + ZSTD_blockHeaderSize, srcSize_wrong, + ""); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize), ""); + ip += frameHeaderSize; + remainingSrcSize -= frameHeaderSize; + } + + /* Shrink the blockSizeMax if enabled */ + if (dctx->maxBlockSizeParam != 0) + dctx->fParams.blockSizeMax = + MIN(dctx->fParams.blockSizeMax, (unsigned) dctx->maxBlockSizeParam); + + /* Loop on each block */ + while (1) + { + BYTE* oBlockEnd = oend; + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + if (ip >= op && ip < oBlockEnd) + { + /* We are decompressing in-place. Limit the output pointer so that we + * don't overwrite the block that we are currently reading. This will + * fail decompression if the input & output pointers aren't spaced + * far enough apart. + * + * This is important to set, even when the pointers are far enough + * apart, because ZSTD_decompressBlock_internal() can decide to store + * literals in the output buffer, after the block it is decompressing. + * Since we don't want anything to overwrite our input, we have to tell + * ZSTD_decompressBlock_internal to never write past ip. + * + * See ZSTD_allocateLiteralsBuffer() for reference. + */ + oBlockEnd = op + (ip - op); + } + + switch (blockProperties.blockType) + { + case bt_compressed : + assert(dctx->isFrameDecompression == 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t) (oBlockEnd - op), ip, + cBlockSize, not_streaming); + break; + case bt_raw : + /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. 
*/ + decodedSize = ZSTD_copyRawBlock(op, (size_t) (oend - op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = + ZSTD_setRleBlock(op, (size_t) (oBlockEnd - op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default : + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(decodedSize, "Block decompression failure"); + DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned) decodedSize); + if (dctx->validateChecksum) + { + XXH64_update(&dctx->xxhState, op, decodedSize); + } + if (decodedSize) /* support dst = NULL,0 */ + { + op += decodedSize; + } + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) + break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + { + RETURN_ERROR_IF((U64) (op - ostart) != dctx->fParams.frameContentSize, corruption_detected, + ""); + } + if (dctx->fParams.checksumFlag) + { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize < 4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) + { + U32 const checkCalc = (U32) XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64) (op - ostart), (U64) (ip - istart), /* streaming */ 0); + /* Allow caller to get size read */ + DEBUGLOG(4, + "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", + op - ostart, ip - (const BYTE*) *srcPtr); + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t) (op - ostart); +} + +static ZSTD_ALLOW_POINTER_OVERFLOW_ATTR size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize, + const void* dict, + size_t dictSize, + const ZSTD_DDict* ddict) { + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict == NULL || ddict == NULL); /* either dict or ddict set, not both */ + + if (ddict) + { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) + { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) + { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) + return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + if (ZSTD_isError(decodedSize)) + return decodedSize; + + { + unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize); + RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, + "Corrupted frame header!"); + if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) + { + RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected, + "Frame header size does not match decoded size!"); + } + } + + assert(decodedSize <= dstCapacity); + dst = (BYTE*) dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*) src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) + { + U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(5, "reading magic number %08X", (unsigned) magicNumber); + if 
((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + { + /* skippable frame detected : skip it */ + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "invalid skippable frame"); + assert(skippableSize <= srcSize); + + src = (const BYTE*) src + skippableSize; + srcSize -= skippableSize; + continue; /* check next frame */ + } + } + + if (ddict) + { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } + else + { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { + const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); + RETURN_ERROR_IF((ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame == 1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. " + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) + return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*) dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t) ((BYTE*) dst - (BYTE*) dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize, + const void* dict, + size_t dictSize) { + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) { + switch (dctx->dictUses) + { + default : + assert(0 /* Impossible */); + ZSTD_FALLTHROUGH; + case ZSTD_dont_use : + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely : + return dctx->ddict; + case ZSTD_use_once : + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE >= 1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem); + RETURN_ERROR_IF(dctx == NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we + * allow taking a partial block as the input. 
Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return BOUNDED(1, inputSize, dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch (dctx->stage) + { + default : /* should not happen */ + assert(0); + ZSTD_FALLTHROUGH; + case ZSTDds_getFrameHeaderSize : + ZSTD_FALLTHROUGH; + case ZSTDds_decodeFrameHeader : + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader : + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock : + return ZSTDnit_block; + case ZSTDds_decompressLastBlock : + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum : + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader : + ZSTD_FALLTHROUGH; + case ZSTDds_skipFrame : + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned) srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), + srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) + { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) + { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE + - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } + } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) + return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader : + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader : { + blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) + return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, + "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + 
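+        /* Header decode only records what the next call must supply:
+         * `expected` bytes of a block of type `bType` (plus `rleSize` for
+         * RLE); the payload itself is consumed by the ZSTDds_decompressBlock
+         * cases below, on the following call. */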
dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) + { + dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) + { + if (dctx->fParams.checksumFlag) + { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } + else + { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } + else + { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock : + case ZSTDds_decompressBlock : + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { + size_t rSize; + switch (dctx->bType) + { + case bt_compressed : + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + assert(dctx->isFrameDecompression == 1); + rSize = + ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*) src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default : + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, + "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned) rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) + XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*) dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
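+ * For a streamed raw block, dctx->expected counts the bytes still missing.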
*/ + if (dctx->expected > 0) + { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) + { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", + (unsigned) dctx->decodedSize); + RETURN_ERROR_IF(dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) + { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } + else + { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, + /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } + else + { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum : + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) + { + U32 const h32 = (U32) XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", + (unsigned) h32, (unsigned) check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader : + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + assert(dctx->format != ZSTD_f_zstd1_magicless); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, + srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32( + dctx->headerBuffer + + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame : + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default : + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, + "impossible to reach"); /* some compilers require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*) dict + - ((const char*) (dctx->previousDstEnd) - (const char*) (dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*) dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
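+ * note : the 8-byte header (magic + dictID) is skipped before the entropy tables are read.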
+ * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize) { + const BYTE* dictPtr = (const BYTE*) dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) + == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) + == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + + sizeof(entropy->MLTable) + >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { + void* const workspace = + &entropy + ->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = + sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, + workspace, workspaceSize, /* flags */ 0); +#else + size_t const hSize = + HUF_readDTableX2_wksp(entropy->hufTable, dictPtr, (size_t) (dictEnd - dictPtr), workspace, + workspaceSize, /* flags */ 0); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { + short offcodeNCount[MaxOff + 1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount( + offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t) (dictEnd - dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable(entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, + offcodeLog, entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += offcodeHeaderSize; + } + + { + short matchlengthNCount[MaxML + 1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = + FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, + (size_t) (dictEnd - dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, + ML_bits, matchlengthLog, entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { + short litlengthNCount[MaxLL + 1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = + FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, + (size_t) (dictEnd - dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable(entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, + litlengthLog, entropy->workspace, 
sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr + 12 > dictEnd, dictionary_corrupted, ""); + { + int i; + size_t const dictContentSize = (size_t) (dictEnd - (dictPtr + 12)); + for (i = 0; i < 3; i++) + { + U32 const rep = MEM_readLE32(dictPtr); + dictPtr += 4; + RETURN_ERROR_IF(rep == 0 || rep > dictContentSize, dictionary_corrupted, ""); + entropy->rep[i] = rep; + } + } + + return (size_t) (dictPtr - (const BYTE*) dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { + if (dictSize < 8) + return ZSTD_refDictContent(dctx, dict, dictSize); + { + U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) + { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } + } + dctx->dictID = MEM_readLE32((const char*) dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { + size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*) dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { + assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable) ((ZSTD_HUFFDTABLE_CAPACITY_LOG) + * 0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + dctx->isFrameDecompression = 1; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { + FORWARD_IF_ERROR(ZSTD_decompressBegin(dctx), ""); + if (dict && dictSize) + RETURN_ERROR_IF(ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) + { + const char* const dictStart = (const char*) ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR(ZSTD_decompressBegin(dctx), ""); + if (ddict) + { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. 
+ * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) { + if (dictSize < 8) + return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) + return 0; + return MEM_readLE32((const char*) dict + ZSTD_FRAMEIDSIZE); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden piece of information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) { + ZSTD_frameHeader zfp = {0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0}; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) + return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize, + const ZSTD_DDict* ddict) { + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) { + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize) { + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) { + return ZSTD_createDCtx_internal(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) { return ZSTD_freeDCtx(zds); } + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, + size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) + { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, + dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, 
ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, + const void* prefix, + size_t prefixSize, + ZSTD_dictContentType_e dictContentType) { + FORWARD_IF_ERROR( + ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), + ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) { + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), ""); + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary(zds, dict, dictSize), ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) { + DEBUGLOG(4, "ZSTD_initDStream"); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), ""); + FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), ""); + return ZSTD_startingInputLength(zds->format); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) { + DEBUGLOG(4, "ZSTD_initDStream_usingDDict"); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(dctx, ddict), ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). 
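+ * note : only the session is reset; parameters and any loaded dictionary are retained.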
+ * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { + DEBUGLOG(4, "ZSTD_resetDStream"); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) + { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) + { + if (dctx->ddictSet == NULL) + { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) + { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert( + !dctx + ->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), + ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) { + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t) 1 << bounds.lowerBound; + size_t const max = (size_t) 1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) { + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int) format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) { + ZSTD_bounds bounds = {0, 0, 0}; + switch (dParam) + { + case ZSTD_d_windowLogMax : + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format : + bounds.lowerBound = (int) ZSTD_f_zstd1; + bounds.upperBound = (int) ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer : + bounds.lowerBound = (int) ZSTD_bm_buffered; + bounds.upperBound = (int) ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum : + bounds.lowerBound = (int) ZSTD_d_validateChecksum; + bounds.upperBound = (int) ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts : + bounds.lowerBound = (int) ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int) ZSTD_rmd_refMultipleDDicts; + return bounds; + case ZSTD_d_disableHuffmanAssembly : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + case ZSTD_d_maxBlockSize : + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; + return bounds; + + default :; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) { + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) + return 0; + if (value < bounds.lowerBound) + return 0; + if (value > bounds.upperBound) + return 0; + return 1; +} + +#define CHECK_DBOUNDS(p, v) \ + { RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); } + +size_t 
ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) { + switch (param) + { + case ZSTD_d_windowLogMax : + *value = (int) ZSTD_highbit32((U32) dctx->maxWindowSize); + return 0; + case ZSTD_d_format : + *value = (int) dctx->format; + return 0; + case ZSTD_d_stableOutBuffer : + *value = (int) dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum : + *value = (int) dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts : + *value = (int) dctx->refMultipleDDicts; + return 0; + case ZSTD_d_disableHuffmanAssembly : + *value = (int) dctx->disableHufAsm; + return 0; + case ZSTD_d_maxBlockSize : + *value = dctx->maxBlockSizeParam; + return 0; + default :; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch (dParam) + { + case ZSTD_d_windowLogMax : + if (value == 0) + value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t) 1) << value; + return 0; + case ZSTD_d_format : + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e) value; + return 0; + case ZSTD_d_stableOutBuffer : + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e) value; + return 0; + case ZSTD_d_forceIgnoreChecksum : + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e) value; + return 0; + case ZSTD_d_refMultipleDDicts : + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) + { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e) value; + return 0; + case ZSTD_d_disableHuffmanAssembly : + CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); + dctx->disableHufAsm = value != 0; + return 0; + case ZSTD_d_maxBlockSize : + if (value != 0) + CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); + dctx->maxBlockSizeParam = value; + return 0; + default :; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) { + if ((reset == ZSTD_reset_session_only) || (reset == ZSTD_reset_session_and_parameters)) + { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + dctx->isFrameDecompression = 1; + } + if ((reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters)) + { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) { return ZSTD_sizeof_DCtx(dctx); } + +static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, + unsigned long long frameContentSize, + size_t blockSizeMax) { + size_t const blockSize = MIN((size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); + /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block + * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing + * the block at the beginning of the output buffer, and maintain a full window. + * + * We need another blockSize worth of buffer so that we can store split + * literals at the end of the block without overwriting the extDict window. 
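+ * Hence the requirement below : windowSize + 2 * blockSize + 2 * WILDCOPY_OVERLENGTH.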
+ */ + unsigned long long const neededRBSize = + windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long) minRBSize != neededSize, frameParameter_windowTooLarge, + ""); + return minRBSize; +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, + unsigned long long frameContentSize) { + return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) { + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) { + U32 const windowSizeMax = + 1U + << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) + return err; + RETURN_ERROR_IF(err > 0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t) zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, + size_t const neededInBuffSize, + size_t const neededOutBuffSize) { + return (zds->inBuffSize + zds->outBuffSize) + >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, + size_t const neededInBuffSize, + size_t const neededOutBuffSize) { + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) { + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) { + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) + { + size_t const dstSize = isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = + ZSTD_decompressContinue(zds, zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) + { + zds->streamStage = zdss_read; + } + else + { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } + else + { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t) (oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { + const char* const src = (const char*) input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*) output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "forbidden. in: pos: %u vs size: %u", + (U32) input->pos, (U32) input->size); + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", (U32) output->pos, (U32) output->size); + DEBUGLOG(5, "input size : %u", (U32) (input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) + { + switch (zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + zds->legacyVersion = 0; +#endif + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + ZSTD_FALLTHROUGH; + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32) (iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (zds->legacyVersion) + { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { + size_t const hint = ZSTD_decompressLegacyStream( + zds->legacyContext, zds->legacyVersion, output, input); + if (hint == 0) + zds->streamStage = zdss_init; + return hint; + } + } +#endif + { + size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, + zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) + { + ZSTD_DCtx_selectFrameDDict(zds); + } + if (ZSTD_isError(hSize)) + { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend - istart); + if (legacyVersion) + { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? 
ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", + legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, + legacyVersion, dict, dictSize), + ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { + size_t const hint = ZSTD_decompressLegacyStream( + zds->legacyContext, legacyVersion, output, input); + if (hint == 0) + zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } + } +#endif + return hSize; /* error */ + } + if (hSize != 0) + { /* need more input */ + size_t const toLoad = + hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t) (iend - ip); + assert(iend >= ip); + if (toLoad > remainingInput) + { /* not enough input to load full header */ + if (remainingInput > 0) + { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + /* check first few bytes */ + FORWARD_IF_ERROR(ZSTD_getFrameHeader_advanced(&zds->fParams, + zds->headerBuffer, + zds->lhSize, zds->format), + "First few bytes detected incorrect"); + /* return hint input size */ + return (MAX((size_t) ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) + - zds->lhSize) + + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); + zds->lhSize = hSize; + ip += toLoad; + break; + } + } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64) (size_t) (oend - op) >= zds->fParams.frameContentSize) + { + size_t const cSize = ZSTD_findFrameCompressedSize_advanced( + istart, (size_t) (iend - istart), zds->format); + if (cSize <= (size_t) (iend - istart)) + { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict( + zds, op, (size_t) (oend - op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) + return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()"); + assert(istart != NULL); + ip = istart + cSize; + op = op ? op + decompressedSize + : op; /* can occur if frameContentSize = 0 (empty frame) */ + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + } + + /* Check output buffer is large enough for ZSTD_odm_stable. 
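+ * In stable mode the frame is written straight into the user buffer, so the full content must fit up front.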
*/ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64) (size_t) (oend - op) < zds->fParams.frameContentSize) + { + RETURN_ERROR(dstSize_tooSmall, + "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if (zds->format == ZSTD_f_zstd1 + && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) + == ZSTD_MAGIC_SKIPPABLE_START) + { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } + else + { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32) (zds->fParams.windowSize >> 10), (U32) (zds->maxWindowSize >> 10)); + zds->fParams.windowSize = + MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + if (zds->maxBlockSizeParam != 0) + zds->fParams.blockSizeMax = + MIN(zds->fParams.blockSizeMax, (unsigned) zds->maxBlockSizeParam); + + /* Adapt buffer sizes to frame header instructions */ + { + size_t const neededInBuffSize = + MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = + zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, + zds->fParams.frameContentSize, + zds->fParams.blockSizeMax) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { + int const tooSmall = (zds->inBuffSize < neededInBuffSize) + || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) + { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", (U32) zds->inBuffSize, + (U32) neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", (U32) zds->outBuffSize, + (U32) neededOutBuffSize); + if (zds->staticSize) + { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32) zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF(bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } + else + { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*) ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } + } + } + zds->streamStage = zdss_read; + ZSTD_FALLTHROUGH; + + case zdss_read : + DEBUGLOG(5, "stage zdss_read"); + { + size_t const neededInSize = + ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t) (iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32) neededInSize); + if (neededInSize == 0) + { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t) (iend - ip) >= neededInSize) + { /* decode directly from src */ + FORWARD_IF_ERROR( + ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), 
""); + assert(ip != NULL); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } + } + if (ip == iend) + { + someMoreWork = 0; + break; + } /* no more input */ + zds->streamStage = zdss_load; + ZSTD_FALLTHROUGH; + + case zdss_load : { + size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize + == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t) (iend - ip))); + if (isSkipFrame) + { + loadedSize = MIN(toLoad, (size_t) (iend - ip)); + } + else + { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, corruption_detected, + "should never happen"); + loadedSize = + ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t) (iend - ip)); + } + if (loadedSize != 0) + { + /* ip may be NULL */ + ip += loadedSize; + zds->inPos += loadedSize; + } + if (loadedSize < toLoad) + { + someMoreWork = 0; + break; + } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR( + ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush : { + size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = + ZSTD_limitCopy(op, (size_t) (oend - op), zds->outBuff + zds->outStart, toFlushSize); + + op = op ? op + flushedSize : op; + + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) + { /* flush completed */ + zds->streamStage = zdss_read; + if ((zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize)) + { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int) (zds->outBuffSize - zds->outStart), + (U32) zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } + } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default : + assert(0); /* impossible */ + RETURN_ERROR( + GENERIC, "impossible to reach"); /* some compilers require default to do something */ + } + } + + /* result */ + input->pos = (size_t) (ip - (const char*) (input->src)); + output->pos = (size_t) (op - (char*) (output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. 
*/ + zds->expectedOutBuffer = *output; + + if ((ip == istart) && (op == ostart)) + { /* no forward progress */ + zds->noForwardProgress++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) + { + RETURN_ERROR_IF(op == oend, noForwardProgress_destFull, ""); + RETURN_ERROR_IF(ip == iend, noForwardProgress_inputEmpty, ""); + assert(0); + } + } + else + { + zds->noForwardProgress = 0; + } + { + size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) + { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) + { /* output fully flushed */ + if (zds->hostageByte) + { + if (input->pos >= input->size) + { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) + { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input + ->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte = 1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += + ZSTD_blockHeaderSize + * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs(ZSTD_DCtx* dctx, + void* dst, + size_t dstCapacity, + size_t* dstPos, + const void* src, + size_t srcSize, + size_t* srcPos) { + ZSTD_outBuffer output; + ZSTD_inBuffer input; + output.dst = dst; + output.size = dstCapacity; + output.pos = *dstPos; + input.src = src; + input.size = srcSize; + input.pos = *srcPos; + { + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; + } +} diff --git a/src/external/decompress/zstd_decompress_block.cpp b/src/external/decompress/zstd_decompress_block.cpp new file mode 100644 index 00000000..f86253b5 --- /dev/null +++ b/src/external/decompress/zstd_decompress_block.cpp @@ -0,0 +1,2636 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/compiler.h" /* prefetch */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/huf.h" +#include "../common/zstd_internal.h" +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. + */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) { + size_t const blockSizeMax = + dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; + assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); + return blockSizeMax; +} + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { + U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e) ((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) + return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + +/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */ +static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, + void* const dst, + const size_t dstCapacity, + const size_t litSize, + const streaming_operation streaming, + const size_t expectedWriteSize, + const unsigned splitImmediately) { + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); + assert(litSize <= blockSizeMax); + assert(dctx->isFrameDecompression || streaming == not_streaming); + assert(expectedWriteSize <= blockSizeMax); + if (streaming == not_streaming + && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) + { + /* If we aren't streaming, we can just put the literals after the output + * of the current block. We don't need to worry about overwriting the + * extDict of our window, because it doesn't exist. 
+ * So if we have space after the end of the block, just put it there. + */ + dctx->litBuffer = (BYTE*) dst + blockSizeMax + WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_in_dst; + } + else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) + { + /* Literals fit entirely within the extra buffer, put them there to avoid + * having to split the literals. + */ + dctx->litBuffer = dctx->litExtraBuffer; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + } + else + { + assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); + /* Literals must be split between the output block and the extra lit + * buffer. We fill the extra lit buffer with the tail of the literals, + * and put the rest of the literals at the end of the block, with + * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. + * This MUST not write more than our maxBlockSize beyond dst, because in + * streaming mode, that could overwrite part of our extDict window. + */ + if (splitImmediately) + { + /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + dctx->litBuffer = (BYTE*) dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE + - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; + } + else + { + /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ + dctx->litBuffer = (BYTE*) dst + expectedWriteSize - litSize; + dctx->litBufferEnd = (BYTE*) dst + expectedWriteSize; + } + dctx->litBufferLocation = ZSTD_split; + assert(dctx->litBufferEnd <= (BYTE*) dst + expectedWriteSize); + } +} + +/*! ZSTD_decodeLiteralsBlock() : + * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored + * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current + * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being + * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write. + * + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, + size_t srcSize, /* note : srcSize < BLOCKSIZE */ + void* dst, + size_t dstCapacity, + const streaming_operation streaming) { + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { + const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e) (istart[0] & 3); + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); + + switch (litEncType) + { + case set_repeat : + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy == 0, dictionary_corrupted, ""); + ZSTD_FALLTHROUGH; + + case set_compressed : + RETURN_ERROR_IF(srcSize < 5, corruption_detected, + "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); + { + size_t lhSize, litSize, litCSize; + U32 singleStream = 0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); + int const flags = 0 | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) + | (dctx->disableHufAsm ? 
HUF_flags_disableAsm : 0); + switch (lhlCode) + { + case 0 : + case 1 : + default : /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2 : + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3 : + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t) istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); + if (!singleStream) + RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, + "Not enough literals (%zu) for the 4-streams mode (min %u)", + litSize, MIN_LITERALS_FOR_4_STREAMS); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, + expectedWriteSize, 0); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) + { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType == set_repeat) + { + if (singleStream) + { + hufSuccess = HUF_decompress1X_usingDTable( + dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr, flags); + } + else + { + assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); + hufSuccess = HUF_decompress4X_usingDTable( + dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr, flags); + } + } + else + { + if (singleStream) + { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, + litCSize, dctx->workspace, sizeof(dctx->workspace), flags); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, + litCSize, dctx->workspace, sizeof(dctx->workspace), flags); +#endif + } + else + { + hufSuccess = HUF_decompress4X_hufOnly_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, + litCSize, dctx->workspace, sizeof(dctx->workspace), flags); + } + } + if (dctx->litBufferLocation == ZSTD_split) + { + assert(litSize > ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, + ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, + dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); + dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; + assert(dctx->litBufferEnd <= (BYTE*) dst + blockSizeMax); + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType == set_compressed) + dctx->HUFptr = dctx->entropy.hufTable; + return litCSize + lhSize; + } + + case set_basic : { + size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); + switch (lhlCode) + { + case 0 : + case 2 : + default : /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1 : + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3 : + lhSize = 3; + 
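/* 20-bit literal size : all 3 header bytes must be available */ +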
RETURN_ERROR_IF(srcSize < 3, corruption_detected, + "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); + litSize = MEM_readLE24(istart) >> 4; + break; + } + + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, + expectedWriteSize, 1); + if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) + { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize + lhSize > srcSize, corruption_detected, ""); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, + litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memcpy(dctx->litExtraBuffer, + istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, + ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize); + } + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize + litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart + lhSize; + dctx->litSize = litSize; + dctx->litBufferEnd = dctx->litPtr + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + return lhSize + litSize; + } + + case set_rle : { + U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); + switch (lhlCode) + { + case 0 : + case 2 : + default : /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1 : + lhSize = 2; + RETURN_ERROR_IF(srcSize < 3, corruption_detected, + "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); + litSize = MEM_readLE16(istart) >> 4; + break; + case 3 : + lhSize = 3; + RETURN_ERROR_IF(srcSize < 4, corruption_detected, + "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); + litSize = MEM_readLE24(istart) >> 4; + break; + } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, + expectedWriteSize, 1); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize); + } + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize + 1; + } + default : + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock_wrapper( + ZSTD_DCtx* dctx, const void* src, size_t srcSize, void* dst, size_t dstCapacity); +size_t ZSTD_decodeLiteralsBlock_wrapper( + ZSTD_DCtx* dctx, const void* src, size_t srcSize, void* dst, size_t dstCapacity) { + dctx->isFrameDecompression = 0; + return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); +} + +/* Default FSE distribution tables. 
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - prettify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = { + {1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */ + /* nextState, nbAddBits, nbBits, baseVal */ + {0, 0, 4, 0}, + {16, 0, 4, 0}, + {32, 0, 5, 1}, + {0, 0, 5, 3}, + {0, 0, 5, 4}, + {0, 0, 5, 6}, + {0, 0, 5, 7}, + {0, 0, 5, 9}, + {0, 0, 5, 10}, + {0, 0, 5, 12}, + {0, 0, 6, 14}, + {0, 1, 5, 16}, + {0, 1, 5, 20}, + {0, 1, 5, 22}, + {0, 2, 5, 28}, + {0, 3, 5, 32}, + {0, 4, 5, 48}, + {32, 6, 5, 64}, + {0, 7, 5, 128}, + {0, 8, 6, 256}, + {0, 10, 6, 1024}, + {0, 12, 6, 4096}, + {32, 0, 4, 0}, + {0, 0, 4, 1}, + {0, 0, 5, 2}, + {32, 0, 5, 4}, + {0, 0, 5, 5}, + {32, 0, 5, 7}, + {0, 0, 5, 8}, + {32, 0, 5, 10}, + {0, 0, 5, 11}, + {0, 0, 6, 13}, + {32, 1, 5, 16}, + {0, 1, 5, 18}, + {32, 1, 5, 22}, + {0, 2, 5, 24}, + {32, 3, 5, 32}, + {0, 3, 5, 40}, + {0, 6, 4, 64}, + {16, 6, 4, 64}, + {32, 7, 5, 128}, + {0, 9, 6, 512}, + {0, 11, 6, 2048}, + {48, 0, 4, 0}, + {16, 0, 4, 1}, + {32, 0, 5, 2}, + {32, 0, 5, 3}, + {32, 0, 5, 5}, + {32, 0, 5, 6}, + {32, 0, 5, 8}, + {32, 0, 5, 9}, + {32, 0, 5, 11}, + {32, 0, 5, 12}, + {0, 0, 6, 15}, + {32, 1, 5, 18}, + {32, 1, 5, 20}, + {32, 2, 5, 24}, + {32, 2, 5, 28}, + {32, 3, 5, 40}, + {32, 4, 5, 48}, + {0, 16, 6, 65536}, + {0, 15, 6, 32768}, + {0, 14, 6, 16384}, + {0, 13, 6, 8192}, +}; /* LL_defaultDTable */ + +/* Default FSE distribution table for Offset Codes */ +static const ZSTD_seqSymbol OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = { + {1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */ + /* nextState, nbAddBits, nbBits, baseVal */ + {0, 0, 5, 0}, + {0, 6, 4, 61}, + {0, 9, 5, 509}, + {0, 15, 5, 32765}, + {0, 21, 5, 2097149}, + {0, 3, 5, 5}, + {0, 7, 4, 125}, + {0, 12, 5, 4093}, + {0, 18, 5, 262141}, + {0, 23, 5, 8388605}, + {0, 5, 5, 29}, + {0, 8, 4, 253}, + {0, 14, 5, 16381}, + {0, 20, 5, 1048573}, + {0, 2, 5, 1}, + {16, 7, 4, 125}, + {0, 11, 5, 2045}, + {0, 17, 5, 131069}, + {0, 22, 5, 4194301}, + {0, 4, 5, 13}, + {16, 8, 4, 253}, + {0, 13, 5, 8189}, + {0, 19, 5, 524285}, + {0, 1, 5, 1}, + {16, 6, 4, 61}, + {0, 10, 5, 1021}, + {0, 16, 5, 65533}, + {0, 28, 5, 268435453}, + {0, 27, 5, 134217725}, + {0, 26, 5, 67108861}, + {0, 25, 5, 33554429}, + {0, 24, 5, 16777213}, +}; /* OF_defaultDTable */ + + +/* Default FSE distribution table for Match Lengths */ +static const ZSTD_seqSymbol ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = { + {1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */ + /* nextState, nbAddBits, nbBits, baseVal */ + {0, 0, 6, 3}, + {0, 0, 4, 4}, + {32, 0, 5, 5}, + {0, 0, 5, 6}, + {0, 0, 5, 8}, + {0, 0, 5, 9}, + {0, 0, 5, 11}, + {0, 0, 6, 13}, + {0, 0, 6, 16}, + {0, 0, 6, 19}, + {0, 0, 6, 22}, + {0, 0, 6, 25}, + {0, 0, 6, 28}, + {0, 0, 6, 31}, + {0, 0, 6, 34}, + {0, 1, 6, 37}, + {0, 1, 6, 41}, + {0, 2, 6, 47}, + {0, 3, 6, 59}, + {0, 4, 6, 83}, + {0, 7, 6, 131}, + {0, 9, 6, 515}, + {16, 0, 4, 4}, + {0, 0, 4, 5}, + {32, 0, 5, 6}, + {0, 0, 5, 7}, + {32, 0, 
5, 9}, + {0, 0, 5, 10}, + {0, 0, 6, 12}, + {0, 0, 6, 15}, + {0, 0, 6, 18}, + {0, 0, 6, 21}, + {0, 0, 6, 24}, + {0, 0, 6, 27}, + {0, 0, 6, 30}, + {0, 0, 6, 33}, + {0, 1, 6, 35}, + {0, 1, 6, 39}, + {0, 2, 6, 43}, + {0, 3, 6, 51}, + {0, 4, 6, 67}, + {0, 5, 6, 99}, + {0, 8, 6, 259}, + {32, 0, 4, 4}, + {48, 0, 4, 4}, + {16, 0, 4, 5}, + {32, 0, 5, 7}, + {32, 0, 5, 8}, + {32, 0, 5, 10}, + {32, 0, 5, 11}, + {0, 0, 6, 14}, + {0, 0, 6, 17}, + {0, 0, 6, 20}, + {0, 0, 6, 23}, + {0, 0, 6, 26}, + {0, 0, 6, 29}, + {0, 0, 6, 32}, + {0, 16, 6, 65539}, + {0, 15, 6, 32771}, + {0, 14, 6, 16387}, + {0, 13, 6, 8195}, + {0, 12, 6, 4099}, + {0, 11, 6, 2051}, + {0, 10, 6, 1027}, +}; /* ML_defaultDTable */ + + +static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits) { + void* ptr = dt; + ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*) ptr; + ZSTD_seqSymbol* const cell = dt + 1; + + DTableH->tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + const U32* baseValue, + const U8* nbAdditionalBits, + unsigned tableLog, + void* wksp, + size_t wkspSize) { + ZSTD_seqSymbol* const tableDecode = dt + 1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*) wksp; + BYTE* spread = (BYTE*) (symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void) wkspSize; + /* Init, lay down lowprob symbols */ + { + ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { + S16 const largeLimit = (S16) (1 << (tableLog - 1)); + U32 s; + for (s = 0; s < maxSV1; s++) + { + if (normalizedCounter[s] == -1) + { + tableDecode[highThreshold--].baseValue = s; + symbolNext[s] = 1; + } + else + { + if (normalizedCounter[s] >= largeLimit) + DTableH.fastMode = 0; + assert(normalizedCounter[s] >= 0); + symbolNext[s] = (U16) normalizedCounter[s]; + } + } + } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) + { + size_t const tableMask = tableSize - 1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
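+ * For example, with normalized counts {symbol 0 : 3, symbol 1 : 5} and tableSize 8, + * this first stage lays out spread[] = {0,0,0,1,1,1,1,1}; the second stage below then + * reads spread[] sequentially and scatters those entries across the table using the + * FSE_TABLESTEP stride.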
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s = 0; s < maxSV1; ++s, sv += add) + { + int i; + int const n = normalizedCounter[s]; + MEM_write64(spread + pos, sv); + for (i = 8; i < n; i += 8) + { + MEM_write64(spread + pos + i, sv); + } + assert(n >= 0); + pos += (size_t) n; + } + } + /* Now we spread those positions across the table. + * The benefit of doing it in two stages is that we avoid the + * variable size inner loop, which caused lots of branch misses. + * Now we can run through all the positions without any branch misses. + * We unroll the loop twice, since that is what empirically worked best. + */ + { + size_t position = 0; + size_t s; + size_t const unroll = 2; + assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ + for (s = 0; s < (size_t) tableSize; s += unroll) + { + size_t u; + for (u = 0; u < unroll; ++u) + { + size_t const uPosition = (position + (u * step)) & tableMask; + tableDecode[uPosition].baseValue = spread[s + u]; + } + position = (position + (unroll * step)) & tableMask; + } + assert(position == 0); + } + } + else + { + U32 const tableMask = tableSize - 1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s = 0; s < maxSV1; s++) + { + int i; + int const n = normalizedCounter[s]; + for (i = 0; i < n; i++) + { + tableDecode[position].baseValue = s; + position = (position + step) & tableMask; + while (UNLIKELY(position > highThreshold)) + position = (position + step) & tableMask; /* lowprob area */ + } + } + assert( + position + == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u = 0; u < tableSize; u++) + { + U32 const symbol = tableDecode[u].baseValue; + U32 const nextState = symbolNext[symbol]++; + tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState)); + tableDecode[u].nextState = (U16) ((nextState << tableDecode[u].nbBits) - tableSize); + assert(nbAdditionalBits[symbol] < 255); + tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; + tableDecode[u].baseValue = baseValue[symbol]; + } + } +} + +/* Avoids the FORCE_INLINE of the _body() function. 
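+ * The force-inlined body is instantiated twice below: once in a default wrapper and, + * when DYNAMIC_BMI2 is enabled, once more in a BMI2_TARGET_ATTRIBUTE wrapper, so + * ZSTD_buildFSETable() can select the best code path at runtime.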
*/ +static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + const U32* baseValue, + const U8* nbAdditionalBits, + unsigned tableLog, + void* wksp, + size_t wkspSize) { + ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, baseValue, nbAdditionalBits, + tableLog, wksp, wkspSize); +} + +#if DYNAMIC_BMI2 +BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + const U32* baseValue, + const U8* nbAdditionalBits, + unsigned tableLog, + void* wksp, + size_t wkspSize) { + ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, baseValue, nbAdditionalBits, + tableLog, wksp, wkspSize); +} +#endif + +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + const U32* baseValue, + const U8* nbAdditionalBits, + unsigned tableLog, + void* wksp, + size_t wkspSize, + int bmi2) { +#if DYNAMIC_BMI2 + if (bmi2) + { + ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue, baseValue, + nbAdditionalBits, tableLog, wksp, wkspSize); + return; + } +#endif + (void) bmi2; + ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue, baseValue, + nbAdditionalBits, tableLog, wksp, wkspSize); +} + + +/*! ZSTD_buildSeqTable() : + * @return : nb bytes read from src, + * or an error code if it fails */ +static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, + const ZSTD_seqSymbol** DTablePtr, + symbolEncodingType_e type, + unsigned max, + U32 maxLog, + const void* src, + size_t srcSize, + const U32* baseValue, + const U8* nbAdditionalBits, + const ZSTD_seqSymbol* defaultTable, + U32 flagRepeatTable, + int ddictIsCold, + int nbSeq, + U32* wksp, + size_t wkspSize, + int bmi2) { + switch (type) + { + case set_rle : + RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF((*(const BYTE*) src) > max, corruption_detected, ""); + { + U32 const symbol = *(const BYTE*) src; + U32 const baseline = baseValue[symbol]; + U8 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat : + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) + { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : { + unsigned tableLog; + S16 norm[MaxSeq + 1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, + wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize) { + const BYTE* const istart = (const BYTE*) src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (nbSeq > 0x7F) + { 
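+ /* first byte in 128..254 : 2-byte count, nbSeq = ((b0 - 0x80) << 8) + b1 ; + * first byte == 255 : count stored in the next 2 bytes, little-endian, offset by LONGNBSEQ (0x7F00) */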
+ if (nbSeq == 0xFF) + { + RETURN_ERROR_IF(ip + 2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip += 2; + } + else + { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq - 0x80) << 8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + if (nbSeq == 0) + { + /* No sequence : section ends immediately */ + RETURN_ERROR_IF(ip != iend, corruption_detected, + "extraneous data present in the Sequences section"); + return (size_t) (ip - istart); + } + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip + 1 > iend, srcSize_wrong, + ""); /* minimum possible size: 1 byte for symbol encoding types */ + RETURN_ERROR_IF(*ip & 3, corruption_detected, + ""); /* The last field, Reserved, must be all-zeroes. */ + { + symbolEncodingType_e const LLtype = (symbolEncodingType_e) (*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e) ((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e) ((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { + size_t const llhSize = ZSTD_buildSeqTable( + dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip, LL_base, + LL_bits, LL_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, + "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { + size_t const ofhSize = ZSTD_buildSeqTable( + dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip, + OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, + "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { + size_t const mlhSize = ZSTD_buildSeqTable( + dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip, ML_base, + ML_bits, ML_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, + "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip - istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *ip >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) + { + /* close range match, overlap */ + static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ + static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op + 4, *ip); + *ip -= sub2; + } + else + { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*!
ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy( + BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) + || (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) + { + /* Handle short lengths. */ + while (op < oend) + *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) + { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + length -= 8; + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) + { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) + { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op += oend_w - op; + } + /* Handle the leftovers. */ + while (op < oend) + *op++ = *ip++; +} + +/* ZSTD_safecopyDstBeforeSrc(): + * This version allows overlap with dst before src, or handles the non-overlap case with dst after src + * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + if (length < 8 || diff > -8) + { + /* Handle short lengths, close overlaps, and dst not before src. */ + while (op < oend) + *op++ = *ip++; + return; + } + + if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) + { + ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap); + ip += oend - WILDCOPY_OVERLENGTH - op; + op += oend - WILDCOPY_OVERLENGTH - op; + } + + /* Handle the leftovers. */ + while (op < oend) + *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. + * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). 
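+ * (oend_w below is oend - WILDCOPY_OVERLENGTH : the last output position from which + * a wildcopy is guaranteed not to overrun the output buffer.)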
+ */ +FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, + seq_t sequence, + const BYTE** litPtr, + const BYTE* const litLimit, + const BYTE* const prefixStart, + const BYTE* const virtualStart, + const BYTE* const dictEnd) { + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t) (oend - op), dstSize_tooSmall, + "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t) (litLimit - *litPtr), corruption_detected, + "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t) (oLitEnd - prefixStart)) + { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t) (oLitEnd - virtualStart), corruption_detected, + ""); + match = dictEnd - (prefixStart - match); + if (match + sequence.matchLength <= dictEnd) + { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { + size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +/* ZSTD_execSequenceEndSplitLitBuffer(): + * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
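+ * ("Split" means the literals of the current block live partly in dst and partly in + * dctx->litExtraBuffer, cf. ZSTD_allocateLiteralsBuffer().)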
+ */ +FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, + BYTE* const oend, + const BYTE* const oend_w, + seq_t sequence, + const BYTE** litPtr, + const BYTE* const litLimit, + const BYTE* const prefixStart, + const BYTE* const virtualStart, + const BYTE* const dictEnd) { + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t) (oend - op), dstSize_tooSmall, + "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t) (litLimit - *litPtr), corruption_detected, + "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, + "output should not catch up to and overwrite literal buffer"); + ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t) (oLitEnd - prefixStart)) + { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t) (oLitEnd - virtualStart), corruption_detected, + ""); + match = dictEnd - (prefixStart - match); + if (match + sequence.matchLength <= dictEnd) + { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { + size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, + seq_t sequence, + const BYTE** litPtr, + const BYTE* const litLimit, + const BYTE* const prefixStart, + const BYTE* const virtualStart, + const BYTE* const dictEnd) { + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = + oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + +#if defined(__aarch64__) + /* prefetch sequence starting from match that will be used for copy later */ + PREFETCH_L1(match); +#endif + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w + || (MEM_32bits() && (size_t) (oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, + dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + 
assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) + { + ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t) (oLitEnd - prefixStart)) + { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t) (oLitEnd - virtualStart)), + corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) + { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { + size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) + { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t) sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
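+ * (matchLength - 8 : ZSTD_overlapCopy8() has already copied the first 8 bytes + * and advanced both op and match past them.)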
*/ + if (sequence.matchLength > 8) + { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t) sequence.matchLength - 8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, + BYTE* const oend, + const BYTE* const oend_w, + seq_t sequence, + const BYTE** litPtr, + const BYTE* const litLimit, + const BYTE* const prefixStart, + const BYTE* const virtualStart, + const BYTE* const dictEnd) { + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w + || (MEM_32bits() && (size_t) (oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, + prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) + { + ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t) (oLitEnd - prefixStart)) + { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t) (oLitEnd - virtualStart)), + corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) + { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { + size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) + { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. 
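+ * ("silesia" refers to the Silesia compression corpus used for zstd benchmarking.)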
+ */ + ZSTD_wildcopy(op, match, (ptrdiff_t) sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) + { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t) sequence.matchLength - 8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) { + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*) ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", (U32) DStatePtr->state, + DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, + BIT_DStream_t* bitD, + U16 nextState, + U32 nbBits) { + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32 + * bits before reloading. This value is the maximum number of extra bits we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { + ZSTD_lo_isRegularOffset, + ZSTD_lo_isLongOffset = 1 +} ZSTD_longOffset_e; + +/** + * ZSTD_decodeSequence(): + * @p longOffsets : tells the decoder to reload more bits while decoding large offsets + * only used in 32-bit mode + * @return : Sequence (litL + matchL + offset) + */ +FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequence(seqState_t* seqState, + const ZSTD_longOffset_e longOffsets, + const int isLastSeq) { + seq_t seq; + /* + * ZSTD_seqSymbol is a 64-bit wide structure. + * It can be loaded in one operation + * and its fields extracted by simply shifting or bit-extracting on aarch64. + * GCC doesn't recognize this and generates unnecessary ldr/ldrb/ldrh + * operations that cause a performance drop. This can be avoided by using this + * ZSTD_memcpy hack.
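+ * (ZSTD_seqSymbol packs nextState : U16, nbAdditionalBits : BYTE, nbBits : BYTE and + * baseValue : U32 into exactly 8 bytes, so a single 64-bit load fetches a whole table entry.)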
+ */ +#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) + ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; + ZSTD_seqSymbol* const llDInfo = &llDInfoS; + ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; + ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; + ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); + ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); + ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, + sizeof(ZSTD_seqSymbol)); +#else + const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; + const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; + const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; +#endif + seq.matchLength = mlDInfo->baseValue; + seq.litLength = llDInfo->baseValue; + { + U32 const ofBase = ofDInfo->baseValue; + BYTE const llBits = llDInfo->nbAdditionalBits; + BYTE const mlBits = mlDInfo->nbAdditionalBits; + BYTE const ofBits = ofDInfo->nbAdditionalBits; + BYTE const totalBits = llBits + mlBits + ofBits; + + U16 const llNext = llDInfo->nextState; + U16 const mlNext = mlDInfo->nextState; + U16 const ofNext = ofDInfo->nextState; + U32 const llnbBits = llDInfo->nbBits; + U32 const mlnbBits = mlDInfo->nbBits; + U32 const ofnbBits = ofDInfo->nbBits; + + assert(llBits <= MaxLLBits); + assert(mlBits <= MaxMLBits); + assert(ofBits <= MaxOff); + /* + * As gcc has better branch and block analyzers, sometimes it is only + * valuable to mark likeliness for clang, it gives around 3-4% of + * performance. + */ + + /* sequence */ + { + size_t offset; + if (ofBits > 1) + { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 + >= MaxMLBits); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) + { + /* Always read extra bits, this keeps the logic simple, + * avoids branches, and avoids accidentally reading 0 bits. + */ + U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; + offset = + ofBase + + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } + else + { + offset = ofBase + + BIT_readBitsFast(&seqState->DStream, + ofBits /*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + else + { + U32 const ll0 = (llDInfo->baseValue == 0); + if (LIKELY((ofBits == 0))) + { + offset = seqState->prevOffset[ll0]; + seqState->prevOffset[1] = seqState->prevOffset[!ll0]; + seqState->prevOffset[0] = offset; + } + else + { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { + size_t temp = (offset == 3) ? 
seqState->prevOffset[0] - 1 + : seqState->prevOffset[offset]; + temp -= + !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */ + if (offset != 1) + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } + } + } + seq.offset = offset; + } + + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits /*>0*/); + + if (MEM_32bits() + && (mlBits + llBits >= STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() + && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64 - (LLFSELog + MLFSELog + OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16 + LLFSELog + MLFSELog + OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits /*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32) seq.litLength, + (U32) seq.matchLength, (U32) seq.offset); + + if (!isLastSeq) + { + /* don't update FSE state for last Sequence */ + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, + llnbBits); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, + mlnbBits); /* <= 9 bits */ + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, + ofnbBits); /* <= 8 bits */ + BIT_reloadDStream(&seqState->DStream); + } + } + + return seq; +} + +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + #if DEBUGLEVEL >= 1 +static int +ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) { + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) + return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) + return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) + return 0; + /* Dictionary is not within our window size. */ + if ((size_t) (oLitEnd - prefixStart) >= windowSize) + return 0; + /* Dictionary is active. */ + return 1; +} + #endif + +static void ZSTD_assertValidSequence(ZSTD_DCtx const* dctx, + BYTE const* op, + BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, + BYTE const* virtualStart) { + #if DEBUGLEVEL >= 1 + if (dctx->isFrameDecompression) + { + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", (U32) seq.litLength, + (U32) seq.matchLength, (U32) seq.offset); + assert(op <= oend); + assert((size_t) (oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) + { + size_t const dictSize = (size_t) ((char const*) dctx->dictContentEndForFuzzing + - (char const*) dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. 
*/ + assert(seq.offset <= (size_t) (oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } + else + { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } + } + #else + (void) dctx, (void) op, (void) oend, (void) seq, (void) prefixStart, (void) virtualStart; + #endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + + +FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE +ZSTD_decompressSequences_bodySplitLitBuffer(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + const BYTE* ip = (const BYTE*) seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize); + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* litBufferEnd = dctx->litBufferEnd; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); + + /* Literals are split between internal buffer & output buffer */ + if (nbSeq) + { + seqState_t seqState; + dctx->fseEntropy = 1; + { + U32 i; + for (i = 0; i < ZSTD_REP_NUM; i++) + seqState.prevOffset[i] = dctx->entropy.rep[i]; + } + RETURN_ERROR_IF(ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT(BIT_DStream_unfinished < BIT_DStream_completed + && BIT_DStream_endOfBuffer < BIT_DStream_completed + && BIT_DStream_completed < BIT_DStream_overflow); + + /* decompress without overrunning litPtr begins */ + { + seq_t sequence = { + 0, 0, + 0}; /* some static analyzers believe that @sequence is not initialized (it necessarily is, since the for(;;) loop has at least one iteration) */ + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB; + * when it can't, we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * This issue has been reproduced on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE.
+ * - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUs: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz + * - Skylake + * + * Alignment is done for each of the three major decompression loops: + * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer + * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer + * - ZSTD_decompressSequences_body + * Alignment choices are made to minimize large swings on bad cases and influence on performance + * from changes external to this code, rather than to overoptimize on the current commit. + * + * If you are seeing performance instability, this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + #if defined(__GNUC__) && defined(__x86_64__) + __asm__(".p2align 6"); + #if __GNUC__ >= 7 + /* good for gcc-7, gcc-9, and gcc-11 */ + __asm__("nop"); + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); + #if __GNUC__ == 8 || __GNUC__ == 10 + /* good for gcc-8 and gcc-10 */ + __asm__("nop"); + __asm__(".p2align 3"); + #endif + #endif + #endif + + /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ + for (; nbSeq; nbSeq--) + { + sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq == 1); + if (litPtr + sequence.litLength > dctx->litBufferEnd) + break; + { + size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer( + op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, + &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + #endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32) oneSeqSize); + op += oneSeqSize; + } + } + DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); + + /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ + if (nbSeq > 0) + { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", + nbSeq, leftoverLit, sequence.litLength); + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t) (oend - op), dstSize_tooSmall, + "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence.litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = ZSTD_execSequence( + op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + #endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32) oneSeqSize); + op += oneSeqSize; + } + nbSeq--; + } + } + + if (nbSeq > 0) + { + /* there is
remaining lit from extra buffer */ + + #if defined(__GNUC__) && defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); + #if __GNUC__ != 7 + /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */ + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); + #elif __GNUC__ >= 11 + __asm__(".p2align 3"); + #else + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); + #endif + #endif + + for (; nbSeq; nbSeq--) + { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq == 1); + size_t const oneSeqSize = ZSTD_execSequence( + op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + #endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32) oneSeqSize); + op += oneSeqSize; + } + } + + /* check if reached exact end */ + DEBUGLOG( + 5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", + nbSeq); + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, + seqState.DStream.ptr, seqState.DStream.bitsConsumed); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); + /* save reps for next block */ + { + U32 i; + for (i = 0; i < ZSTD_REP_NUM; i++) + dctx->entropy.rep[i] = (U32) (seqState.prevOffset[i]); + } + } + + /* last literal segment */ + if (dctx->litBufferLocation == ZSTD_split) + { + /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ + size_t const lastLLSize = (size_t) (litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from segment : %u", (U32) lastLLSize); + RETURN_ERROR_IF(lastLLSize > (size_t) (oend - op), dstSize_tooSmall, ""); + if (op != NULL) + { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + } + /* copy last literals from internal buffer */ + { + size_t const lastLLSize = (size_t) (litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32) lastLLSize); + RETURN_ERROR_IF(lastLLSize > (size_t) (oend - op), dstSize_tooSmall, ""); + if (op != NULL) + { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + DEBUGLOG(6, "decoded block of size %u bytes", (U32) (op - ostart)); + return (size_t) (op - ostart); +} + +FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE +ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + const BYTE* ip = (const BYTE*) seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst + ? 
ZSTD_maybeNullPtrAdd(ostart, maxDstSize) + : dctx->litBuffer; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); + + /* Regen sequences */ + if (nbSeq) + { + seqState_t seqState; + dctx->fseEntropy = 1; + { + U32 i; + for (i = 0; i < ZSTD_REP_NUM; i++) + seqState.prevOffset[i] = dctx->entropy.rep[i]; + } + RETURN_ERROR_IF(ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + #if defined(__GNUC__) && defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); + #if __GNUC__ >= 7 + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); + #else + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); + #endif + #endif + + for (; nbSeq; nbSeq--) + { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq == 1); + size_t const oneSeqSize = + ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); + #endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32) oneSeqSize); + op += oneSeqSize; + } + + /* check if reached exact end */ + assert(nbSeq == 0); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); + /* save reps for next block */ + { + U32 i; + for (i = 0; i < ZSTD_REP_NUM; i++) + dctx->entropy.rep[i] = (U32) (seqState.prevOffset[i]); + } + } + + /* last literal segment */ + { + size_t const lastLLSize = (size_t) (litEnd - litPtr); + DEBUGLOG(6, "copy last literals : %u", (U32) lastLLSize); + RETURN_ERROR_IF(lastLLSize > (size_t) (oend - op), dstSize_tooSmall, ""); + if (op != NULL) + { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + DEBUGLOG(6, "decoded block of size %u bytes", (U32) (op - ostart)); + return (size_t) (op - ostart); +} + +static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); +} + +static size_t ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, + nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE + +size_t ZSTD_prefetchMatch(size_t prefetchPos, + seq_t const sequence, + const BYTE* const prefixStart, + const BYTE* const dictEnd) { + prefetchPos += sequence.litLength; + { + const BYTE* const matchBase = 
(sequence.offset > prefetchPos) ? dictEnd : prefixStart; + /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + const BYTE* const match = + ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset); + PREFETCH_L1(match); + PREFETCH_L1( + match + + CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. + * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + const BYTE* ip = (const BYTE*) seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst + ? dctx->litBuffer + : ZSTD_maybeNullPtrAdd(ostart, maxDstSize); + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* litBufferEnd = dctx->litBufferEnd; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + + /* Regen sequences */ + if (nbSeq) + { + #define STORED_SEQS 8 + #define STORED_SEQS_MASK (STORED_SEQS - 1) + #define ADVANCED_SEQS STORED_SEQS + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + size_t prefetchPos = + (size_t) (op - prefixStart); /* track position relative to prefixStart */ + + dctx->fseEntropy = 1; + { + int i; + for (i = 0; i < ZSTD_REP_NUM; i++) + seqState.prevOffset[i] = dctx->entropy.rep[i]; + } + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF(ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb = 0; seqNb < seqAdvance; seqNb++) + { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq - 1); + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb] = sequence; + } + + /* decompress without stomping litBuffer */ + for (; seqNb < nbSeq; seqNb++) + { + seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq - 1); + + if (dctx->litBufferLocation == ZSTD_split + && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength + > dctx->litBufferEnd) + { + /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t) (oend - op), dstSize_tooSmall, + "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit; + op += leftoverLit; + } + litPtr 
= dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = ZSTD_execSequence( + op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, + litBufferEnd, prefixStart, dictStart, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], + prefixStart, dictStart); + #endif + if (ZSTD_isError(oneSeqSize)) + return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } + } + else + { + /* lit buffer is either wholly contained in first or second split, or not split at all*/ + size_t const oneSeqSize = + dctx->litBufferLocation == ZSTD_split + ? ZSTD_execSequenceSplitLitBuffer( + op, oend, + litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength + - WILDCOPY_OVERLENGTH, + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, + litBufferEnd, prefixStart, dictStart, dictEnd) + : ZSTD_execSequence(op, oend, + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], + &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], + prefixStart, dictStart); + #endif + if (ZSTD_isError(oneSeqSize)) + return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } + } + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); + + /* finish queue */ + seqNb -= seqAdvance; + for (; seqNb < nbSeq; seqNb++) + { + seq_t* sequence = &(sequences[seqNb & STORED_SEQS_MASK]); + if (dctx->litBufferLocation == ZSTD_split + && litPtr + sequence->litLength > dctx->litBufferEnd) + { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t) (oend - op), dstSize_tooSmall, + "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence->litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { + size_t const oneSeqSize = ZSTD_execSequence( + op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb & STORED_SEQS_MASK], + prefixStart, dictStart); + #endif + if (ZSTD_isError(oneSeqSize)) + return oneSeqSize; + op += oneSeqSize; + } + } + else + { + size_t const oneSeqSize = + dctx->litBufferLocation == ZSTD_split + ? 
ZSTD_execSequenceSplitLitBuffer( + op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, + &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) + : ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, + dictStart, dictEnd); + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb & STORED_SEQS_MASK], + prefixStart, dictStart); + #endif + if (ZSTD_isError(oneSeqSize)) + return oneSeqSize; + op += oneSeqSize; + } + } + + /* save reps for next block */ + { + U32 i; + for (i = 0; i < ZSTD_REP_NUM; i++) + dctx->entropy.rep[i] = (U32) (seqState.prevOffset[i]); + } + } + + /* last literal segment */ + if (dctx->litBufferLocation == ZSTD_split) + { /* first deplete literal buffer in dst, then copy litExtraBuffer */ + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t) (oend - op), dstSize_tooSmall, ""); + if (op != NULL) + { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + } + { + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t) (oend - op), dstSize_tooSmall, ""); + if (op != NULL) + { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return (size_t) (op - ostart); +} + +static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + +#if DYNAMIC_BMI2 + + #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); +} +static BMI2_TARGET_ATTRIBUTE size_t DONT_VECTORIZE +ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, + nbSeq, isLongOffset); +} + #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static BMI2_TARGET_ATTRIBUTE size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); +} + #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e 
isLongOffset) { + DEBUGLOG(5, "ZSTD_decompressSequences"); + #if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) + { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); + } + #endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); +} +static size_t ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); + #if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) + { + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, + nbSeq, isLongOffset); + } + #endif + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, + nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, + size_t maxDstSize, + const void* seqStart, + size_t seqSize, + int nbSeq, + const ZSTD_longOffset_e isLongOffset) { + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); + #if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) + { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); + } + #endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, + isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + +/** + * @returns The total size of the history referenceable by zstd, including + * both the prefix and the extDict. At @p op any offset larger than this + * is invalid. + */ +static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart) { + return (size_t) (op - virtualStart); +} + +typedef struct { + unsigned longOffsetShare; + unsigned maxNbAdditionalBits; +} ZSTD_OffsetInfo; + +/* ZSTD_getOffsetInfo() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1<<OffFSELog), + * as well as the maximum number additional bits required */ +static ZSTD_OffsetInfo ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq) { + ZSTD_OffsetInfo info = {0, 0}; + /* If nbSeq == 0, then the offTable is uninitialized, but we have + * no sequences, so both values should be 0. */ + if (nbSeq != 0) + { + const void* ptr = offTable; + U32 const tableLog = ((const ZSTD_seqSymbol_header*) ptr)->tableLog; + const ZSTD_seqSymbol* table = offTable + 1; + U32 const max = 1 << tableLog; + U32 u; + DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); + + assert(max <= (1 << OffFSELog)); /* (checked by table construction) */ + for (u = 0; u < max; u++) + { + info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits); + if (table[u].nbAdditionalBits > 22) + info.longOffsetShare += 1; + } + + assert(tableLog <= OffFSELog); + info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + } + + return info; +} + +/** + * @returns The maximum offset we can decode in one read of our bitstream, without + * reloading more bits in the middle of the offset bits read. Any offsets larger + * than this must use the long offset decoder. + */ +static size_t ZSTD_maxShortOffset(void) { + if (MEM_64bits()) + { + /* We can decode any offset without reloading bits. + * This might change if the max window size grows. + */ + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + return (size_t) -1; + } + else + { + /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1. + * This offBase would require STREAM_ACCUMULATOR_MIN extra bits. + * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
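 * (Editorial aside, not upstream zstd text: plugging in the 32-bit values from the
 * vendored headers, STREAM_ACCUMULATOR_MIN = 25 and ZSTD_REP_NUM = 3, the branch
 * below computes maxOffbase = (1 << 26) - 1 = 0x3FFFFFF and
 * maxOffset = 0x3FFFFFF - 3 = 0x3FFFFFC, i.e. just under 64 MiB.)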
+ */ + size_t const maxOffbase = ((size_t) 1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1; + size_t const maxOffset = maxOffbase - ZSTD_REP_NUM; + assert(ZSTD_highbit32((U32) maxOffbase) == STREAM_ACCUMULATOR_MIN); + return maxOffset; + } +} + +size_t ZSTD_decompressBlock_internal( + ZSTD_DCtx* dctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize, + const streaming_operation streaming) { /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*) src; + DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned) srcSize); + + /* Note : the wording of the specification + * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). + * This generally does not happen, as it makes little sense, + * since an uncompressed block would feature same size and have no decompression cost. + * Also, note that decoder from reference libzstd before < v1.5.4 + * would consider this edge case as an error. + * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) + * for broader compatibility with the deployed ecosystem of zstd decoders */ + RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); + + /* Decode literals section */ + { + size_t const litCSize = + ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32) litCSize, + dctx->litSize); + if (ZSTD_isError(litCSize)) + return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* Compute the maximum block size, which must also work when !frame and fParams are unset. + * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. + */ + size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); + size_t const totalHistorySize = ZSTD_totalHistorySize( + ZSTD_maybeNullPtrAdd((BYTE*) dst, blockSizeMax), (BYTE const*) dctx->virtualStart); + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than ZSTD_maxShortOffset(). + * We don't expect that to be the case in 64-bit mode. + * + * We check here to see if our history is large enough to allow long offsets. + * If it isn't, then we can't possible have (valid) long offsets. If the offset + * is invalid, then it is okay to read it incorrectly. + * + * If isLongOffsets is true, then we will later check our decoding table to see + * if it is even possible to generate long offsets. + */ + ZSTD_longOffset_e isLongOffset = + (ZSTD_longOffset_e) (MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset())); + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) \ + && !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#else + /* Set to 1 to avoid computing offset info if we don't need to. + * Otherwise this value is ignored. 
+ */ + int usePrefetchDecoder = 1; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) + return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, + "NULL not handled"); + RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) + && (size_t) (-1) - (size_t) dst < (size_t) (1 << 20), + dstSize_tooSmall, "invalid dst"); + + /* If we could potentially have long offsets, or we might want to use the prefetch decoder, + * compute information about the share of long offsets, and the maximum nbAdditionalBits. + * NOTE: could probably use a larger nbSeq limit + */ + if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) + { + ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq); + if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) + { + /* If isLongOffset, but the maximum number of additional bits that we see in our table is small + * enough, then we know it is impossible to have too long an offset in this block, so we can + * use the regular offset decoder. + */ + isLongOffset = ZSTD_lo_isRegularOffset; + } + if (!usePrefetchDecoder) + { + U32 const minShare = + MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (info.longOffsetShare >= minShare); + } + } + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) \ + && !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) + { +#else + (void) usePrefetchDecoder; + { +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, + isLongOffset); +#endif + } + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + if (dctx->litBufferLocation == ZSTD_split) + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, + nbSeq, isLongOffset); + else + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, + isLongOffset); +#endif + } +} + + +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) { + if (dst != dctx->previousDstEnd && dstSize > 0) + { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = + (const char*) dst + - ((const char*) (dctx->previousDstEnd) - (const char*) (dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock_deprecated( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + size_t dSize; + dctx->isFrameDecompression = 0; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); + FORWARD_IF_ERROR(dSize, ""); + dctx->previousDstEnd = (char*) dst + dSize; + return dSize; +} + + +/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */ +size_t ZSTD_decompressBlock( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize); +} diff --git a/src/external/decompress/zstd_decompress_block.h b/src/external/decompress/zstd_decompress_block.h new file mode 100644 index 00000000..0ffb268f --- /dev/null +++ b/src/external/decompress/zstd_decompress_block.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) Meta Platforms, Inc. 
and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/zstd_deps.h" /* size_t */ +#include "../zstd.h" /* DCtx, and some public functions */ +#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */ +#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* Streaming state is used to inform allocation of the literal buffer */ +typedef enum { + not_streaming = 0, + is_streaming = 1 +} streaming_operation; + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize, + const streaming_operation streaming); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. + */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, + unsigned maxSymbolValue, + const U32* baseValue, + const U8* nbAdditionalBits, + unsigned tableLog, + void* wksp, + size_t wkspSize, + int bmi2); + +/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */ +size_t ZSTD_decompressBlock_deprecated( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +#endif /* ZSTD_DEC_BLOCK_H */ diff --git a/src/external/decompress/zstd_decompress_internal.h b/src/external/decompress/zstd_decompress_internal.h new file mode 100644 index 00000000..c9d0854b --- /dev/null +++ b/src/external/decompress/zstd_decompress_internal.h @@ -0,0 +1,258 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + +#ifndef ZSTD_DECOMPRESS_INTERNAL_H +#define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/mem.h" /* BYTE, U16, U32 */ +#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL + 1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; + +static UNUSED_ATTR const U32 OF_base[MaxOff + 1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD}; + +static UNUSED_ATTR const U8 OF_bits[MaxOff + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + +static UNUSED_ATTR const U32 ML_base[MaxML + 1] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, + 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; + + +/*-******************************************************* + * Decompression types + *********************************************************/ +typedef struct { + U32 fastMode; + U32 tableLog; +} ZSTD_seqSymbol_header; + +typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; +} ZSTD_seqSymbol; + +#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE \ + (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 \ + ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) +#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 + +typedef struct { + ZSTD_seqSymbol + LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE( + OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE( + MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE( + ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { + ZSTDds_getFrameHeaderSize, + ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, + ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, + ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, + ZSTDds_skipFrame +} ZSTD_dStage; + +typedef enum { + zdss_init = 0, + zdss_loadHeader, + zdss_read, + zdss_load, + zdss_flush +} ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset 
for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +#ifndef ZSTD_DECODER_INTERNAL_BUFFER + #define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16) +#endif + +#define ZSTD_LBMIN 64 +#define ZSTD_LBMAX (128 << 10) + +/* extra buffer, compensates when dst is not large enough to store litBuffer */ +#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX) + +typedef enum { + ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */ + ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */ + ZSTD_split = 2 /* Split between litExtraBuffer and dst */ +} ZSTD_litLocation_e; + +struct ZSTD_DCtx_s { + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 + workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* + virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e + bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e + forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 + validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int isFrameDecompression; +#if DYNAMIC_BMI2 != 0 + int + bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ +#endif + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int + ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e + refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + int disableHufAsm; + int maxBlockSizeParam; + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; +#endif + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE* litBuffer; + const BYTE* litBufferEnd; + ZSTD_litLocation_e litBufferLocation; + BYTE litExtraBuffer + [ZSTD_LITBUFFEREXTRASIZE + + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s* dctx) { +#if DYNAMIC_BMI2 != 0 + return dctx->bmi2; +#else + (void) dctx; + return 0; +#endif +} + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/src/external/miniz.h b/src/external/miniz.h deleted file mode 100644 index c894d3dc..00000000 --- a/src/external/miniz.h +++ /dev/null @@ -1,10568 +0,0 @@ -#ifndef MINIZ_EXPORT - #define MINIZ_EXPORT -#endif -/* miniz.c 3.0.2 - public domain deflate/inflate, zlib-subset, ZIP - reading/writing/appending, PNG writing See "unlicense" statement at the end - of this file. Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, - 2013 Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: - http://www.ietf.org/rfc/rfc1951.txt - - Most API's defined in miniz.c are optional. For example, to disable the - archive related functions just define MINIZ_NO_ARCHIVE_APIS, or to get rid of - all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). - - * Low-level Deflate/Inflate implementation notes: - - Compression: Use the "tdefl" API's. The compressor supports raw, static, - and dynamic blocks, lazy or greedy parsing, match length filtering, RLE-only, - and Huffman-only streams. It performs and compresses approximately as well as - zlib. - - Decompression: Use the "tinfl" API's. The entire decompressor is - implemented as a single function coroutine: see tinfl_decompress(). It - supports decompression into a 32KB (or larger power of 2) wrapping buffer, or - into a memory block large enough to hold the entire file.
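For contrast with the coroutine decompressor described above, here is a minimal sketch of the zstd streaming loop that takes over this role. It assumes only the public API from the vendored zstd.h (ZSTD_createDStream(), ZSTD_decompressStream(), ZSTD_DStreamInSize()/ZSTD_DStreamOutSize()) and is illustrative only, not code from this patch:

    #include <stdio.h>
    #include <stdlib.h>
    #include "zstd.h"

    /* Pump fin through a ZSTD_DStream into fout; returns 1 on success, 0 on error. */
    static int decompress_file(FILE* fin, FILE* fout) {
        size_t const inCap  = ZSTD_DStreamInSize();  /* library-recommended chunk sizes */
        size_t const outCap = ZSTD_DStreamOutSize();
        void*  inBuf  = malloc(inCap);
        void*  outBuf = malloc(outCap);
        ZSTD_DStream* ds = ZSTD_createDStream();     /* a fresh DStream starts initialized */
        int ok = inBuf && outBuf && ds;
        size_t readn;
        while (ok && (readn = fread(inBuf, 1, inCap, fin)) > 0) {
            ZSTD_inBuffer in = {inBuf, readn, 0};
            while (ok && in.pos < in.size) {         /* drain all output for this chunk */
                ZSTD_outBuffer out = {outBuf, outCap, 0};
                size_t const ret = ZSTD_decompressStream(ds, &out, &in);
                if (ZSTD_isError(ret) || fwrite(outBuf, 1, out.pos, fout) != out.pos)
                    ok = 0;
            }
        }
        ZSTD_freeDStream(ds);
        free(inBuf);
        free(outBuf);
        return ok;
    }

Unlike the wrapping-buffer scheme above, the caller never sizes the output to the whole file: zstd flushes each decompressed chunk through a caller-owned ZSTD_outBuffer.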
- - The low-level tdefl/tinfl API's do not make any use of dynamic memory - allocation. - - * zlib-style API notes: - - miniz.c implements a fairly large subset of zlib. There's enough - functionality present for it to be a drop-in zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateReset/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly - routines. Supports raw deflate streams or standard zlib streams with adler-32 - checking. - - Limitations: - The callback API's are not implemented yet. No support for gzip headers or - zlib static dictionaries. I've tried to closely emulate zlib's various - flavors of stream flushing and return status codes, but there are no - guarantees that miniz.c pulls this off perfectly. - - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, - originally written by Alex Evans. Supports 1-4 bytes/pixel images. - - * ZIP archive API notes: - - The ZIP archive API's where designed with simplicity and efficiency in - mind, with just enough abstraction to get the job done with minimal fuss. - There are simple API's to retrieve file information, read files from existing - archives, create new archives, append new files to existing archives, or - clone archive data from one archive to another. It supports archives located - in memory or the heap, on disk (using stdio.h), or you can specify custom - file read/write callbacks. - - - Archive reading: Just call this function to read a single file from a - disk archive: - - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const - char *pArchive_name, size_t *pSize, mz_uint zip_flags); - - For more complex cases, use the "mz_zip_reader" functions. Upon opening an - archive, the entire central directory is located and read as-is into memory, - and subsequent file access only occurs when reading individual files. - - - Archives file scanning: The simple way is to use this function to scan a - loaded archive for a specific file: - - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - - The locate operation can optionally check file comments too, which (as one - example) can be used to identify multiple versions of the same file in an - archive. This function uses a simple linear search through the central - directory, so it's not very fast. - - Alternately, you can iterate through all the files in an archive (using - mz_zip_reader_get_num_files()) and retrieve detailed info on each file by - calling mz_zip_reader_file_stat(). - - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer - immediately writes compressed file data to disk and builds an exact image of - the central directory in memory. The central directory image is written all - at once at the end of the archive file when the archive is finalized. - - The archive writer can optionally align each file's local header and file - data to any power of 2 alignment, which can be useful when the archive will - be read from optical media. Also, the writer supports placing arbitrary data - blobs at the very beginning of ZIP archives. Archives written using either - feature are still readable by any ZIP tool. 
- - - Archive appending: The simple way to add a single file to an archive is - to call this function: - - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, - const char *pArchive_name, const void *pBuf, size_t buf_size, const void - *pComment, mz_uint16 comment_size, mz_uint level_and_flags); - - The archive will be created if it doesn't already exist, otherwise it'll be - appended to. Note the appending is done in-place and is not an atomic - operation, so if something goes wrong during the operation it's possible the - archive could be left without a central directory (although the local file - headers and file data will be fine, so the archive will be recoverable). - - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, - cloning only those bits you want to preserve into a new archive using using - the mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). When you're done, delete the old archive and - rename the newly written archive, and you're done. This is safe but requires - a bunch of temporary disk space or heap memory. - - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using - mz_zip_writer_init_from_reader(), append new files as needed, then finalize - the archive which will write an updated central directory to the original - archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() - does.) There's a possibility that the archive's central directory could be - lost with this method if anything goes wrong, though. - - - ZIP archive support limitations: - No spanning support. Extraction functions can only handle unencrypted, - stored or deflated files. Requires streams capable of seeking. - - * This is a header file library, like stb_image.c. To get only a header file, - either cut and paste the below header, or create miniz.h, #define - MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. - - * Important: For best perf. be sure to customize the below macros for your - target platform: #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 #define - MINIZ_LITTLE_ENDIAN 1 #define MINIZ_HAS_64BIT_REGISTERS 1 - - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before - including miniz.c to ensure miniz uses the 64-bit variants: fopen64(), - stat64(), etc. Otherwise you won't be able to process large files (i.e. - 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). -*/ -#pragma once - -/* Defines to completely disable specific portions of miniz.c: - If all macros here are defined the only functionality remaining will be - CRC-32 and adler-32. */ - -/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on - * stdio for file I/O. */ -/*#define MINIZ_NO_STDIO */ - -/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able - * to get the current time, or */ -/* get/set file times, and the C run-time funcs that get/set times won't be - * called. */ -/* The current downside is the times written to your archives will be from 1979. - */ -/*#define MINIZ_NO_TIME */ - -/* Define MINIZ_NO_DEFLATE_APIS to disable all compression API's. */ -/*#define MINIZ_NO_DEFLATE_APIS */ - -/* Define MINIZ_NO_INFLATE_APIS to disable all decompression API's. */ -/*#define MINIZ_NO_INFLATE_APIS */ - -/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. 
*/ -/*#define MINIZ_NO_ARCHIVE_APIS */ - -/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP - * archive API's. */ -/*#define MINIZ_NO_ARCHIVE_WRITING_APIS */ - -/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression - * API's. */ -/*#define MINIZ_NO_ZLIB_APIS */ - -/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent - * conflicts against stock zlib. */ -/*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ - -/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. - Note if MINIZ_NO_MALLOC is defined then the user must always provide custom - user alloc/free/realloc callbacks to the zlib and archive API's, and a few - stand-alone helper API's which don't provide custom user functions (such as - tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. - */ -/*#define MINIZ_NO_MALLOC */ - -#ifdef MINIZ_NO_INFLATE_APIS - #define MINIZ_NO_ARCHIVE_APIS -#endif - -#ifdef MINIZ_NO_DEFLATE_APIS - #define MINIZ_NO_ARCHIVE_WRITING_APIS -#endif - -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) - /* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc - * on Linux */ - #define MINIZ_NO_TIME -#endif - -#include <stddef.h> - -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) - #include <time.h> -#endif - -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) \ - || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) \ - || defined(__x86_64__) - /* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ - #define MINIZ_X86_OR_X64_CPU 1 -#else - #define MINIZ_X86_OR_X64_CPU 0 -#endif - -/* Set MINIZ_LITTLE_ENDIAN only if not set */ -#if !defined(MINIZ_LITTLE_ENDIAN) - #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) - - #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - /* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ - #define MINIZ_LITTLE_ENDIAN 1 - #else - #define MINIZ_LITTLE_ENDIAN 0 - #endif - - #else - - #if MINIZ_X86_OR_X64_CPU - #define MINIZ_LITTLE_ENDIAN 1 - #else - #define MINIZ_LITTLE_ENDIAN 0 - #endif - - #endif -#endif - -/* Using unaligned loads and stores causes errors when using UBSan */ -#if defined(__has_feature) - #if __has_feature(undefined_behavior_sanitizer) - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 - #endif -#endif - -/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */ -#if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES) - #if MINIZ_X86_OR_X64_CPU - /* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient - * integer loads and stores from unaligned addresses. */ - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 - #define MINIZ_UNALIGNED_USE_MEMCPY - #else - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 - #endif -#endif - -#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) \ - || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) - /* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are - * reasonably fast (and don't involve compiler generated calls to helper - * functions). */ - #define MINIZ_HAS_64BIT_REGISTERS 1 -#else - #define MINIZ_HAS_64BIT_REGISTERS 0 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* ------------------- zlib-style API Definitions. */ - -/* For more compatibility with zlib, miniz.c uses unsigned long for some - * parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
*/ -typedef unsigned long mz_ulong; - -/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() - * unless you've modified the MZ_MALLOC macro) to release a block allocated from - * the heap. */ -MINIZ_EXPORT void mz_free(void* p); - -#define MZ_ADLER32_INIT (1) -/* mz_adler32() returns the initial adler-32 value to use when called with - * ptr==NULL. */ -MINIZ_EXPORT mz_ulong mz_adler32(mz_ulong adler, const unsigned char* ptr, size_t buf_len); - -#define MZ_CRC32_INIT (0) -/* mz_crc32() returns the initial CRC-32 value to use when called with - * ptr==NULL. */ -MINIZ_EXPORT mz_ulong mz_crc32(mz_ulong crc, const unsigned char* ptr, size_t buf_len); - -/* Compression strategies. */ -enum { - MZ_DEFAULT_STRATEGY = 0, - MZ_FILTERED = 1, - MZ_HUFFMAN_ONLY = 2, - MZ_RLE = 3, - MZ_FIXED = 4 -}; - -/* Method */ -#define MZ_DEFLATED 8 - -/* Heap allocation callbacks. -Note that mz_alloc_func parameter types purposely differ from zlib's: items/size -is size_t, not unsigned long. */ -typedef void* (*mz_alloc_func)(void* opaque, size_t items, size_t size); -typedef void (*mz_free_func)(void* opaque, void* address); -typedef void* (*mz_realloc_func)(void* opaque, void* address, size_t items, size_t size); - -/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best - * possible compression (not zlib compatible, and may be very slow), - * MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */ -enum { - MZ_NO_COMPRESSION = 0, - MZ_BEST_SPEED = 1, - MZ_BEST_COMPRESSION = 9, - MZ_UBER_COMPRESSION = 10, - MZ_DEFAULT_LEVEL = 6, - MZ_DEFAULT_COMPRESSION = -1 -}; - -#define MZ_VERSION "11.0.2" -#define MZ_VERNUM 0xB002 -#define MZ_VER_MAJOR 11 -#define MZ_VER_MINOR 2 -#define MZ_VER_REVISION 0 -#define MZ_VER_SUBREVISION 0 - -#ifndef MINIZ_NO_ZLIB_APIS - -/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The - * other values are for advanced use (refer to the zlib docs). */ -enum { - MZ_NO_FLUSH = 0, - MZ_PARTIAL_FLUSH = 1, - MZ_SYNC_FLUSH = 2, - MZ_FULL_FLUSH = 3, - MZ_FINISH = 4, - MZ_BLOCK = 5 -}; - -/* Return status codes. MZ_PARAM_ERROR is non-standard. */ -enum { - MZ_OK = 0, - MZ_STREAM_END = 1, - MZ_NEED_DICT = 2, - MZ_ERRNO = -1, - MZ_STREAM_ERROR = -2, - MZ_DATA_ERROR = -3, - MZ_MEM_ERROR = -4, - MZ_BUF_ERROR = -5, - MZ_VERSION_ERROR = -6, - MZ_PARAM_ERROR = -10000 -}; - - /* Window bits */ - #define MZ_DEFAULT_WINDOW_BITS 15 - -struct mz_internal_state; - -/* Compression/decompression stream struct. */ -typedef struct mz_stream_s { - const unsigned char* next_in; /* pointer to next byte to read */ - unsigned int avail_in; /* number of bytes available at next_in */ - mz_ulong total_in; /* total number of bytes consumed so far */ - - unsigned char* next_out; /* pointer to next byte to write */ - unsigned int avail_out; /* number of bytes that can be written to next_out */ - mz_ulong total_out; /* total number of bytes produced so far */ - - char* msg; /* error msg (unused) */ - struct mz_internal_state* state; /* internal state, allocated by zalloc/zfree */ - - mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ - mz_free_func zfree; /* optional heap free function (defaults to free) */ - void* opaque; /* heap alloc function user pointer */ - - int data_type; /* data_type (unused) */ - mz_ulong adler; /* adler32 of the source or uncompressed data */ - mz_ulong reserved; /* not used */ -} mz_stream; - -typedef mz_stream* mz_streamp; - -/* Returns the version string of miniz.c. 
*/ -MINIZ_EXPORT const char* mz_version(void); - - #ifndef MINIZ_NO_DEFLATE_APIS - -/* mz_deflateInit() initializes a compressor with default options: */ -/* Parameters: */ -/* pStream must point to an initialized mz_stream struct. */ -/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ -/* level 1 enables a specially optimized compression function that's been - * optimized purely for performance, not ratio. */ -/* (This special func. is currently only enabled when - * MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ -/* Return values: */ -/* MZ_OK on success. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_PARAM_ERROR if the input parameters are bogus. */ -/* MZ_MEM_ERROR on out of memory. */ -MINIZ_EXPORT int mz_deflateInit(mz_streamp pStream, int level); - -/* mz_deflateInit2() is like mz_deflate(), except with more control: */ -/* Additional parameters: */ -/* method must be MZ_DEFLATED */ -/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with - * zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no - * header or footer) */ -/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ -MINIZ_EXPORT int mz_deflateInit2( - mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); - -/* Quickly resets a compressor without having to reallocate anything. Same as - * calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ -MINIZ_EXPORT int mz_deflateReset(mz_streamp pStream); - -/* mz_deflate() compresses the input to output, consuming as much of the input - * and producing as much output as possible. */ -/* Parameters: */ -/* pStream is the stream to read from and write to. You must initialize/update - * the next_in, avail_in, next_out, and avail_out members. */ -/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or - * MZ_FINISH. */ -/* Return values: */ -/* MZ_OK on success (when flushing, or if more input is needed but not - * available, and/or there's more output to be written but the output buffer is - * full). */ -/* MZ_STREAM_END if all input has been consumed and all output bytes have been - * written. Don't call mz_deflate() on the stream anymore. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_PARAM_ERROR if one of the parameters is invalid. */ -/* MZ_BUF_ERROR if no forward progress is possible because the input and/or - * output buffers are empty. (Fill up the input buffer or free up some output - * space and try again.) */ -MINIZ_EXPORT int mz_deflate(mz_streamp pStream, int flush); - -/* mz_deflateEnd() deinitializes a compressor: */ -/* Return values: */ -/* MZ_OK on success. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -MINIZ_EXPORT int mz_deflateEnd(mz_streamp pStream); - -/* mz_deflateBound() returns a (very) conservative upper bound on the amount of - * data that could be generated by deflate(), assuming flush is set to only - * MZ_NO_FLUSH or MZ_FINISH. */ -MINIZ_EXPORT mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); - -/* Single-call compression functions mz_compress() and mz_compress2(): */ -/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on - * failure. 
*/ -MINIZ_EXPORT int mz_compress(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong source_len); -MINIZ_EXPORT int mz_compress2(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong source_len, - int level); - -/* mz_compressBound() returns a (very) conservative upper bound on the amount of - * data that could be generated by calling mz_compress(). */ -MINIZ_EXPORT mz_ulong mz_compressBound(mz_ulong source_len); - - #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ - - #ifndef MINIZ_NO_INFLATE_APIS - -/* Initializes a decompressor. */ -MINIZ_EXPORT int mz_inflateInit(mz_streamp pStream); - -/* mz_inflateInit2() is like mz_inflateInit() with an additional option that - * controls the window size and whether or not the stream has been wrapped with - * a zlib header/footer: */ -/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or - * -MZ_DEFAULT_WINDOW_BITS (raw deflate). */ -MINIZ_EXPORT int mz_inflateInit2(mz_streamp pStream, int window_bits); - -/* Quickly resets a compressor without having to reallocate anything. Same as - * calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */ -MINIZ_EXPORT int mz_inflateReset(mz_streamp pStream); - -/* Decompresses the input stream to the output, consuming only as much of the - * input as needed, and writing as much to the output as possible. */ -/* Parameters: */ -/* pStream is the stream to read from and write to. You must initialize/update - * the next_in, avail_in, next_out, and avail_out members. */ -/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */ -/* On the first call, if flush is MZ_FINISH it's assumed the input and output - * buffers are both sized large enough to decompress the entire stream in a - * single call (this is slightly faster). */ -/* MZ_FINISH implies that there are no more source bytes available beside - * what's already in the input buffer, and that the output buffer is large - * enough to hold the rest of the decompressed data. */ -/* Return values: */ -/* MZ_OK on success. Either more input is needed but not available, and/or - * there's more output to be written but the output buffer is full. */ -/* MZ_STREAM_END if all needed input has been consumed and all output bytes - * have been written. For zlib streams, the adler-32 of the decompressed data - * has also been verified. */ -/* MZ_STREAM_ERROR if the stream is bogus. */ -/* MZ_DATA_ERROR if the deflate stream is invalid. */ -/* MZ_PARAM_ERROR if one of the parameters is invalid. */ -/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is - * empty but the inflater needs more input to continue, or if the output buffer - * is not large enough. Call mz_inflate() again */ -/* with more input data, or with more room in the output buffer (except when - * using single call decompression, described above). */ -MINIZ_EXPORT int mz_inflate(mz_streamp pStream, int flush); - -/* Deinitializes a decompressor. */ -MINIZ_EXPORT int mz_inflateEnd(mz_streamp pStream); - -/* Single-call decompression. */ -/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on - * failure. 
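mz_uncompress()/mz_uncompress2(), deleted just below, are the single-call entry points on this side of the API. For comparison, a minimal sketch of the one-shot zstd equivalent, assuming only ZSTD_decompress(), ZSTD_isError() and ZSTD_getErrorName() from the vendored zstd.h (illustrative, not code from this patch):

    #include <stdio.h>
    #include "zstd.h"

    /* One-shot decompression sketch: returns the decompressed size,
       or 0 after printing a message on error. */
    static size_t decompress_buffer(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize) {
        size_t const dSize = ZSTD_decompress(dst, dstCapacity, src, srcSize);
        if (ZSTD_isError(dSize)) {
            fprintf(stderr, "zstd: %s\n", ZSTD_getErrorName(dSize));
            return 0;
        }
        return dSize;
    }

As with mz_uncompress(), the destination capacity must be known up front; ZSTD_getFrameContentSize() can recover it from the frame header when the compressor recorded it there.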
*/ -MINIZ_EXPORT int mz_uncompress(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong source_len); -MINIZ_EXPORT int mz_uncompress2(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong* pSource_len); - #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ - -/* Returns a string description of the specified error code, or NULL if the - * error code is invalid. */ -MINIZ_EXPORT const char* mz_error(int err); - - /* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used - * as a drop-in replacement for the subset of zlib that miniz.c supports. */ - /* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you - * use zlib in the same project. */ - #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES -typedef unsigned char Byte; -typedef unsigned int uInt; -typedef mz_ulong uLong; -typedef Byte Bytef; -typedef uInt uIntf; -typedef char charf; -typedef int intf; -typedef void* voidpf; -typedef uLong uLongf; -typedef void* voidp; -typedef void* const voidpc; - #define Z_NULL 0 - #define Z_NO_FLUSH MZ_NO_FLUSH - #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH - #define Z_SYNC_FLUSH MZ_SYNC_FLUSH - #define Z_FULL_FLUSH MZ_FULL_FLUSH - #define Z_FINISH MZ_FINISH - #define Z_BLOCK MZ_BLOCK - #define Z_OK MZ_OK - #define Z_STREAM_END MZ_STREAM_END - #define Z_NEED_DICT MZ_NEED_DICT - #define Z_ERRNO MZ_ERRNO - #define Z_STREAM_ERROR MZ_STREAM_ERROR - #define Z_DATA_ERROR MZ_DATA_ERROR - #define Z_MEM_ERROR MZ_MEM_ERROR - #define Z_BUF_ERROR MZ_BUF_ERROR - #define Z_VERSION_ERROR MZ_VERSION_ERROR - #define Z_PARAM_ERROR MZ_PARAM_ERROR - #define Z_NO_COMPRESSION MZ_NO_COMPRESSION - #define Z_BEST_SPEED MZ_BEST_SPEED - #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION - #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION - #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY - #define Z_FILTERED MZ_FILTERED - #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY - #define Z_RLE MZ_RLE - #define Z_FIXED MZ_FIXED - #define Z_DEFLATED MZ_DEFLATED - #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS - #define alloc_func mz_alloc_func - #define free_func mz_free_func - #define internal_state mz_internal_state - #define z_stream mz_stream - - #ifndef MINIZ_NO_DEFLATE_APIS - #define deflateInit mz_deflateInit - #define deflateInit2 mz_deflateInit2 - #define deflateReset mz_deflateReset - #define deflate mz_deflate - #define deflateEnd mz_deflateEnd - #define deflateBound mz_deflateBound - #define compress mz_compress - #define compress2 mz_compress2 - #define compressBound mz_compressBound - #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ - - #ifndef MINIZ_NO_INFLATE_APIS - #define inflateInit mz_inflateInit - #define inflateInit2 mz_inflateInit2 - #define inflateReset mz_inflateReset - #define inflate mz_inflate - #define inflateEnd mz_inflateEnd - #define uncompress mz_uncompress - #define uncompress2 mz_uncompress2 - #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ - - #define crc32 mz_crc32 - #define adler32 mz_adler32 - #define MAX_WBITS 15 - #define MAX_MEM_LEVEL 9 - #define zError mz_error - #define ZLIB_VERSION MZ_VERSION - #define ZLIB_VERNUM MZ_VERNUM - #define ZLIB_VER_MAJOR MZ_VER_MAJOR - #define ZLIB_VER_MINOR MZ_VER_MINOR - #define ZLIB_VER_REVISION MZ_VER_REVISION - #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION - #define zlibVersion mz_version - #define zlib_version mz_version() - #endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ - -#endif /* MINIZ_NO_ZLIB_APIS */ - -#ifdef __cplusplus -} -#endif - -#pragma once -#include <assert.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -
-/* ------------------- Types and macros */ -typedef unsigned char mz_uint8; -typedef signed short mz_int16; -typedef unsigned short mz_uint16; -typedef unsigned int mz_uint32; -typedef unsigned int mz_uint; -typedef int64_t mz_int64; -typedef uint64_t mz_uint64; -typedef int mz_bool; - -#define MZ_FALSE (0) -#define MZ_TRUE (1) - -/* Works around MSVC's spammy "warning C4127: conditional expression is - * constant" message. */ -#ifdef _MSC_VER - #define MZ_MACRO_END while (0, 0) -#else - #define MZ_MACRO_END while (0) -#endif - -#ifdef MINIZ_NO_STDIO - #define MZ_FILE void* -#else - #include <stdio.h> - #define MZ_FILE FILE -#endif /* #ifdef MINIZ_NO_STDIO */ - -#ifdef MINIZ_NO_TIME -typedef struct mz_dummy_time_t_tag { - mz_uint32 m_dummy1; - mz_uint32 m_dummy2; -} mz_dummy_time_t; - #define MZ_TIME_T mz_dummy_time_t -#else - #define MZ_TIME_T time_t -#endif - -#define MZ_ASSERT(x) assert(x) - -#ifdef MINIZ_NO_MALLOC - #define MZ_MALLOC(x) NULL - #define MZ_FREE(x) (void) x, ((void) 0) - #define MZ_REALLOC(p, x) NULL -#else - #define MZ_MALLOC(x) malloc(x) - #define MZ_FREE(x) free(x) - #define MZ_REALLOC(p, x) realloc(p, x) -#endif - -#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) -#define MZ_CLEAR_ARR(obj) memset((obj), 0, sizeof(obj)) -#define MZ_CLEAR_PTR(obj) memset((obj), 0, sizeof(*obj)) - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - #define MZ_READ_LE16(p) *((const mz_uint16*) (p)) - #define MZ_READ_LE32(p) *((const mz_uint32*) (p)) -#else - #define MZ_READ_LE16(p) \ - ((mz_uint32) (((const mz_uint8*) (p))[0]) \ - | ((mz_uint32) (((const mz_uint8*) (p))[1]) << 8U)) - #define MZ_READ_LE32(p) \ - ((mz_uint32) (((const mz_uint8*) (p))[0]) \ - | ((mz_uint32) (((const mz_uint8*) (p))[1]) << 8U) \ - | ((mz_uint32) (((const mz_uint8*) (p))[2]) << 16U) \ - | ((mz_uint32) (((const mz_uint8*) (p))[3]) << 24U)) -#endif - -#define MZ_READ_LE64(p) \ - (((mz_uint64) MZ_READ_LE32(p)) \ - | (((mz_uint64) MZ_READ_LE32((const mz_uint8*) (p) + sizeof(mz_uint32))) << 32U)) - -#ifdef _MSC_VER - #define MZ_FORCEINLINE __forceinline -#elif defined(__GNUC__) - #define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) -#else - #define MZ_FORCEINLINE inline -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -extern MINIZ_EXPORT void* miniz_def_alloc_func(void* opaque, size_t items, size_t size); -extern MINIZ_EXPORT void miniz_def_free_func(void* opaque, void* address); -extern MINIZ_EXPORT void* -miniz_def_realloc_func(void* opaque, void* address, size_t items, size_t size); - -#define MZ_UINT16_MAX (0xFFFFU) -#define MZ_UINT32_MAX (0xFFFFFFFFU) - -#ifdef __cplusplus -} -#endif -#pragma once - -#ifndef MINIZ_NO_DEFLATE_APIS - - #ifdef __cplusplus -extern "C" { - #endif - /* ------------------- Low-level Compression API Definitions */ - - /* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly - * slower, and raw/dynamic blocks will be output more frequently). */ - #ifndef TDEFL_LESS_MEMORY - #define TDEFL_LESS_MEMORY 0 - #endif - -/* tdefl_init() compression flags logically OR'd together (low 12 bits contain - * the max. number of probes per dictionary search): */ -/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes - * per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap - * compression), 4095=Huffman+LZ (slowest/best compression).
*/ -enum { - TDEFL_HUFFMAN_ONLY = 0, - TDEFL_DEFAULT_MAX_PROBES = 128, - TDEFL_MAX_PROBES_MASK = 0xFFF -}; - -/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before - * the deflate data, and the Adler-32 of the source data at the end. Otherwise, - * you'll get raw deflate data. */ -/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even - * when not writing zlib headers). */ -/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more - * efficient lazy parsing. */ -/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's - * initialization time to the minimum, but the output may vary from run to run - * given the same input (depending on the contents of memory). */ -/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) - */ -/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ -/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ -/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ -/* The low 12 bits are reserved to control the max # of hash probes per - * dictionary lookup (see TDEFL_MAX_PROBES_MASK). */ -enum { - TDEFL_WRITE_ZLIB_HEADER = 0x01000, - TDEFL_COMPUTE_ADLER32 = 0x02000, - TDEFL_GREEDY_PARSING_FLAG = 0x04000, - TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, - TDEFL_RLE_MATCHES = 0x10000, - TDEFL_FILTER_MATCHES = 0x20000, - TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, - TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 -}; - -/* High level compression functions: */ -/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block - * allocated via malloc(). */ -/* On entry: */ -/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ -/* flags: The max match finder probes (default is 128) logically OR'd against - * the above flags. Higher probes are slower but improve compression. */ -/* On return: */ -/* Function returns a pointer to the compressed data, or NULL on failure. */ -/* *pOut_len will be set to the compressed data's size, which could be larger - * than src_buf_len on uncompressible data. */ -/* The caller must free() the returned block when it's no longer needed. */ -MINIZ_EXPORT void* -tdefl_compress_mem_to_heap(const void* pSrc_buf, size_t src_buf_len, size_t* pOut_len, int flags); - -/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in - * memory. */ -/* Returns 0 on failure. */ -MINIZ_EXPORT size_t tdefl_compress_mem_to_mem( - void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags); - -/* Compresses an image to a compressed PNG file in memory. */ -/* On entry: */ -/* pImage, w, h, and num_chans describe the image to compress. num_chans may be - * 1, 2, 3, or 4. */ -/* The image pitch in bytes per scanline will be w*num_chans. The leftmost - * pixel on the top scanline is stored first in memory. */ -/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, - * MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */ -/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL - * apps). */ -/* On return: */ -/* Function returns a pointer to the compressed data, or NULL on failure. */ -/* *pLen_out will be set to the size of the PNG image file. */ -/* The caller must mz_free() the returned heap block (which will typically be - * larger than *pLen_out) when it's no longer needed. 
*/ -MINIZ_EXPORT void* tdefl_write_image_to_png_file_in_memory_ex( - const void* pImage, int w, int h, int num_chans, size_t* pLen_out, mz_uint level, mz_bool flip); -MINIZ_EXPORT void* tdefl_write_image_to_png_file_in_memory( - const void* pImage, int w, int h, int num_chans, size_t* pLen_out); - -/* Output stream interface. The compressor uses this interface to write - * compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ -typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void* pUser); - -/* tdefl_compress_mem_to_output() compresses a block to an output stream. The - * above helpers use this function internally. */ -MINIZ_EXPORT mz_bool tdefl_compress_mem_to_output(const void* pBuf, - size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void* pPut_buf_user, - int flags); - -enum { - TDEFL_MAX_HUFF_TABLES = 3, - TDEFL_MAX_HUFF_SYMBOLS_0 = 288, - TDEFL_MAX_HUFF_SYMBOLS_1 = 32, - TDEFL_MAX_HUFF_SYMBOLS_2 = 19, - TDEFL_LZ_DICT_SIZE = 32768, - TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, - TDEFL_MIN_MATCH_LEN = 3, - TDEFL_MAX_MATCH_LEN = 258 -}; - - /* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed - * output block (using static/fixed Huffman codes). */ - #if TDEFL_LESS_MEMORY -enum { - TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 12, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; - #else -enum { - TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 15, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; - #endif - -/* The low-level tdefl functions below may be used directly if the above helper - * functions aren't flexible enough. The low-level functions don't make any heap - * allocations, unlike the above helper functions. */ -typedef enum { - TDEFL_STATUS_BAD_PARAM = -2, - TDEFL_STATUS_PUT_BUF_FAILED = -1, - TDEFL_STATUS_OKAY = 0, - TDEFL_STATUS_DONE = 1 -} tdefl_status; - -/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ -typedef enum { - TDEFL_NO_FLUSH = 0, - TDEFL_SYNC_FLUSH = 2, - TDEFL_FULL_FLUSH = 3, - TDEFL_FINISH = 4 -} tdefl_flush; - -/* tdefl's compression state structure. 
*/ -typedef struct { - tdefl_put_buf_func_ptr m_pPut_buf_func; - void* m_pPut_buf_user; - mz_uint m_flags, m_max_probes[2]; - int m_greedy_parsing; - mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; - mz_uint8 * m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; - mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; - mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, - m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; - tdefl_status m_prev_return_status; - const void* m_pIn_buf; - void* m_pOut_buf; - size_t * m_pIn_buf_size, *m_pOut_buf_size; - tdefl_flush m_flush; - const mz_uint8* m_pSrc; - size_t m_src_buf_left, m_out_buf_ofs; - mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; - mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; - mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; - mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; - mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; -} tdefl_compressor; - -/* Initializes the compressor. */ -/* There is no corresponding deinit() function because the tdefl API's do not - * dynamically allocate memory. */ -/* pBut_buf_func: If NULL, output data will be supplied to the specified - * callback. In this case, the user should call the tdefl_compress_buffer() API - * for compression. */ -/* If pBut_buf_func is NULL the user should always call the tdefl_compress() - * API. */ -/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, - * etc.) */ -MINIZ_EXPORT tdefl_status tdefl_init(tdefl_compressor* d, - tdefl_put_buf_func_ptr pPut_buf_func, - void* pPut_buf_user, - int flags); - -/* Compresses a block of data, consuming as much of the specified input buffer - * as possible, and writing as much compressed data to the specified output - * buffer as possible. */ -MINIZ_EXPORT tdefl_status tdefl_compress(tdefl_compressor* d, - const void* pIn_buf, - size_t* pIn_buf_size, - void* pOut_buf, - size_t* pOut_buf_size, - tdefl_flush flush); - -/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a - * non-NULL tdefl_put_buf_func_ptr. */ -/* tdefl_compress_buffer() always consumes the entire input buffer. */ -MINIZ_EXPORT tdefl_status tdefl_compress_buffer(tdefl_compressor* d, - const void* pIn_buf, - size_t in_buf_size, - tdefl_flush flush); - -MINIZ_EXPORT tdefl_status tdefl_get_prev_return_status(tdefl_compressor* d); -MINIZ_EXPORT mz_uint32 tdefl_get_adler32(tdefl_compressor* d); - -/* Create tdefl_compress() flags given zlib-style compression parameters. */ -/* level may range from [0,10] (where 10 is absolute max compression, but may be - * much slower on some files) */ -/* window_bits may be -15 (raw deflate) or 15 (zlib) */ -/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, - * MZ_RLE, or MZ_FIXED */ -MINIZ_EXPORT mz_uint tdefl_create_comp_flags_from_zip_params(int level, - int window_bits, - int strategy); - - #ifndef MINIZ_NO_MALLOC -/* Allocate the tdefl_compressor structure in C so that */ -/* non-C language bindings to tdefl_ API don't need to worry about */ -/* structure size and allocation mechanism. 
*/ -MINIZ_EXPORT tdefl_compressor* tdefl_compressor_alloc(void); -MINIZ_EXPORT void tdefl_compressor_free(tdefl_compressor* pComp); - #endif - - #ifdef __cplusplus -} - #endif - -#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ -#pragma once - -/* ------------------- Low-level Decompression API Definitions */ - -#ifndef MINIZ_NO_INFLATE_APIS - - #ifdef __cplusplus -extern "C" { - #endif -/* Decompression flags used by tinfl_decompress(). */ -/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and - * ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the - * input is a raw deflate stream. */ -/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available - * beyond the end of the supplied input buffer. If clear, the input buffer - * contains all remaining input. */ -/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large - * enough to hold the entire decompressed stream. If clear, the output buffer is - * at least the size of the dictionary (typically 32KB). */ -/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the - * decompressed bytes. */ -enum { - TINFL_FLAG_PARSE_ZLIB_HEADER = 1, - TINFL_FLAG_HAS_MORE_INPUT = 2, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, - TINFL_FLAG_COMPUTE_ADLER32 = 8 -}; - -/* High level decompression functions: */ -/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block - * allocated via malloc(). */ -/* On entry: */ -/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data - * to decompress. */ -/* On return: */ -/* Function returns a pointer to the decompressed data, or NULL on failure. */ -/* *pOut_len will be set to the decompressed data's size, which could be larger - * than src_buf_len on uncompressible data. */ -/* The caller must call mz_free() on the returned block when it's no longer - * needed. */ -MINIZ_EXPORT void* -tinfl_decompress_mem_to_heap(const void* pSrc_buf, size_t src_buf_len, size_t* pOut_len, int flags); - - /* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block - * in memory. */ - /* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes - * written on success. */ - #define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t) (-1)) -MINIZ_EXPORT size_t tinfl_decompress_mem_to_mem( - void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags); - -/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an - * internal 32KB buffer, and a user provided callback function will be called to - * flush the buffer. */ -/* Returns 1 on success or 0 on failure. */ -typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void* pUser); -MINIZ_EXPORT int tinfl_decompress_mem_to_callback(const void* pIn_buf, - size_t* pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void* pPut_buf_user, - int flags); - -struct tinfl_decompressor_tag; -typedef struct tinfl_decompressor_tag tinfl_decompressor; - - #ifndef MINIZ_NO_MALLOC -/* Allocate the tinfl_decompressor structure in C so that */ -/* non-C language bindings to tinfl_ API don't need to worry about */ -/* structure size and allocation mechanism. */ -MINIZ_EXPORT tinfl_decompressor* tinfl_decompressor_alloc(void); -MINIZ_EXPORT void tinfl_decompressor_free(tinfl_decompressor* pDecomp); - #endif - - /* Max size of LZ dictionary. */ - #define TINFL_LZ_DICT_SIZE 32768 - -/* Return status. 
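*/

/* A minimal usage sketch (illustrative, not part of miniz): one-shot inflate
 * of a zlib-framed block with the heap helper above; my_inflate_zlib is a
 * hypothetical name. The returned block must be released with mz_free(). */
static void* my_inflate_zlib(const void* pComp, size_t comp_len, size_t* pOut_len) {
    return tinfl_decompress_mem_to_heap(pComp, comp_len, pOut_len,
                                        TINFL_FLAG_PARSE_ZLIB_HEADER);
}

/*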
*/
-typedef enum {
-    /* This status indicates the inflator needs 1 or more input bytes to make
-       forward progress, but the caller is indicating that no more are
-       available. The compressed data is probably corrupted. If you call the
-       inflator again with more bytes it'll try to continue processing the
-       input but this is a BAD sign (either the data is corrupted or you
-       called it incorrectly). */
-    /* If you call it again with no input you'll just get
-       TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */
-    TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4,
-
-    /* This status indicates that one or more of the input parameters was
-       obviously bogus. (You can try calling it again, but if you get this
-       error the calling code is wrong.) */
-    TINFL_STATUS_BAD_PARAM = -3,
-
-    /* This status indicates the inflator is finished but the adler32 check of
-       the uncompressed data didn't match. If you call it again it'll return
-       TINFL_STATUS_DONE. */
-    TINFL_STATUS_ADLER32_MISMATCH = -2,
-
-    /* This status indicates the inflator has somehow failed (bad code,
-       corrupted input, etc.). If you call it again without resetting via
-       tinfl_init() it'll just keep on returning the same failure code. */
-    TINFL_STATUS_FAILED = -1,
-
-    /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */
-
-    /* This status indicates the inflator has returned every byte of
-       uncompressed data that it can, has consumed every byte that it needed,
-       has successfully reached the end of the deflate stream, and */
-    /* if zlib headers and adler32 checking were enabled, that it has
-       successfully checked the uncompressed data's adler32. If you call it
-       again you'll just get TINFL_STATUS_DONE over and over again. */
-    TINFL_STATUS_DONE = 0,
-
-    /* This status indicates the inflator MUST have more input data (even 1
-       byte) before it can make any more forward progress, or you need to
-       clear the TINFL_FLAG_HAS_MORE_INPUT */
-    /* flag on the next call if you don't have any more source data. If the
-       source data was somehow corrupted it's also possible (but unlikely) for
-       the inflator to keep on demanding input to */
-    /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */
-    TINFL_STATUS_NEEDS_MORE_INPUT = 1,
-
-    /* This status indicates the inflator definitely has 1 or more bytes of
-       uncompressed data available, but it cannot write this data into the
-       output buffer. */
-    /* Note if the source compressed data was corrupted it's possible for the
-       inflator to return a lot of uncompressed data to the caller. I've been
-       assuming you know how much uncompressed data to expect (either exact or
-       worst case) and will stop calling the inflator and fail after receiving
-       too much. In pure streaming scenarios where you have no idea how many
-       bytes to expect this may not be possible, so I may need to add some
-       code to address this. */
-    TINFL_STATUS_HAS_MORE_OUTPUT = 2
-} tinfl_status;
-
- /* Initializes the decompressor to its initial state. */
- #define tinfl_init(r) \
-     do \
-     { \
-         (r)->m_state = 0; \
-     } \
-     MZ_MACRO_END
- #define tinfl_get_adler32(r) (r)->m_check_adler32
-
-/* Main low-level decompressor coroutine function. This is the only function
- * actually needed for decompression. All the other functions are just
- * high-level helpers for improved usability. */
-/* This is a universal API, i.e. it can be used as a building block to build
- * any desired higher level decompression API. In the limit case, it can be
- * called once per every byte input or output.
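*/

/* A minimal usage sketch (illustrative, not part of miniz): driving the
 * tinfl_decompress() coroutine with a wrapping TINFL_LZ_DICT_SIZE output
 * window between stdio FILEs. my_inflate_stream is a hypothetical name, and a
 * zlib-framed input is assumed. */
#include <stdio.h>

static int my_inflate_stream(FILE* pIn, FILE* pOut) {
    tinfl_decompressor decomp;
    /* static: keeps the 32KB window off the stack (not reentrant) */
    static mz_uint8 dict[TINFL_LZ_DICT_SIZE];
    mz_uint8        in_buf[1024];
    size_t          dict_ofs = 0, in_avail = 0, in_ofs = 0;
    tinfl_init(&decomp);
    for (;;) {
        if (!in_avail) {
            in_avail = fread(in_buf, 1, sizeof(in_buf), pIn);
            in_ofs   = 0;
        }
        size_t       in_bytes = in_avail, out_bytes = TINFL_LZ_DICT_SIZE - dict_ofs;
        tinfl_status st = tinfl_decompress(
            &decomp, in_buf + in_ofs, &in_bytes, dict, dict + dict_ofs, &out_bytes,
            TINFL_FLAG_PARSE_ZLIB_HEADER | (in_avail ? TINFL_FLAG_HAS_MORE_INPUT : 0));
        in_ofs += in_bytes;
        in_avail -= in_bytes;
        if (fwrite(dict + dict_ofs, 1, out_bytes, pOut) != out_bytes)
            return 0;
        dict_ofs = (dict_ofs + out_bytes) & (TINFL_LZ_DICT_SIZE - 1);
        if (st <= TINFL_STATUS_DONE)
            return st == TINFL_STATUS_DONE; /* done, or a negative failure code */
    }
}

/*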
*/ -MINIZ_EXPORT tinfl_status tinfl_decompress(tinfl_decompressor* r, - const mz_uint8* pIn_buf_next, - size_t* pIn_buf_size, - mz_uint8* pOut_buf_start, - mz_uint8* pOut_buf_next, - size_t* pOut_buf_size, - const mz_uint32 decomp_flags); - -/* Internal/private bits follow. */ -enum { - TINFL_MAX_HUFF_TABLES = 3, - TINFL_MAX_HUFF_SYMBOLS_0 = 288, - TINFL_MAX_HUFF_SYMBOLS_1 = 32, - TINFL_MAX_HUFF_SYMBOLS_2 = 19, - TINFL_FAST_LOOKUP_BITS = 10, - TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS -}; - - #if MINIZ_HAS_64BIT_REGISTERS - #define TINFL_USE_64BIT_BITBUF 1 - #else - #define TINFL_USE_64BIT_BITBUF 0 - #endif - - #if TINFL_USE_64BIT_BITBUF -typedef mz_uint64 tinfl_bit_buf_t; - #define TINFL_BITBUF_SIZE (64) - #else -typedef mz_uint32 tinfl_bit_buf_t; - #define TINFL_BITBUF_SIZE (32) - #endif - -struct tinfl_decompressor_tag { - mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, - m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; - tinfl_bit_buf_t m_bit_buf; - size_t m_dist_from_out_buf_start; - mz_int16 m_look_up[TINFL_MAX_HUFF_TABLES][TINFL_FAST_LOOKUP_SIZE]; - mz_int16 m_tree_0[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; - mz_int16 m_tree_1[TINFL_MAX_HUFF_SYMBOLS_1 * 2]; - mz_int16 m_tree_2[TINFL_MAX_HUFF_SYMBOLS_2 * 2]; - mz_uint8 m_code_size_0[TINFL_MAX_HUFF_SYMBOLS_0]; - mz_uint8 m_code_size_1[TINFL_MAX_HUFF_SYMBOLS_1]; - mz_uint8 m_code_size_2[TINFL_MAX_HUFF_SYMBOLS_2]; - mz_uint8 m_raw_header[4], - m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; -}; - - #ifdef __cplusplus -} - #endif - -#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ - -#pragma once - -/* ------------------- ZIP archive reading/writing */ - -#ifndef MINIZ_NO_ARCHIVE_APIS - - #ifdef __cplusplus -extern "C" { - #endif - -enum { - /* Note: These enums can be reduced as needed to save memory or stack space - - they are pretty conservative. */ - MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, - MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512, - MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512 -}; - -typedef struct { - /* Central directory file index. */ - mz_uint32 m_file_index; - - /* Byte offset of this entry in the archive's central directory. Note we - * currently only support up to UINT_MAX or less bytes in the central dir. */ - mz_uint64 m_central_dir_ofs; - - /* These fields are copied directly from the zip's central dir. */ - mz_uint16 m_version_made_by; - mz_uint16 m_version_needed; - mz_uint16 m_bit_flag; - mz_uint16 m_method; - - /* CRC-32 of uncompressed data. */ - mz_uint32 m_crc32; - - /* File's compressed size. */ - mz_uint64 m_comp_size; - - /* File's uncompressed size. Note, I've seen some old archives where directory - * entries had 512 bytes for their uncompressed sizes, but when you try to - * unpack them you actually get 0 bytes. */ - mz_uint64 m_uncomp_size; - - /* Zip internal and external file attributes. */ - mz_uint16 m_internal_attr; - mz_uint32 m_external_attr; - - /* Entry's local header file offset in bytes. */ - mz_uint64 m_local_header_ofs; - - /* Size of comment in bytes. */ - mz_uint32 m_comment_size; - - /* MZ_TRUE if the entry appears to be a directory. */ - mz_bool m_is_directory; - - /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip - * doesn't support) */ - mz_bool m_is_encrypted; - - /* MZ_TRUE if the file is not encrypted, a patch file, and if it uses a - * compression method we support. */ - mz_bool m_is_supported; - - /* Filename. If string ends in '/' it's a subdirectory entry. 
*/ - /* Guaranteed to be zero terminated, may be truncated to fit. */ - char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; - - /* Comment field. */ - /* Guaranteed to be zero terminated, may be truncated to fit. */ - char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; - - #ifdef MINIZ_NO_TIME - MZ_TIME_T m_padding; - #else - MZ_TIME_T m_time; - #endif -} mz_zip_archive_file_stat; - -typedef size_t (*mz_file_read_func)(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n); -typedef size_t (*mz_file_write_func)(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n); -typedef mz_bool (*mz_file_needs_keepalive)(void* pOpaque); - -struct mz_zip_internal_state_tag; -typedef struct mz_zip_internal_state_tag mz_zip_internal_state; - -typedef enum { - MZ_ZIP_MODE_INVALID = 0, - MZ_ZIP_MODE_READING = 1, - MZ_ZIP_MODE_WRITING = 2, - MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 -} mz_zip_mode; - -typedef enum { - MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, - MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, - MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800, - MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = - 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each - file as its validated to ensure the func finds the file in the - central dir (intended for testing) */ - MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = - 0x2000, /* validate the local headers, but don't decompress the entire - file and check the crc32 */ - MZ_ZIP_FLAG_WRITE_ZIP64 = - 0x4000, /* always use the zip64 file format, instead of the original zip - file format with automatic switch to zip64. Use as flags - parameter with mz_zip_writer_init*_v2 */ - MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000, - MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000, - /*After adding a compressed file, seek back - to local file header and set the correct sizes*/ - MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE = 0x20000 -} mz_zip_flags; - -typedef enum { - MZ_ZIP_TYPE_INVALID = 0, - MZ_ZIP_TYPE_USER, - MZ_ZIP_TYPE_MEMORY, - MZ_ZIP_TYPE_HEAP, - MZ_ZIP_TYPE_FILE, - MZ_ZIP_TYPE_CFILE, - MZ_ZIP_TOTAL_TYPES -} mz_zip_type; - -/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or - * modify this enum. */ -typedef enum { - MZ_ZIP_NO_ERROR = 0, - MZ_ZIP_UNDEFINED_ERROR, - MZ_ZIP_TOO_MANY_FILES, - MZ_ZIP_FILE_TOO_LARGE, - MZ_ZIP_UNSUPPORTED_METHOD, - MZ_ZIP_UNSUPPORTED_ENCRYPTION, - MZ_ZIP_UNSUPPORTED_FEATURE, - MZ_ZIP_FAILED_FINDING_CENTRAL_DIR, - MZ_ZIP_NOT_AN_ARCHIVE, - MZ_ZIP_INVALID_HEADER_OR_CORRUPTED, - MZ_ZIP_UNSUPPORTED_MULTIDISK, - MZ_ZIP_DECOMPRESSION_FAILED, - MZ_ZIP_COMPRESSION_FAILED, - MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE, - MZ_ZIP_CRC_CHECK_FAILED, - MZ_ZIP_UNSUPPORTED_CDIR_SIZE, - MZ_ZIP_ALLOC_FAILED, - MZ_ZIP_FILE_OPEN_FAILED, - MZ_ZIP_FILE_CREATE_FAILED, - MZ_ZIP_FILE_WRITE_FAILED, - MZ_ZIP_FILE_READ_FAILED, - MZ_ZIP_FILE_CLOSE_FAILED, - MZ_ZIP_FILE_SEEK_FAILED, - MZ_ZIP_FILE_STAT_FAILED, - MZ_ZIP_INVALID_PARAMETER, - MZ_ZIP_INVALID_FILENAME, - MZ_ZIP_BUF_TOO_SMALL, - MZ_ZIP_INTERNAL_ERROR, - MZ_ZIP_FILE_NOT_FOUND, - MZ_ZIP_ARCHIVE_TOO_LARGE, - MZ_ZIP_VALIDATION_FAILED, - MZ_ZIP_WRITE_CALLBACK_FAILED, - MZ_ZIP_TOTAL_ERRORS -} mz_zip_error; - -typedef struct { - mz_uint64 m_archive_size; - mz_uint64 m_central_directory_file_ofs; - - /* We only support up to UINT32_MAX files in zip64 mode. 
*/ - mz_uint32 m_total_files; - mz_zip_mode m_zip_mode; - mz_zip_type m_zip_type; - mz_zip_error m_last_error; - - mz_uint64 m_file_offset_alignment; - - mz_alloc_func m_pAlloc; - mz_free_func m_pFree; - mz_realloc_func m_pRealloc; - void* m_pAlloc_opaque; - - mz_file_read_func m_pRead; - mz_file_write_func m_pWrite; - mz_file_needs_keepalive m_pNeeds_keepalive; - void* m_pIO_opaque; - - mz_zip_internal_state* m_pState; - -} mz_zip_archive; - -typedef struct { - mz_zip_archive* pZip; - mz_uint flags; - - int status; - - mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, - cur_file_ofs; - mz_zip_archive_file_stat file_stat; - void* pRead_buf; - void* pWrite_buf; - - size_t out_blk_remain; - - tinfl_decompressor inflator; - - #ifdef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - mz_uint padding; - #else - mz_uint file_crc32; - #endif - -} mz_zip_reader_extract_iter_state; - -/* -------- ZIP reading */ - -/* Inits a ZIP archive reader. */ -/* These functions read and validate the archive's central directory. */ -MINIZ_EXPORT mz_bool mz_zip_reader_init(mz_zip_archive* pZip, mz_uint64 size, mz_uint flags); - -MINIZ_EXPORT mz_bool mz_zip_reader_init_mem(mz_zip_archive* pZip, - const void* pMem, - size_t size, - mz_uint flags); - - #ifndef MINIZ_NO_STDIO -/* Read a archive from a disk file. */ -/* file_start_ofs is the file offset where the archive actually begins, or 0. */ -/* actual_archive_size is the true total size of the archive, which may be - * smaller than the file's actual size on disk. If zero the entire file is - * treated as the archive. */ -MINIZ_EXPORT mz_bool mz_zip_reader_init_file(mz_zip_archive* pZip, - const char* pFilename, - mz_uint32 flags); -MINIZ_EXPORT mz_bool mz_zip_reader_init_file_v2(mz_zip_archive* pZip, - const char* pFilename, - mz_uint flags, - mz_uint64 file_start_ofs, - mz_uint64 archive_size); - -/* Read an archive from an already opened FILE, beginning at the current file - * position. */ -/* The archive is assumed to be archive_size bytes long. If archive_size is 0, - * then the entire rest of the file is assumed to contain the archive. */ -/* The FILE will NOT be closed when mz_zip_reader_end() is called. */ -MINIZ_EXPORT mz_bool mz_zip_reader_init_cfile(mz_zip_archive* pZip, - MZ_FILE* pFile, - mz_uint64 archive_size, - mz_uint flags); - #endif - -/* Ends archive reading, freeing all allocations, and closing the input archive - * file if mz_zip_reader_init_file() was used. */ -MINIZ_EXPORT mz_bool mz_zip_reader_end(mz_zip_archive* pZip); - -/* -------- ZIP reading or writing */ - -/* Clears a mz_zip_archive struct to all zeros. */ -/* Important: This must be done before passing the struct to any mz_zip - * functions. */ -MINIZ_EXPORT void mz_zip_zero_struct(mz_zip_archive* pZip); - -MINIZ_EXPORT mz_zip_mode mz_zip_get_mode(mz_zip_archive* pZip); -MINIZ_EXPORT mz_zip_type mz_zip_get_type(mz_zip_archive* pZip); - -/* Returns the total number of files in the archive. */ -MINIZ_EXPORT mz_uint mz_zip_reader_get_num_files(mz_zip_archive* pZip); - -MINIZ_EXPORT mz_uint64 mz_zip_get_archive_size(mz_zip_archive* pZip); -MINIZ_EXPORT mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive* pZip); -MINIZ_EXPORT MZ_FILE* mz_zip_get_cfile(mz_zip_archive* pZip); - -/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. 
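*/

/* A minimal usage sketch (illustrative, not part of miniz): open an archive
 * held in memory and enumerate its entries. my_list_zip_mem is hypothetical. */
#include <stdio.h>

static void my_list_zip_mem(const void* pMem, size_t size) {
    mz_zip_archive zip;
    mz_zip_zero_struct(&zip); /* required before any other mz_zip call */
    if (!mz_zip_reader_init_mem(&zip, pMem, size, 0))
        return;
    for (mz_uint i = 0; i < mz_zip_reader_get_num_files(&zip); i++) {
        mz_zip_archive_file_stat st;
        if (mz_zip_reader_file_stat(&zip, i, &st))
            printf("%s (%llu -> %llu bytes)\n", st.m_filename,
                   (unsigned long long) st.m_comp_size,
                   (unsigned long long) st.m_uncomp_size);
    }
    mz_zip_reader_end(&zip); /* frees everything the reader allocated */
}

/*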
- */ -MINIZ_EXPORT size_t mz_zip_read_archive_data(mz_zip_archive* pZip, - mz_uint64 file_ofs, - void* pBuf, - size_t n); - -/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. - * These functions retrieve/manipulate this field. */ -/* Note that the m_last_error functionality is not thread safe. */ -MINIZ_EXPORT mz_zip_error mz_zip_set_last_error(mz_zip_archive* pZip, mz_zip_error err_num); -MINIZ_EXPORT mz_zip_error mz_zip_peek_last_error(mz_zip_archive* pZip); -MINIZ_EXPORT mz_zip_error mz_zip_clear_last_error(mz_zip_archive* pZip); -MINIZ_EXPORT mz_zip_error mz_zip_get_last_error(mz_zip_archive* pZip); -MINIZ_EXPORT const char* mz_zip_get_error_string(mz_zip_error mz_err); - -/* MZ_TRUE if the archive file entry is a directory entry. */ -MINIZ_EXPORT mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive* pZip, mz_uint file_index); - -/* MZ_TRUE if the file is encrypted/strong encrypted. */ -MINIZ_EXPORT mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive* pZip, mz_uint file_index); - -/* MZ_TRUE if the compression method is supported, and the file is not - * encrypted, and the file is not a compressed patch file. */ -MINIZ_EXPORT mz_bool mz_zip_reader_is_file_supported(mz_zip_archive* pZip, mz_uint file_index); - -/* Retrieves the filename of an archive file entry. */ -/* Returns the number of bytes written to pFilename, or if filename_buf_size is - * 0 this function returns the number of bytes needed to fully store the - * filename. */ -MINIZ_EXPORT mz_uint mz_zip_reader_get_filename(mz_zip_archive* pZip, - mz_uint file_index, - char* pFilename, - mz_uint filename_buf_size); - -/* Attempts to locates a file in the archive's central directory. */ -/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */ -/* Returns -1 if the file cannot be found. */ -MINIZ_EXPORT int mz_zip_reader_locate_file(mz_zip_archive* pZip, - const char* pName, - const char* pComment, - mz_uint flags); -MINIZ_EXPORT mz_bool mz_zip_reader_locate_file_v2(mz_zip_archive* pZip, - const char* pName, - const char* pComment, - mz_uint flags, - mz_uint32* file_index); - -/* Returns detailed information about an archive file entry. */ -MINIZ_EXPORT mz_bool mz_zip_reader_file_stat(mz_zip_archive* pZip, - mz_uint file_index, - mz_zip_archive_file_stat* pStat); - -/* MZ_TRUE if the file is in zip64 format. */ -/* A file is considered zip64 if it contained a zip64 end of central directory - * marker, or if it contained any zip64 extended file information fields in the - * central directory. */ -MINIZ_EXPORT mz_bool mz_zip_is_zip64(mz_zip_archive* pZip); - -/* Returns the total central directory size in bytes. */ -/* The current max supported size is <= MZ_UINT32_MAX. */ -MINIZ_EXPORT size_t mz_zip_get_central_dir_size(mz_zip_archive* pZip); - -/* Extracts a archive file to a memory buffer using no memory allocation. */ -/* There must be at least enough room on the stack to store the inflator's state - * (~34KB or so). */ -MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive* pZip, - mz_uint file_index, - void* pBuf, - size_t buf_size, - mz_uint flags, - void* pUser_read_buf, - size_t user_read_buf_size); -MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive* pZip, - const char* pFilename, - void* pBuf, - size_t buf_size, - mz_uint flags, - void* pUser_read_buf, - size_t user_read_buf_size); - -/* Extracts a archive file to a memory buffer. 
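*/

/* A minimal usage sketch (illustrative, not part of miniz): look an entry up
 * by name, then pull it into a heap block. my_read_zip_entry is hypothetical;
 * the caller frees the result with mz_free(). */
static void* my_read_zip_entry(mz_zip_archive* pZip, const char* pName, size_t* pSize) {
    mz_uint32 idx;
    if (!mz_zip_reader_locate_file_v2(pZip, pName, NULL, 0, &idx))
        return NULL; /* not found; pZip->m_last_error has the reason */
    return mz_zip_reader_extract_to_heap(pZip, idx, pSize, 0);
}

/*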
*/ -MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_mem( - mz_zip_archive* pZip, mz_uint file_index, void* pBuf, size_t buf_size, mz_uint flags); -MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_mem( - mz_zip_archive* pZip, const char* pFilename, void* pBuf, size_t buf_size, mz_uint flags); - -/* Extracts a archive file to a dynamically allocated heap buffer. */ -/* The memory will be allocated via the mz_zip_archive's alloc/realloc - * functions. */ -/* Returns NULL and sets the last error on failure. */ -MINIZ_EXPORT void* mz_zip_reader_extract_to_heap(mz_zip_archive* pZip, - mz_uint file_index, - size_t* pSize, - mz_uint flags); -MINIZ_EXPORT void* mz_zip_reader_extract_file_to_heap(mz_zip_archive* pZip, - const char* pFilename, - size_t* pSize, - mz_uint flags); - -/* Extracts a archive file using a callback function to output the file's data. - */ -MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive* pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void* pOpaque, - mz_uint flags); -MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive* pZip, - const char* pFilename, - mz_file_write_func pCallback, - void* pOpaque, - mz_uint flags); - -/* Extract a file iteratively */ -MINIZ_EXPORT mz_zip_reader_extract_iter_state* -mz_zip_reader_extract_iter_new(mz_zip_archive* pZip, mz_uint file_index, mz_uint flags); -MINIZ_EXPORT mz_zip_reader_extract_iter_state* -mz_zip_reader_extract_file_iter_new(mz_zip_archive* pZip, const char* pFilename, mz_uint flags); -MINIZ_EXPORT size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, - void* pvBuf, - size_t buf_size); -MINIZ_EXPORT mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); - - #ifndef MINIZ_NO_STDIO -/* Extracts a archive file to a disk file and sets its last accessed and - * modified times. */ -/* This function only extracts files, not archive directory records. */ -MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_file(mz_zip_archive* pZip, - mz_uint file_index, - const char* pDst_filename, - mz_uint flags); -MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive* pZip, - const char* pArchive_filename, - const char* pDst_filename, - mz_uint flags); - -/* Extracts a archive file starting at the current position in the destination - * FILE stream. 
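*/

/* A minimal usage sketch (illustrative, not part of miniz): stream an entry
 * out through the iterator API with a small fixed buffer instead of a
 * whole-file allocation. my_extract_iter is a hypothetical name. */
#include <stdio.h>

static mz_bool my_extract_iter(mz_zip_archive* pZip, mz_uint file_index, FILE* pOut) {
    mz_zip_reader_extract_iter_state* pIter =
        mz_zip_reader_extract_iter_new(pZip, file_index, 0);
    mz_bool ok = pIter != NULL;
    char    buf[16 * 1024];
    size_t  n;
    while (ok && (n = mz_zip_reader_extract_iter_read(pIter, buf, sizeof(buf))) > 0)
        ok = fwrite(buf, 1, n, pOut) == n;
    /* iter_free also reports whether the entry was read back successfully */
    return mz_zip_reader_extract_iter_free(pIter) && ok;
}

/*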
*/ -MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive* pZip, - mz_uint file_index, - MZ_FILE* File, - mz_uint flags); -MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive* pZip, - const char* pArchive_filename, - MZ_FILE* pFile, - mz_uint flags); - #endif - - #if 0 -/* TODO */ - typedef void *mz_zip_streaming_extract_state_ptr; - mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); - mz_uint64 mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - mz_uint64 mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, mz_uint64 new_ofs); - size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); - mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); - #endif - -/* This function compares the archive's local headers, the optional local zip64 - * extended information block, and the optional descriptor following the - * compressed data vs. the data in the central directory. */ -/* It also validates that each file can be successfully uncompressed unless the - * MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. */ -MINIZ_EXPORT mz_bool mz_zip_validate_file(mz_zip_archive* pZip, mz_uint file_index, mz_uint flags); - -/* Validates an entire archive by calling mz_zip_validate_file() on each file. - */ -MINIZ_EXPORT mz_bool mz_zip_validate_archive(mz_zip_archive* pZip, mz_uint flags); - -/* Misc utils/helpers, valid for ZIP reading or writing */ -MINIZ_EXPORT mz_bool mz_zip_validate_mem_archive(const void* pMem, - size_t size, - mz_uint flags, - mz_zip_error* pErr); - #ifndef MINIZ_NO_STDIO -MINIZ_EXPORT mz_bool mz_zip_validate_file_archive(const char* pFilename, - mz_uint flags, - mz_zip_error* pErr); - #endif - -/* Universal end function - calls either mz_zip_reader_end() or - * mz_zip_writer_end(). */ -MINIZ_EXPORT mz_bool mz_zip_end(mz_zip_archive* pZip); - -/* -------- ZIP writing */ - - #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -/* Inits a ZIP archive writer. */ -/*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init - * or mz_zip_writer_init_v2*/ -/*The output is streamable, i.e. 
file_ofs in mz_file_write_func always increases - * only by n*/ -MINIZ_EXPORT mz_bool mz_zip_writer_init(mz_zip_archive* pZip, mz_uint64 existing_size); -MINIZ_EXPORT mz_bool mz_zip_writer_init_v2(mz_zip_archive* pZip, - mz_uint64 existing_size, - mz_uint flags); - -MINIZ_EXPORT mz_bool mz_zip_writer_init_heap(mz_zip_archive* pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size); -MINIZ_EXPORT mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive* pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size, - mz_uint flags); - - #ifndef MINIZ_NO_STDIO -MINIZ_EXPORT mz_bool mz_zip_writer_init_file(mz_zip_archive* pZip, - const char* pFilename, - mz_uint64 size_to_reserve_at_beginning); -MINIZ_EXPORT mz_bool mz_zip_writer_init_file_v2(mz_zip_archive* pZip, - const char* pFilename, - mz_uint64 size_to_reserve_at_beginning, - mz_uint flags); -MINIZ_EXPORT mz_bool mz_zip_writer_init_cfile(mz_zip_archive* pZip, MZ_FILE* pFile, mz_uint flags); - #endif - -/* Converts a ZIP archive reader object into a writer object, to allow efficient - * in-place file appends to occur on an existing archive. */ -/* For archives opened using mz_zip_reader_init_file, pFilename must be the - * archive's filename so it can be reopened for writing. If the file can't be - * reopened, mz_zip_reader_end() will be called. */ -/* For archives opened using mz_zip_reader_init_mem, the memory block must be - * growable using the realloc callback (which defaults to realloc unless you've - * overridden it). */ -/* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's - * user provided m_pWrite function cannot be NULL. */ -/* Note: In-place archive modification is not recommended unless you know what - * you're doing, because if execution stops or something goes wrong before */ -/* the archive is finalized the file's central directory will be hosed. */ -MINIZ_EXPORT mz_bool mz_zip_writer_init_from_reader(mz_zip_archive* pZip, const char* pFilename); -MINIZ_EXPORT mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive* pZip, - const char* pFilename, - mz_uint flags); - -/* Adds the contents of a memory buffer to an archive. These functions record - * the current local time into the archive. */ -/* To add a directory entry, call this method with an archive name ending in a - * forwardslash with an empty buffer. */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, - * MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or - * just set to MZ_DEFAULT_COMPRESSION. */ -MINIZ_EXPORT mz_bool mz_zip_writer_add_mem(mz_zip_archive* pZip, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - mz_uint level_and_flags); - -/* Like mz_zip_writer_add_mem(), except you can specify a file comment field, - * and optionally supply the function with already compressed data. */ -/* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA - * flag is specified. 
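*/

/* A minimal usage sketch (illustrative, not part of miniz): create a fresh
 * archive on disk with a single entry. my_write_zip and the entry name
 * "data.bin" are hypothetical. */
static mz_bool my_write_zip(const char* pZip_path, const void* pData, size_t len) {
    mz_zip_archive zip;
    mz_zip_zero_struct(&zip);
    if (!mz_zip_writer_init_file(&zip, pZip_path, 0))
        return MZ_FALSE;
    mz_bool ok = mz_zip_writer_add_mem(&zip, "data.bin", pData, len,
                                       (mz_uint) MZ_DEFAULT_COMPRESSION);
    ok = ok && mz_zip_writer_finalize_archive(&zip); /* mandatory, see below */
    return mz_zip_writer_end(&zip) && ok;
}

/*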
*/ -MINIZ_EXPORT mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive* pZip, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32); - -MINIZ_EXPORT mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive* pZip, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32, - MZ_TIME_T* last_modified, - const char* user_extra_data_local, - mz_uint user_extra_data_local_len, - const char* user_extra_data_central, - mz_uint user_extra_data_central_len); - -/* Adds the contents of a file to an archive. This function also records the - * disk file's modified time into the archive. */ -/* File data is supplied via a read callback function. User - * mz_zip_writer_add_(c)file to add a file directly.*/ -MINIZ_EXPORT mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive* pZip, - const char* pArchive_name, - mz_file_read_func read_callback, - void* callback_opaque, - mz_uint64 max_size, - const MZ_TIME_T* pFile_time, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint32 ext_attributes, - const char* user_extra_data_local, - mz_uint user_extra_data_local_len, - const char* user_extra_data_central, - mz_uint user_extra_data_central_len); - - #ifndef MINIZ_NO_STDIO -/* Adds the contents of a disk file to an archive. This function also records - * the disk file's modified time into the archive. */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, - * MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or - * just set to MZ_DEFAULT_COMPRESSION. */ -MINIZ_EXPORT mz_bool mz_zip_writer_add_file(mz_zip_archive* pZip, - const char* pArchive_name, - const char* pSrc_filename, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint32 ext_attributes); - -/* Like mz_zip_writer_add_file(), except the file data is read from the - * specified FILE stream. */ -MINIZ_EXPORT mz_bool mz_zip_writer_add_cfile(mz_zip_archive* pZip, - const char* pArchive_name, - MZ_FILE* pSrc_file, - mz_uint64 max_size, - const MZ_TIME_T* pFile_time, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint32 ext_attributes, - const char* user_extra_data_local, - mz_uint user_extra_data_local_len, - const char* user_extra_data_central, - mz_uint user_extra_data_central_len); - #endif - -/* Adds a file to an archive by fully cloning the data from another archive. */ -/* This function fully clones the source file's compressed data (no - * recompression), along with its full filename, extra data (it may add or - * modify the zip64 local header extra data field), and the optional descriptor - * following the compressed data. */ -MINIZ_EXPORT mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive* pZip, - mz_zip_archive* pSource_zip, - mz_uint src_file_index); - -/* Finalizes the archive by writing the central directory records followed by - * the end of central directory record. */ -/* After an archive is finalized, the only valid call on the mz_zip_archive - * struct is mz_zip_writer_end(). */ -/* An archive must be manually finalized by calling this function for it to be - * valid. */ -MINIZ_EXPORT mz_bool mz_zip_writer_finalize_archive(mz_zip_archive* pZip); - -/* Finalizes a heap archive, returning a pointer to the heap block and its size. 
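*/

/* A minimal usage sketch (illustrative, not part of miniz): build an archive
 * entirely in memory and hand the finished block to the caller, who releases
 * it with mz_free() under the default allocators. my_zip_to_heap and
 * "data.bin" are hypothetical names. */
static void* my_zip_to_heap(const void* pData, size_t len, size_t* pZip_size) {
    mz_zip_archive zip;
    void*          pBuf = NULL;
    mz_zip_zero_struct(&zip);
    if (!mz_zip_writer_init_heap(&zip, 0, 64 * 1024))
        return NULL;
    if (!mz_zip_writer_add_mem(&zip, "data.bin", pData, len,
                               (mz_uint) MZ_DEFAULT_COMPRESSION)
        || !mz_zip_writer_finalize_heap_archive(&zip, &pBuf, pZip_size))
        pBuf = NULL;
    mz_zip_writer_end(&zip);
    return pBuf;
}

/*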
- */ -/* The heap block will be allocated using the mz_zip_archive's alloc/realloc - * callbacks. */ -MINIZ_EXPORT mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive* pZip, - void** ppBuf, - size_t* pSize); - -/* Ends archive writing, freeing all allocations, and closing the output file if - * mz_zip_writer_init_file() was used. */ -/* Note for the archive to be valid, it *must* have been finalized before ending - * (this function will not do it for you). */ -MINIZ_EXPORT mz_bool mz_zip_writer_end(mz_zip_archive* pZip); - -/* -------- Misc. high-level helper functions: */ - -/* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) - * appends a memory blob to a ZIP archive. */ -/* Note this is NOT a fully safe operation. If it crashes or dies in some way - * your archive can be left in a screwed up state (without a central directory). - */ -/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, - * MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or - * just set to MZ_DEFAULT_COMPRESSION. */ -/* TODO: Perhaps add an option to leave the existing central dir in place in - * case the add dies? We could then truncate the file (so the old central dir - * would be at the end) if something goes wrong. */ -MINIZ_EXPORT mz_bool mz_zip_add_mem_to_archive_file_in_place(const char* pZip_filename, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags); -MINIZ_EXPORT mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char* pZip_filename, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_zip_error* pErr); - - #ifndef MINIZ_NO_STDIO -/* Reads a single file from an archive into a heap block. */ -/* If pComment is not NULL, only the file with the specified comment will be - * extracted. */ -/* Returns NULL on failure. */ -MINIZ_EXPORT void* mz_zip_extract_archive_file_to_heap(const char* pZip_filename, - const char* pArchive_name, - size_t* pSize, - mz_uint flags); -MINIZ_EXPORT void* mz_zip_extract_archive_file_to_heap_v2(const char* pZip_filename, - const char* pArchive_name, - const char* pComment, - size_t* pSize, - mz_uint flags, - mz_zip_error* pErr); - #endif - - #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ - - #ifdef __cplusplus -} - #endif - -#endif /* MINIZ_NO_ARCHIVE_APIS */ - -#ifndef MINIZ_HEADER_FILE_ONLY -/************************************************************************** - * - * Copyright 2013-2014 RAD Game Tools and Valve Software - * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - **************************************************************************/ - -typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; -typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; -typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 1 : -1]; - - #ifdef __cplusplus -extern "C" { - #endif - -/* ------------------- zlib-style API's */ - -mz_ulong mz_adler32(mz_ulong adler, const unsigned char* ptr, size_t buf_len) { - mz_uint32 i, s1 = (mz_uint32) (adler & 0xffff), s2 = (mz_uint32) (adler >> 16); - size_t block_len = buf_len % 5552; - if (!ptr) - return MZ_ADLER32_INIT; - while (buf_len) - { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) - { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) - s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - return (s2 << 16) + s1; -} - - /* Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C - * implementation that balances processor cache usage against speed": - * http://www.geocities.com/malbrain/ */ - #if 0 - mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) - { - static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, - 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; - mz_uint32 crcu32 = (mz_uint32)crc; - if (!ptr) - return MZ_CRC32_INIT; - crcu32 = ~crcu32; - while (buf_len--) - { - mz_uint8 b = *ptr++; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; - } - return ~crcu32; - } - #elif defined(USE_EXTERNAL_MZCRC) -/* If USE_EXTERNAL_CRC is defined, an external module will export the - * mz_crc32() symbol for us to use, e.g. an SSE-accelerated version. - * Depending on the impl, it may be necessary to ~ the input/output crc values. - */ -mz_ulong mz_crc32(mz_ulong crc, const mz_uint8* ptr, size_t buf_len); - #else -/* Faster, but larger CPU cache footprint. 
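*/

/* A minimal usage sketch (illustrative, not part of miniz): both mz_adler32()
 * and mz_crc32() are incremental. Seed with a NULL buffer (which returns the
 * *_INIT constant, as the code above shows) and fold the data in as it
 * arrives. my_adler32_chunked is a hypothetical name. */
static mz_ulong my_adler32_chunked(const unsigned char* p, size_t n, size_t chunk) {
    mz_ulong a = mz_adler32(0, NULL, 0); /* == MZ_ADLER32_INIT */
    while (n) {
        size_t take = n < chunk ? n : chunk;
        a = mz_adler32(a, p, take);
        p += take;
        n -= take;
    }
    return a; /* identical to hashing the whole buffer in one call */
}

/*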
- */ -mz_ulong mz_crc32(mz_ulong crc, const mz_uint8* ptr, size_t buf_len) { - static const mz_uint32 s_crc_table[256] = { - 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, - 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, - 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, - 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, - 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, - 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, - 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, - 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, - 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, - 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, - 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, - 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, - 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, - 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, - 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, - 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, - 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, - 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, - 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, - 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, - 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, - 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, - 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, - 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, - 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, - 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, - 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, - 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, - 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, - 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, - 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, - 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, - 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, - 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, - 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, - 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D}; - - mz_uint32 crc32 = (mz_uint32) crc ^ 0xFFFFFFFF; - const mz_uint8* pByte_buf = (const mz_uint8*) ptr; - - while (buf_len >= 4) - { - crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; - crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[1]) & 0xFF]; - crc32 = 
(crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[2]) & 0xFF]; - crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[3]) & 0xFF]; - pByte_buf += 4; - buf_len -= 4; - } - - while (buf_len) - { - crc32 = (crc32 >> 8) ^ s_crc_table[(crc32 ^ pByte_buf[0]) & 0xFF]; - ++pByte_buf; - --buf_len; - } - - return ~crc32; -} - #endif - -void mz_free(void* p) { MZ_FREE(p); } - -MINIZ_EXPORT void* miniz_def_alloc_func(void* opaque, size_t items, size_t size) { - (void) opaque, (void) items, (void) size; - return MZ_MALLOC(items * size); -} -MINIZ_EXPORT void miniz_def_free_func(void* opaque, void* address) { - (void) opaque, (void) address; - MZ_FREE(address); -} -MINIZ_EXPORT void* miniz_def_realloc_func(void* opaque, void* address, size_t items, size_t size) { - (void) opaque, (void) address, (void) items, (void) size; - return MZ_REALLOC(address, items * size); -} - -const char* mz_version(void) { return MZ_VERSION; } - - #ifndef MINIZ_NO_ZLIB_APIS - - #ifndef MINIZ_NO_DEFLATE_APIS - -int mz_deflateInit(mz_streamp pStream, int level) { - return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, - MZ_DEFAULT_STRATEGY); -} - -int mz_deflateInit2( - mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) { - tdefl_compressor* pComp; - mz_uint comp_flags = - TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); - - if (!pStream) - return MZ_STREAM_ERROR; - if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) - || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) - return MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = MZ_ADLER32_INIT; - pStream->msg = NULL; - pStream->reserved = 0; - pStream->total_in = 0; - pStream->total_out = 0; - if (!pStream->zalloc) - pStream->zalloc = miniz_def_alloc_func; - if (!pStream->zfree) - pStream->zfree = miniz_def_free_func; - - pComp = (tdefl_compressor*) pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); - if (!pComp) - return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state*) pComp; - - if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) - { - mz_deflateEnd(pStream); - return MZ_PARAM_ERROR; - } - - return MZ_OK; -} - -int mz_deflateReset(mz_streamp pStream) { - if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) - return MZ_STREAM_ERROR; - pStream->total_in = pStream->total_out = 0; - tdefl_init((tdefl_compressor*) pStream->state, NULL, NULL, - ((tdefl_compressor*) pStream->state)->m_flags); - return MZ_OK; -} - -int mz_deflate(mz_streamp pStream, int flush) { - size_t in_bytes, out_bytes; - mz_ulong orig_total_in, orig_total_out; - int mz_status = MZ_OK; - - if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) - || (!pStream->next_out)) - return MZ_STREAM_ERROR; - if (!pStream->avail_out) - return MZ_BUF_ERROR; - - if (flush == MZ_PARTIAL_FLUSH) - flush = MZ_SYNC_FLUSH; - - if (((tdefl_compressor*) pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) - return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; - - orig_total_in = pStream->total_in; - orig_total_out = pStream->total_out; - for (;;) - { - tdefl_status defl_status; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - - defl_status = tdefl_compress((tdefl_compressor*) pStream->state, pStream->next_in, - &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush) flush); - pStream->next_in += (mz_uint) in_bytes; - pStream->avail_in -= (mz_uint) in_bytes; - pStream->total_in += (mz_uint) in_bytes; - pStream->adler = tdefl_get_adler32((tdefl_compressor*) pStream->state); - - pStream->next_out += (mz_uint) out_bytes; - pStream->avail_out -= (mz_uint) out_bytes; - pStream->total_out += (mz_uint) out_bytes; - - if (defl_status < 0) - { - mz_status = MZ_STREAM_ERROR; - break; - } - else if (defl_status == TDEFL_STATUS_DONE) - { - mz_status = MZ_STREAM_END; - break; - } - else if (!pStream->avail_out) - break; - else if ((!pStream->avail_in) && (flush != MZ_FINISH)) - { - if ((flush) || (pStream->total_in != orig_total_in) - || (pStream->total_out != orig_total_out)) - break; - return MZ_BUF_ERROR; /* Can't make forward progress without some input. - */ - } - } - return mz_status; -} - -int mz_deflateEnd(mz_streamp pStream) { - if (!pStream) - return MZ_STREAM_ERROR; - if (pStream->state) - { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { - (void) pStream; - /* This is really over conservative. (And lame, but it's actually pretty - * tricky to compute a true upper bound given the way tdefl's blocking works.) - */ - return MZ_MAX(128 + (source_len * 110) / 100, - 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); -} - -int mz_compress2(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong source_len, - int level) { - int status; - mz_stream stream; - memset(&stream, 0, sizeof(stream)); - - #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__WATCOMC__) - /* In case mz_ulong is 64-bits (argh I hate longs). */ - #else - if ((mz_uint64) (source_len | *pDest_len) > 0xFFFFFFFFU) - return MZ_PARAM_ERROR; - #endif - stream.next_in = pSource; - stream.avail_in = (mz_uint32) source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32) *pDest_len; - - status = mz_deflateInit(&stream, level); - if (status != MZ_OK) - return status; - - status = mz_deflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) - { - mz_deflateEnd(&stream); - return (status == MZ_OK) ? 
MZ_BUF_ERROR : status; - } - - *pDest_len = stream.total_out; - return mz_deflateEnd(&stream); -} - -int mz_compress(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong source_len) { - return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); -} - -mz_ulong mz_compressBound(mz_ulong source_len) { return mz_deflateBound(NULL, source_len); } - - #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ - - #ifndef MINIZ_NO_INFLATE_APIS - -typedef struct { - tinfl_decompressor m_decomp; - mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; - int m_window_bits; - mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; - tinfl_status m_last_status; -} inflate_state; - -int mz_inflateInit2(mz_streamp pStream, int window_bits) { - inflate_state* pDecomp; - if (!pStream) - return MZ_STREAM_ERROR; - if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) - return MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = 0; - pStream->msg = NULL; - pStream->total_in = 0; - pStream->total_out = 0; - pStream->reserved = 0; - if (!pStream->zalloc) - pStream->zalloc = miniz_def_alloc_func; - if (!pStream->zfree) - pStream->zfree = miniz_def_free_func; - - pDecomp = (inflate_state*) pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); - if (!pDecomp) - return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state*) pDecomp; - - tinfl_init(&pDecomp->m_decomp); - pDecomp->m_dict_ofs = 0; - pDecomp->m_dict_avail = 0; - pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; - pDecomp->m_first_call = 1; - pDecomp->m_has_flushed = 0; - pDecomp->m_window_bits = window_bits; - - return MZ_OK; -} - -int mz_inflateInit(mz_streamp pStream) { return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); } - -int mz_inflateReset(mz_streamp pStream) { - inflate_state* pDecomp; - if (!pStream) - return MZ_STREAM_ERROR; - - pStream->data_type = 0; - pStream->adler = 0; - pStream->msg = NULL; - pStream->total_in = 0; - pStream->total_out = 0; - pStream->reserved = 0; - - pDecomp = (inflate_state*) pStream->state; - - tinfl_init(&pDecomp->m_decomp); - pDecomp->m_dict_ofs = 0; - pDecomp->m_dict_avail = 0; - pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; - pDecomp->m_first_call = 1; - pDecomp->m_has_flushed = 0; - /* pDecomp->m_window_bits = window_bits */; - - return MZ_OK; -} - -int mz_inflate(mz_streamp pStream, int flush) { - inflate_state* pState; - mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; - size_t in_bytes, out_bytes, orig_avail_in; - tinfl_status status; - - if ((!pStream) || (!pStream->state)) - return MZ_STREAM_ERROR; - if (flush == MZ_PARTIAL_FLUSH) - flush = MZ_SYNC_FLUSH; - if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) - return MZ_STREAM_ERROR; - - pState = (inflate_state*) pStream->state; - if (pState->m_window_bits > 0) - decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; - orig_avail_in = pStream->avail_in; - - first_call = pState->m_first_call; - pState->m_first_call = 0; - if (pState->m_last_status < 0) - return MZ_DATA_ERROR; - - if (pState->m_has_flushed && (flush != MZ_FINISH)) - return MZ_STREAM_ERROR; - pState->m_has_flushed |= (flush == MZ_FINISH); - - if ((flush == MZ_FINISH) && (first_call)) - { - /* MZ_FINISH on the first call implies that the input and output buffers are - * large enough to hold the entire compressed/decompressed file. 
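*/

/* A minimal usage sketch (illustrative, not part of miniz) of the case just
 * described: MZ_FINISH on the first mz_inflate() call, with both buffers
 * sized for the whole stream. my_inflate_oneshot is a hypothetical name. */
#include <string.h>

static int my_inflate_oneshot(const unsigned char* pSrc, mz_ulong src_len,
                              unsigned char* pDst, mz_ulong* pDst_len) {
    mz_stream s;
    int       status;
    memset(&s, 0, sizeof(s));
    s.next_in   = pSrc;
    s.avail_in  = (mz_uint32) src_len;
    s.next_out  = pDst;
    s.avail_out = (mz_uint32) *pDst_len;
    if ((status = mz_inflateInit(&s)) != MZ_OK)
        return status;
    status = mz_inflate(&s, MZ_FINISH); /* MZ_STREAM_END on success */
    if (status == MZ_STREAM_END)
        *pDst_len = s.total_out;
    mz_inflateEnd(&s);
    return status;
}

/*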
*/ - decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, - pStream->next_out, &out_bytes, decomp_flags); - pState->m_last_status = status; - pStream->next_in += (mz_uint) in_bytes; - pStream->avail_in -= (mz_uint) in_bytes; - pStream->total_in += (mz_uint) in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - pStream->next_out += (mz_uint) out_bytes; - pStream->avail_out -= (mz_uint) out_bytes; - pStream->total_out += (mz_uint) out_bytes; - - if (status < 0) - return MZ_DATA_ERROR; - else if (status != TINFL_STATUS_DONE) - { - pState->m_last_status = TINFL_STATUS_FAILED; - return MZ_BUF_ERROR; - } - return MZ_STREAM_END; - } - /* flush != MZ_FINISH then we must assume there's more input. */ - if (flush != MZ_FINISH) - decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; - - if (pState->m_dict_avail) - { - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) - ? MZ_STREAM_END - : MZ_OK; - } - - for (;;) - { - in_bytes = pStream->avail_in; - out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; - - status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, - pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); - pState->m_last_status = status; - - pStream->next_in += (mz_uint) in_bytes; - pStream->avail_in -= (mz_uint) in_bytes; - pStream->total_in += (mz_uint) in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - - pState->m_dict_avail = (mz_uint) out_bytes; - - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - - if (status < 0) - return MZ_DATA_ERROR; /* Stream is corrupted (there could be some - uncompressed data left in the output dictionary - - oh well). */ - else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) - return MZ_BUF_ERROR; /* Signal caller that we can't make forward progress - without supplying more input or by setting flush - to MZ_FINISH. */ - else if (flush == MZ_FINISH) - { - /* The output buffer MUST be large to hold the remaining uncompressed data - * when flush==MZ_FINISH. */ - if (status == TINFL_STATUS_DONE) - return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; - /* status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's - * at least 1 more byte on the way. If there's no more room left in the - * output buffer then something is wrong. */ - else if (!pStream->avail_out) - return MZ_BUF_ERROR; - } - else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) - || (pState->m_dict_avail)) - break; - } - - return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; -} - -int mz_inflateEnd(mz_streamp pStream) { - if (!pStream) - return MZ_STREAM_ERROR; - if (pStream->state) - { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} -int mz_uncompress2(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong* pSource_len) { - mz_stream stream; - int status; - memset(&stream, 0, sizeof(stream)); - - #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__WATCOMC__) - /* In case mz_ulong is 64-bits (argh I hate longs). */ - #else - if ((mz_uint64) (*pSource_len | *pDest_len) > 0xFFFFFFFFU) - return MZ_PARAM_ERROR; - #endif - stream.next_in = pSource; - stream.avail_in = (mz_uint32) *pSource_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32) *pDest_len; - - status = mz_inflateInit(&stream); - if (status != MZ_OK) - return status; - - status = mz_inflate(&stream, MZ_FINISH); - *pSource_len = *pSource_len - stream.avail_in; - if (status != MZ_STREAM_END) - { - mz_inflateEnd(&stream); - return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; - } - *pDest_len = stream.total_out; - - return mz_inflateEnd(&stream); -} - -int mz_uncompress(unsigned char* pDest, - mz_ulong* pDest_len, - const unsigned char* pSource, - mz_ulong source_len) { - return mz_uncompress2(pDest, pDest_len, pSource, &source_len); -} - - #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ - -const char* mz_error(int err) { - static struct { - int m_err; - const char* m_pDesc; - } s_error_descs[] = {{MZ_OK, ""}, - {MZ_STREAM_END, "stream end"}, - {MZ_NEED_DICT, "need dictionary"}, - {MZ_ERRNO, "file error"}, - {MZ_STREAM_ERROR, "stream error"}, - {MZ_DATA_ERROR, "data error"}, - {MZ_MEM_ERROR, "out of memory"}, - {MZ_BUF_ERROR, "buf error"}, - {MZ_VERSION_ERROR, "version error"}, - {MZ_PARAM_ERROR, "parameter error"}}; - mz_uint i; - for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) - if (s_error_descs[i].m_err == err) - return s_error_descs[i].m_pDesc; - return NULL; -} - - #endif /*MINIZ_NO_ZLIB_APIS */ - - #ifdef __cplusplus -} - #endif - -/* - This is free and unencumbered software released into the public domain. - - Anyone is free to copy, modify, publish, use, compile, sell, or - distribute this software, either in source code form or as a compiled - binary, for any purpose, commercial or non-commercial, and by any - means. - - In jurisdictions that recognize copyright laws, the author or authors - of this software dedicate any and all copyright interest in the - software to the public domain. We make this dedication for the benefit - of the public at large and to the detriment of our heirs and - successors. We intend this dedication to be an overt act of - relinquishment in perpetuity of all present and future rights to this - software under copyright law. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. 
- - For more information, please refer to -*/ -/************************************************************************** - * - * Copyright 2013-2014 RAD Game Tools and Valve Software - * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - **************************************************************************/ - - #ifndef MINIZ_NO_DEFLATE_APIS - - #ifdef __cplusplus -extern "C" { - #endif - -/* ------------------- Low-level Compression (independent from all decompression - * API's) */ - -/* Purposely making these tables static for faster init and thread safety. */ -static const mz_uint16 s_tdefl_len_sym[256] = { - 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, - 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, - 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, - 276, 276, 276, 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, - 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, - 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, - 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, - 282, 282, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 284, 285}; - -static const mz_uint8 s_tdefl_len_extra[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0}; - -static const mz_uint8 s_tdefl_small_dist_sym[512] = { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, - 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17}; - -static const mz_uint8 s_tdefl_small_dist_extra[512] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; - -static const mz_uint8 s_tdefl_large_dist_sym[128] = { - 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, - 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}; - -static const mz_uint8 s_tdefl_large_dist_extra[128] = { - 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13}; - -/* Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted - * values. */ -typedef struct { - mz_uint16 m_key, m_sym_index; -} tdefl_sym_freq; -static tdefl_sym_freq* -tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) { - mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; - tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; - MZ_CLEAR_ARR(hist); - for (i = 0; i < num_syms; i++) - { - mz_uint freq = pSyms0[i].m_key; - hist[freq & 0xFF]++; - hist[256 + ((freq >> 8) & 0xFF)]++; - } - while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) - total_passes--; - for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) - { - const mz_uint32* pHist = &hist[pass << 8]; - mz_uint offsets[256], cur_ofs = 0; - for (i = 0; i < 256; i++) - { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; - } - for (i = 0; i < num_syms; i++) - pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; - { - tdefl_sym_freq* t = pCur_syms; - pCur_syms = pNew_syms; - pNew_syms = t; - } - } - return pCur_syms; -} - -/* tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, - * alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
*/ -static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq* A, int n) { - int root, leaf, next, avbl, used, dpth; - if (n == 0) - return; - else if (n == 1) - { - A[0].m_key = 1; - return; - } - A[0].m_key += A[1].m_key; - root = 0; - leaf = 2; - for (next = 1; next < n - 1; next++) - { - if (leaf >= n || A[root].m_key < A[leaf].m_key) - { - A[next].m_key = A[root].m_key; - A[root++].m_key = (mz_uint16) next; - } - else - A[next].m_key = A[leaf++].m_key; - if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) - { - A[next].m_key = (mz_uint16) (A[next].m_key + A[root].m_key); - A[root++].m_key = (mz_uint16) next; - } - else - A[next].m_key = (mz_uint16) (A[next].m_key + A[leaf++].m_key); - } - A[n - 2].m_key = 0; - for (next = n - 3; next >= 0; next--) - A[next].m_key = A[A[next].m_key].m_key + 1; - avbl = 1; - used = dpth = 0; - root = n - 2; - next = n - 1; - while (avbl > 0) - { - while (root >= 0 && (int) A[root].m_key == dpth) - { - used++; - root--; - } - while (avbl > used) - { - A[next--].m_key = (mz_uint16) (dpth); - avbl--; - } - avbl = 2 * used; - dpth++; - used = 0; - } -} - -/* Limits canonical Huffman code table's max code size. */ -enum { - TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 -}; -static void -tdefl_huffman_enforce_max_code_size(int* pNum_codes, int code_list_len, int max_code_size) { - int i; - mz_uint32 total = 0; - if (code_list_len <= 1) - return; - for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) - pNum_codes[max_code_size] += pNum_codes[i]; - for (i = max_code_size; i > 0; i--) - total += (((mz_uint32) pNum_codes[i]) << (max_code_size - i)); - while (total != (1UL << max_code_size)) - { - pNum_codes[max_code_size]--; - for (i = max_code_size - 1; i > 0; i--) - if (pNum_codes[i]) - { - pNum_codes[i]--; - pNum_codes[i + 1] += 2; - break; - } - total--; - } -} - -static void tdefl_optimize_huffman_table( - tdefl_compressor* d, int table_num, int table_len, int code_size_limit, int static_table) { - int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; - mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; - MZ_CLEAR_ARR(num_codes); - if (static_table) - { - for (i = 0; i < table_len; i++) - num_codes[d->m_huff_code_sizes[table_num][i]]++; - } - else - { - tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; - int num_used_syms = 0; - const mz_uint16* pSym_count = &d->m_huff_count[table_num][0]; - for (i = 0; i < table_len; i++) - if (pSym_count[i]) - { - syms0[num_used_syms].m_key = (mz_uint16) pSym_count[i]; - syms0[num_used_syms++].m_sym_index = (mz_uint16) i; - } - - pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); - tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); - - for (i = 0; i < num_used_syms; i++) - num_codes[pSyms[i].m_key]++; - - tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); - - MZ_CLEAR_ARR(d->m_huff_code_sizes[table_num]); - MZ_CLEAR_ARR(d->m_huff_codes[table_num]); - for (i = 1, j = num_used_syms; i <= code_size_limit; i++) - for (l = num_codes[i]; l > 0; l--) - d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8) (i); - } - - next_code[1] = 0; - for (j = 0, i = 2; i <= code_size_limit; i++) - next_code[i] = j = ((j + num_codes[i - 1]) << 1); - - for (i = 0; i < table_len; i++) - { - mz_uint rev_code = 0, code, code_size; - if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) - continue; - code = next_code[code_size]++; - for (l = code_size; l > 0; l--, code >>= 1) - rev_code = (rev_code << 1) 
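The while-loop in tdefl_huffman_enforce_max_code_size above works by keeping the scaled Kraft sum pinned at 1 << max_code_size: a set of code-length counts describes a complete prefix code exactly when that sum hits the target. A minimal standalone check of that invariant; kraft_scaled and the sample counts are hypothetical names and data.

    #include <cstdio>

    // Kraft sum scaled by 1 << max_len: a prefix code with these length
    // counts exists and is complete iff the sum equals 1 << max_len.
    unsigned kraft_scaled(const int* num_codes, int max_len) {
        unsigned total = 0;
        for (int len = 1; len <= max_len; ++len)
            total += (unsigned) num_codes[len] << (max_len - len);
        return total;
    }

    int main() {
        // Counts by code length 1..3: one 1-bit, one 2-bit, two 3-bit codes.
        int num_codes[4] = {0, 1, 1, 2};
        unsigned total = kraft_scaled(num_codes, 3);   // 4 + 2 + 2 = 8 = 1 << 3
        std::printf("scaled Kraft sum = %u (complete iff == %u)\n", total, 1u << 3);
        return 0;
    }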
| (code & 1); - d->m_huff_codes[table_num][i] = (mz_uint16) rev_code; - } -} - - #define TDEFL_PUT_BITS(b, l) \ - do \ - { \ - mz_uint bits = b; \ - mz_uint len = l; \ - MZ_ASSERT(bits <= ((1U << len) - 1U)); \ - d->m_bit_buffer |= (bits << d->m_bits_in); \ - d->m_bits_in += len; \ - while (d->m_bits_in >= 8) \ - { \ - if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ - *d->m_pOutput_buf++ = (mz_uint8) (d->m_bit_buffer); \ - d->m_bit_buffer >>= 8; \ - d->m_bits_in -= 8; \ - } \ - } \ - MZ_MACRO_END - - #define TDEFL_RLE_PREV_CODE_SIZE() \ - { \ - if (rle_repeat_count) \ - { \ - if (rle_repeat_count < 3) \ - { \ - d->m_huff_count[2][prev_code_size] = \ - (mz_uint16) (d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ - while (rle_repeat_count--) \ - packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ - } \ - else \ - { \ - d->m_huff_count[2][16] = (mz_uint16) (d->m_huff_count[2][16] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 16; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8) (rle_repeat_count - 3); \ - } \ - rle_repeat_count = 0; \ - } \ - } - - #define TDEFL_RLE_ZERO_CODE_SIZE() \ - { \ - if (rle_z_count) \ - { \ - if (rle_z_count < 3) \ - { \ - d->m_huff_count[2][0] = (mz_uint16) (d->m_huff_count[2][0] + rle_z_count); \ - while (rle_z_count--) \ - packed_code_sizes[num_packed_code_sizes++] = 0; \ - } \ - else if (rle_z_count <= 10) \ - { \ - d->m_huff_count[2][17] = (mz_uint16) (d->m_huff_count[2][17] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 17; \ - packed_code_sizes[num_packed_code_sizes++] = (mz_uint8) (rle_z_count - 3); \ - } \ - else \ - { \ - d->m_huff_count[2][18] = (mz_uint16) (d->m_huff_count[2][18] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 18; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8) (rle_z_count - 11); \ - } \ - rle_z_count = 0; \ - } \ - } - -static const mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, - 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -static void tdefl_start_dynamic_block(tdefl_compressor* d) { - int num_lit_codes, num_dist_codes, num_bit_lengths; - mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, - packed_code_sizes_index; - mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; - - d->m_huff_count[0][256] = 1; - - tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); - tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); - - for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) - if (d->m_huff_code_sizes[0][num_lit_codes - 1]) - break; - for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) - if (d->m_huff_code_sizes[1][num_dist_codes - 1]) - break; - - memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); - memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); - total_code_sizes_to_pack = num_lit_codes + num_dist_codes; - num_packed_code_sizes = 0; - rle_z_count = 0; - rle_repeat_count = 0; - - memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); - for (i = 0; i < total_code_sizes_to_pack; i++) - { - mz_uint8 code_size = code_sizes_to_pack[i]; - if (!code_size) - { - TDEFL_RLE_PREV_CODE_SIZE(); - if (++rle_z_count == 138) - { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - } - else - { - TDEFL_RLE_ZERO_CODE_SIZE(); - if (code_size != 
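tdefl_optimize_huffman_table above turns code lengths into canonical codes with the standard next_code recurrence from RFC 1951, then bit-reverses each code because deflate emits Huffman codes LSB-first. A compact sketch of both steps using a four-symbol example (all names hypothetical):

    #include <cstdio>

    int main() {
        const int kMaxLen = 15;
        // Code lengths per symbol: A=2, B=1, C=3, D=3.
        int lengths[4] = {2, 1, 3, 3};
        int num_codes[kMaxLen + 1] = {};
        for (int len : lengths) num_codes[len]++;

        // next_code[l]: first canonical code of length l (RFC 1951 recurrence).
        unsigned next_code[kMaxLen + 1] = {};
        unsigned code = 0;
        for (int l = 1; l <= kMaxLen; ++l) {
            code = (code + num_codes[l - 1]) << 1;
            next_code[l] = code;
        }

        for (int s = 0; s < 4; ++s) {
            int      len = lengths[s];
            unsigned c   = next_code[len]++;
            // Reverse the bits so the code can be emitted LSB-first.
            unsigned rev = 0;
            for (int b = 0, tmp = (int) c; b < len; ++b, tmp >>= 1)
                rev = (rev << 1) | (tmp & 1);
            // Yields the prefix-free set B=0, A=10, C=110, D=111.
            std::printf("sym %d: len %d, code %u, lsb-first %u\n", s, len, c, rev);
        }
        return 0;
    }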
prev_code_size) - { - TDEFL_RLE_PREV_CODE_SIZE(); - d->m_huff_count[2][code_size] = (mz_uint16) (d->m_huff_count[2][code_size] + 1); - packed_code_sizes[num_packed_code_sizes++] = code_size; - } - else if (++rle_repeat_count == 6) - { - TDEFL_RLE_PREV_CODE_SIZE(); - } - } - prev_code_size = code_size; - } - if (rle_repeat_count) - { - TDEFL_RLE_PREV_CODE_SIZE(); - } - else - { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - - tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); - - TDEFL_PUT_BITS(2, 2); - - TDEFL_PUT_BITS(num_lit_codes - 257, 5); - TDEFL_PUT_BITS(num_dist_codes - 1, 5); - - for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) - if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) - break; - num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); - TDEFL_PUT_BITS(num_bit_lengths - 4, 4); - for (i = 0; (int) i < num_bit_lengths; i++) - TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); - - for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;) - { - mz_uint code = packed_code_sizes[packed_code_sizes_index++]; - MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); - TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); - if (code >= 16) - TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); - } -} - -static void tdefl_start_static_block(tdefl_compressor* d) { - mz_uint i; - mz_uint8* p = &d->m_huff_code_sizes[0][0]; - - for (i = 0; i <= 143; ++i) - *p++ = 8; - for (; i <= 255; ++i) - *p++ = 9; - for (; i <= 279; ++i) - *p++ = 7; - for (; i <= 287; ++i) - *p++ = 8; - - memset(d->m_huff_code_sizes[1], 5, 32); - - tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); - tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); - - TDEFL_PUT_BITS(1, 2); -} - -static const mz_uint mz_bitmasks[17] = {0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, - 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, - 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF}; - - #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS -static mz_bool tdefl_compress_lz_codes(tdefl_compressor* d) { - mz_uint flags; - mz_uint8* pLZ_codes; - mz_uint8* pOutput_buf = d->m_pOutput_buf; - mz_uint8* pLZ_code_buf_end = d->m_pLZ_code_buf; - mz_uint64 bit_buffer = d->m_bit_buffer; - mz_uint bits_in = d->m_bits_in; - - #define TDEFL_PUT_BITS_FAST(b, l) \ - { \ - bit_buffer |= (((mz_uint64) (b)) << bits_in); \ - bits_in += (l); \ - } - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) - { - if (flags == 1) - flags = *pLZ_codes++ | 0x100; - - if (flags & 1) - { - mz_uint s0, s1, n0, n1, sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0]; - mz_uint match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - /* This sequence coaxes MSVC into using cmov's vs. jmp's. */ - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - n0 = s_tdefl_small_dist_extra[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[match_dist >> 8]; - n1 = s_tdefl_large_dist_extra[match_dist >> 8]; - sym = (match_dist < 512) ? s0 : s1; - num_extra_bits = (match_dist < 512) ? 
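The TDEFL_RLE_* macros above pack code-size arrays with deflate's three run symbols: 16 repeats the previous size 3-6 times (2 extra bits), 17 emits 3-10 zeros (3 extra bits), 18 emits 11-138 zeros (7 extra bits) — the same 2/3/7 extra-bit widths as the "\02\03\07" table above. A toy decoder for that packing, with a hand-built symbol stream (all names and data invented for illustration):

    #include <cstdio>
    #include <vector>

    int main() {
        // (symbol, extra) pairs: literal 8; sym 16 extra 2 -> repeat 8 five
        // times; sym 18 extra 2 -> 13 zeros; literal 4.
        struct Packed { int sym, extra; };
        Packed stream[] = {{8, 0}, {16, 2}, {18, 2}, {4, 0}};

        std::vector<int> sizes;
        for (const Packed& p : stream) {
            if (p.sym <= 15)
                sizes.push_back(p.sym);                       // literal code size 0..15
            else if (p.sym == 16) {
                int prev = sizes.back();                      // repeat previous size 3 + extra times
                for (int i = 0; i < 3 + p.extra; ++i) sizes.push_back(prev);
            }
            else if (p.sym == 17)
                for (int i = 0; i < 3 + p.extra; ++i) sizes.push_back(0);   // short zero run
            else                                              // sym == 18
                for (int i = 0; i < 11 + p.extra; ++i) sizes.push_back(0);  // long zero run
        }
        std::printf("decoded %zu code sizes\n", sizes.size()); // 1 + 5 + 13 + 1 = 20
        return 0;
    }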
n0 : n1; - - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); - } - else - { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) - { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) - { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - } - } - } - - if (pOutput_buf >= d->m_pOutput_buf_end) - return MZ_FALSE; - - memcpy(pOutput_buf, &bit_buffer, sizeof(mz_uint64)); - pOutput_buf += (bits_in >> 3); - bit_buffer >>= (bits_in & ~7); - bits_in &= 7; - } - - #undef TDEFL_PUT_BITS_FAST - - d->m_pOutput_buf = pOutput_buf; - d->m_bits_in = 0; - d->m_bit_buffer = 0; - - while (bits_in) - { - mz_uint32 n = MZ_MIN(bits_in, 16); - TDEFL_PUT_BITS((mz_uint) bit_buffer & mz_bitmasks[n], n); - bit_buffer >>= n; - bits_in -= n; - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} - #else -static mz_bool tdefl_compress_lz_codes(tdefl_compressor* d) { - mz_uint flags; - mz_uint8* pLZ_codes; - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) - { - if (flags == 1) - flags = *pLZ_codes++ | 0x100; - if (flags & 1) - { - mz_uint sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - if (match_dist < 512) - { - sym = s_tdefl_small_dist_sym[match_dist]; - num_extra_bits = s_tdefl_small_dist_extra[match_dist]; - } - else - { - sym = s_tdefl_large_dist_sym[match_dist >> 8]; - num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; - } - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); - } - else - { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - } - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} - #endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \ - MINIZ_HAS_64BIT_REGISTERS */ - -static mz_bool tdefl_compress_block(tdefl_compressor* d, mz_bool static_block) { - if (static_block) - tdefl_start_static_block(d); - else - tdefl_start_dynamic_block(d); - return tdefl_compress_lz_codes(d); -} - -static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; - -static int tdefl_flush_block(tdefl_compressor* d, int flush) { - mz_uint saved_bit_buf, saved_bits_in; - mz_uint8* pSaved_output_buf; - mz_bool comp_block_succeeded = MZ_FALSE; - int n, use_raw_block = ((d->m_flags & 
TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) - && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; - mz_uint8* pOutput_buf_start = - ((d->m_pPut_buf_func == NULL) - && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) - ? ((mz_uint8*) d->m_pOut_buf + d->m_out_buf_ofs) - : d->m_output_buf; - - d->m_pOutput_buf = pOutput_buf_start; - d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; - - MZ_ASSERT(!d->m_output_flush_remaining); - d->m_output_flush_ofs = 0; - d->m_output_flush_remaining = 0; - - *d->m_pLZ_flags = (mz_uint8) (*d->m_pLZ_flags >> d->m_num_flags_left); - d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); - - if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) - { - const mz_uint8 cmf = 0x78; - mz_uint8 flg, flevel = 3; - mz_uint header, i, mz_un = sizeof(s_tdefl_num_probes) / sizeof(mz_uint); - - /* Determine compression level by reversing the process in - * tdefl_create_comp_flags_from_zip_params() */ - for (i = 0; i < mz_un; i++) - if (s_tdefl_num_probes[i] == (d->m_flags & 0xFFF)) - break; - - if (i < 2) - flevel = 0; - else if (i < 6) - flevel = 1; - else if (i == 6) - flevel = 2; - - header = cmf << 8 | (flevel << 6); - header += 31 - (header % 31); - flg = header & 0xFF; - - TDEFL_PUT_BITS(cmf, 8); - TDEFL_PUT_BITS(flg, 8); - } - - TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); - - pSaved_output_buf = d->m_pOutput_buf; - saved_bit_buf = d->m_bit_buffer; - saved_bits_in = d->m_bits_in; - - if (!use_raw_block) - comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) - || (d->m_total_lz_bytes < 48)); - - /* If the block gets expanded, forget the current contents of the output - * buffer and send a raw block instead. */ - if (((use_raw_block) - || ((d->m_total_lz_bytes) - && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) - && ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) - { - mz_uint i; - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - TDEFL_PUT_BITS(0, 2); - if (d->m_bits_in) - { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) - { - TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); - } - for (i = 0; i < d->m_total_lz_bytes; ++i) - { - TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); - } - } - /* Check for the extremely unlikely (if not impossible) case of the compressed - block not fitting into the output buffer when using dynamic codes. 
*/ - else if (!comp_block_succeeded) - { - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - tdefl_compress_block(d, MZ_TRUE); - } - - if (flush) - { - if (flush == TDEFL_FINISH) - { - if (d->m_bits_in) - { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) - { - mz_uint i, a = d->m_adler32; - for (i = 0; i < 4; i++) - { - TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); - a <<= 8; - } - } - } - else - { - mz_uint i, z = 0; - TDEFL_PUT_BITS(0, 3); - if (d->m_bits_in) - { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, z ^= 0xFFFF) - { - TDEFL_PUT_BITS(z & 0xFFFF, 16); - } - } - } - - MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); - - memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; - d->m_total_lz_bytes = 0; - d->m_block_index++; - - if ((n = (int) (d->m_pOutput_buf - pOutput_buf_start)) != 0) - { - if (d->m_pPut_buf_func) - { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8*) d->m_pIn_buf; - if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) - return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); - } - else if (pOutput_buf_start == d->m_output_buf) - { - int bytes_to_copy = - (int) MZ_MIN((size_t) n, (size_t) (*d->m_pOut_buf_size - d->m_out_buf_ofs)); - memcpy((mz_uint8*) d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); - d->m_out_buf_ofs += bytes_to_copy; - if ((n -= bytes_to_copy) != 0) - { - d->m_output_flush_ofs = bytes_to_copy; - d->m_output_flush_remaining = n; - } - } - else - { - d->m_out_buf_ofs += n; - } - } - - return d->m_output_flush_remaining; -} - - #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - #ifdef MINIZ_UNALIGNED_USE_MEMCPY -static mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p) { - mz_uint16 ret; - memcpy(&ret, p, sizeof(mz_uint16)); - return ret; -} -static mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p) { - mz_uint16 ret; - memcpy(&ret, p, sizeof(mz_uint16)); - return ret; -} - #else - #define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*) (p) - #define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16*) (p) - #endif -static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor* d, - mz_uint lookahead_pos, - mz_uint max_dist, - mz_uint max_match_len, - mz_uint* pMatch_dist, - mz_uint* pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, - probe_pos = pos, next_probe_pos, probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint16 *s = (const mz_uint16*) (d->m_dict + pos), *p, *q; - mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), - s01 = TDEFL_READ_UNALIGNED_WORD2(s); - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) - return; - for (;;) - { - for (;;) - { - if (--num_probes_left == 0) - return; - #define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) \ - || ((dist = (mz_uint16) (lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if 
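tdefl_flush_block above derives the two-byte zlib header by picking FCHECK so that CMF*256 + FLG is divisible by 31, per RFC 1950. A standalone sketch of that arithmetic; it reproduces the familiar headers 78 01, 78 5E, 78 9C and 78 DA for the four compression-level codes.

    #include <cstdio>

    int main() {
        const unsigned cmf = 0x78;   // deflate method, 32 KiB window
        for (unsigned flevel = 0; flevel <= 3; ++flevel) {
            unsigned header = (cmf << 8) | (flevel << 6);
            header += 31 - (header % 31);   // set FCHECK so header % 31 == 0
            std::printf("flevel %u -> %04X (mod 31 = %u)\n", flevel, header, header % 31);
        }
        return 0;
    }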
(!dist) - break; - q = (const mz_uint16*) (d->m_dict + probe_pos); - if (TDEFL_READ_UNALIGNED_WORD2(q) != s01) - continue; - p = s; - probe_len = 32; - do - { - } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (--probe_len > 0)); - if (!probe_len) - { - *pMatch_dist = dist; - *pMatch_len = MZ_MIN(max_match_len, (mz_uint) TDEFL_MAX_MATCH_LEN); - break; - } - else if ((probe_len = ((mz_uint) (p - s) * 2) - + (mz_uint) (*(const mz_uint8*) p == *(const mz_uint8*) q)) - > match_len) - { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) - break; - c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); - } - } -} - #else -static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor* d, - mz_uint lookahead_pos, - mz_uint max_dist, - mz_uint max_match_len, - mz_uint* pMatch_dist, - mz_uint* pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, - probe_pos = pos, next_probe_pos, probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint8 *s = d->m_dict + pos, *p, *q; - mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) - return; - for (;;) - { - for (;;) - { - if (--num_probes_left == 0) - return; - #define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) \ - || ((dist = (mz_uint16) (lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if ((d->m_dict[probe_pos + match_len] == c0) \ - && (d->m_dict[probe_pos + match_len - 1] == c1)) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) - break; - p = s; - q = d->m_dict + probe_pos; - for (probe_len = 0; probe_len < max_match_len; probe_len++) - if (*p++ != *q++) - break; - if (probe_len > match_len) - { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = probe_len) == max_match_len) - return; - c0 = d->m_dict[pos + match_len]; - c1 = d->m_dict[pos + match_len - 1]; - } - } -} - #endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES */ - - #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - #ifdef MINIZ_UNALIGNED_USE_MEMCPY -static mz_uint32 TDEFL_READ_UNALIGNED_WORD32(const mz_uint8* p) { - mz_uint32 ret; - memcpy(&ret, p, sizeof(mz_uint32)); - return ret; -} - #else - #define TDEFL_READ_UNALIGNED_WORD32(p) *(const mz_uint32*) (p) - #endif -static mz_bool tdefl_compress_fast(tdefl_compressor* d) { - /* Faster, minimally featured LZRW1-style match+parse loop with better - * register utilization. Intended for applications where raw throughput is - * valued more highly than ratio. 
*/ - mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, - dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, - num_flags_left = d->m_num_flags_left; - mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; - mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - - while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) - { - const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; - mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; - mz_uint num_bytes_to_process = - (mz_uint) MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); - d->m_src_buf_left -= num_bytes_to_process; - lookahead_size += num_bytes_to_process; - - while (num_bytes_to_process) - { - mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); - memcpy(d->m_dict + dst_pos, d->m_pSrc, n); - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, - MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); - d->m_pSrc += n; - dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; - num_bytes_to_process -= n; - } - - dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); - if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) - break; - - while (lookahead_size >= 4) - { - mz_uint cur_match_dist, cur_match_len = 1; - mz_uint8* pCur_dict = d->m_dict + cur_pos; - mz_uint first_trigram = TDEFL_READ_UNALIGNED_WORD32(pCur_dict) & 0xFFFFFF; - mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) - & TDEFL_LEVEL1_HASH_SIZE_MASK; - mz_uint probe_pos = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16) lookahead_pos; - - if (((cur_match_dist = (mz_uint16) (lookahead_pos - probe_pos)) <= dict_size) - && ((TDEFL_READ_UNALIGNED_WORD32(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) - & 0xFFFFFF) - == first_trigram)) - { - const mz_uint16* p = (const mz_uint16*) pCur_dict; - const mz_uint16* q = (const mz_uint16*) (d->m_dict + probe_pos); - mz_uint32 probe_len = 32; - do - { - } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) - && (--probe_len > 0)); - cur_match_len = ((mz_uint) (p - (const mz_uint16*) pCur_dict) * 2) - + (mz_uint) (*(const mz_uint8*) p == *(const mz_uint8*) q); - if (!probe_len) - cur_match_len = cur_match_dist ? 
TDEFL_MAX_MATCH_LEN : 0; - - if ((cur_match_len < TDEFL_MIN_MATCH_LEN) - || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U))) - { - cur_match_len = 1; - *pLZ_code_buf++ = (mz_uint8) first_trigram; - *pLZ_flags = (mz_uint8) (*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8) first_trigram]++; - } - else - { - mz_uint32 s0, s1; - cur_match_len = MZ_MIN(cur_match_len, lookahead_size); - - MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) - && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); - - cur_match_dist--; - - pLZ_code_buf[0] = (mz_uint8) (cur_match_len - TDEFL_MIN_MATCH_LEN); - #ifdef MINIZ_UNALIGNED_USE_MEMCPY - memcpy(&pLZ_code_buf[1], &cur_match_dist, sizeof(cur_match_dist)); - #else - *(mz_uint16*) (&pLZ_code_buf[1]) = (mz_uint16) cur_match_dist; - #endif - pLZ_code_buf += 3; - *pLZ_flags = (mz_uint8) ((*pLZ_flags >> 1) | 0x80); - - s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; - s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; - d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; - - d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; - } - } - else - { - *pLZ_code_buf++ = (mz_uint8) first_trigram; - *pLZ_flags = (mz_uint8) (*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8) first_trigram]++; - } - - if (--num_flags_left == 0) - { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - total_lz_bytes += cur_match_len; - lookahead_pos += cur_match_len; - dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint) TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; - MZ_ASSERT(lookahead_size >= cur_match_len); - lookahead_size -= cur_match_len; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) - { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - - while (lookahead_size) - { - mz_uint8 lit = d->m_dict[cur_pos]; - - total_lz_bytes++; - *pLZ_code_buf++ = lit; - *pLZ_flags = (mz_uint8) (*pLZ_flags >> 1); - if (--num_flags_left == 0) - { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - d->m_huff_count[0][lit]++; - - lookahead_pos++; - dict_size = MZ_MIN(dict_size + 1, (mz_uint) TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - lookahead_size--; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) - { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? 
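The fast path above writes LZ codes as flag bytes interleaved with records: a set flag bit means a 3-byte match record (match_len - 3, then the 16-bit distance stored minus 1, little-endian); a clear bit means one literal byte. A toy reader for that layout — the buffer contents and all names here are invented for illustration.

    #include <cstdio>

    int main() {
        // Hypothetical LZ code buffer: flag byte first, then its items.
        // Bit i of the flag byte (LSB-first) says item i is a match record.
        // Flag 0x02 -> item 0 literal, item 1 match, item 2 literal.
        const unsigned char  buf[] = {0x02, 'a', /*match:*/ 1, 0x04, 0x00, 'b'};
        const unsigned char* p     = buf;
        unsigned flags = *p++ | 0x100;   // 0x100 sentinel marks "8 bits consumed"
        while (flags != 1 && p < buf + sizeof(buf)) {
            if (flags & 1) {
                unsigned len  = p[0] + 3;                   // stored as match_len - 3
                unsigned dist = (p[1] | (p[2] << 8)) + 1;   // stored as distance - 1, LE
                p += 3;
                std::printf("match len=%u dist=%u\n", len, dist);
            } else {
                std::printf("literal '%c'\n", *p++);
            }
            flags >>= 1;
        }
        return 0;
    }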
MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - } - - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - return MZ_TRUE; -} - #endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ - -static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor* d, mz_uint8 lit) { - d->m_total_lz_bytes++; - *d->m_pLZ_code_buf++ = lit; - *d->m_pLZ_flags = (mz_uint8) (*d->m_pLZ_flags >> 1); - if (--d->m_num_flags_left == 0) - { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - d->m_huff_count[0][lit]++; -} - -static MZ_FORCEINLINE void -tdefl_record_match(tdefl_compressor* d, mz_uint match_len, mz_uint match_dist) { - mz_uint32 s0, s1; - - MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) - && (match_dist <= TDEFL_LZ_DICT_SIZE)); - - d->m_total_lz_bytes += match_len; - - d->m_pLZ_code_buf[0] = (mz_uint8) (match_len - TDEFL_MIN_MATCH_LEN); - - match_dist -= 1; - d->m_pLZ_code_buf[1] = (mz_uint8) (match_dist & 0xFF); - d->m_pLZ_code_buf[2] = (mz_uint8) (match_dist >> 8); - d->m_pLZ_code_buf += 3; - - *d->m_pLZ_flags = (mz_uint8) ((*d->m_pLZ_flags >> 1) | 0x80); - if (--d->m_num_flags_left == 0) - { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; - d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; - d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; -} - -static mz_bool tdefl_compress_normal(tdefl_compressor* d) { - const mz_uint8* pSrc = d->m_pSrc; - size_t src_buf_left = d->m_src_buf_left; - tdefl_flush flush = d->m_flush; - - while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) - { - mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; - /* Update dictionary and hash chains. Keeps the lookahead size equal to - * TDEFL_MAX_MATCH_LEN. */ - if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) - { - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, - ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; - mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) - ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; - mz_uint num_bytes_to_process = - (mz_uint) MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); - const mz_uint8* pSrc_end = pSrc ? 
pSrc + num_bytes_to_process : NULL; - src_buf_left -= num_bytes_to_process; - d->m_lookahead_size += num_bytes_to_process; - while (pSrc != pSrc_end) - { - mz_uint8 c = *pSrc++; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16) (ins_pos); - dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - ins_pos++; - } - } - else - { - while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) - { - mz_uint8 c = *pSrc++; - mz_uint dst_pos = - (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; - src_buf_left--; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) - { - mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; - mz_uint hash = - ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) - ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) - ^ c) - & (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16) (ins_pos); - } - } - } - d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); - if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) - break; - - /* Simple lazy/greedy parsing state machine. */ - len_to_move = 1; - cur_match_dist = 0; - cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); - cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) - { - if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) - { - mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; - cur_match_len = 0; - while (cur_match_len < d->m_lookahead_size) - { - if (d->m_dict[cur_pos + cur_match_len] != c) - break; - cur_match_len++; - } - if (cur_match_len < TDEFL_MIN_MATCH_LEN) - cur_match_len = 0; - else - cur_match_dist = 1; - } - } - else - { - tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, - &cur_match_dist, &cur_match_len); - } - if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) - || (cur_pos == cur_match_dist) - || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) - { - cur_match_dist = cur_match_len = 0; - } - if (d->m_saved_match_len) - { - if (cur_match_len > d->m_saved_match_len) - { - tdefl_record_literal(d, (mz_uint8) d->m_saved_lit); - if (cur_match_len >= 128) - { - tdefl_record_match(d, cur_match_len, cur_match_dist); - d->m_saved_match_len = 0; - len_to_move = cur_match_len; - } - else - { - d->m_saved_lit = d->m_dict[cur_pos]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - } - else - { - tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); - len_to_move = d->m_saved_match_len - 1; - d->m_saved_match_len = 0; - } - } - else if (!cur_match_dist) - tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); - else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) - || (cur_match_len >= 128)) - { - tdefl_record_match(d, cur_match_len, cur_match_dist); - len_to_move = cur_match_len; - } - else - { - d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 
1)]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - /* Move the lookahead forward by len_to_move bytes. */ - d->m_lookahead_pos += len_to_move; - MZ_ASSERT(d->m_lookahead_size >= len_to_move); - d->m_lookahead_size -= len_to_move; - d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint) TDEFL_LZ_DICT_SIZE); - /* Check if it's time to flush the current LZ codes to the internal output - * buffer. */ - if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) - || ((d->m_total_lz_bytes > 31 * 1024) - && (((((mz_uint) (d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) - >= d->m_total_lz_bytes) - || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) - { - int n; - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - } - } - - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - return MZ_TRUE; -} - -static tdefl_status tdefl_flush_output_buffer(tdefl_compressor* d) { - if (d->m_pIn_buf_size) - { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8*) d->m_pIn_buf; - } - - if (d->m_pOut_buf_size) - { - size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); - memcpy((mz_uint8*) d->m_pOut_buf + d->m_out_buf_ofs, - d->m_output_buf + d->m_output_flush_ofs, n); - d->m_output_flush_ofs += (mz_uint) n; - d->m_output_flush_remaining -= (mz_uint) n; - d->m_out_buf_ofs += n; - - *d->m_pOut_buf_size = d->m_out_buf_ofs; - } - - return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_compress(tdefl_compressor* d, - const void* pIn_buf, - size_t* pIn_buf_size, - void* pOut_buf, - size_t* pOut_buf_size, - tdefl_flush flush) { - if (!d) - { - if (pIn_buf_size) - *pIn_buf_size = 0; - if (pOut_buf_size) - *pOut_buf_size = 0; - return TDEFL_STATUS_BAD_PARAM; - } - - d->m_pIn_buf = pIn_buf; - d->m_pIn_buf_size = pIn_buf_size; - d->m_pOut_buf = pOut_buf; - d->m_pOut_buf_size = pOut_buf_size; - d->m_pSrc = (const mz_uint8*) (pIn_buf); - d->m_src_buf_left = pIn_buf_size ? 
*pIn_buf_size : 0; - d->m_out_buf_ofs = 0; - d->m_flush = flush; - - if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) - || (d->m_prev_return_status != TDEFL_STATUS_OKAY) - || (d->m_wants_to_finish && (flush != TDEFL_FINISH)) - || (pIn_buf_size && *pIn_buf_size && !pIn_buf) - || (pOut_buf_size && *pOut_buf_size && !pOut_buf)) - { - if (pIn_buf_size) - *pIn_buf_size = 0; - if (pOut_buf_size) - *pOut_buf_size = 0; - return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); - } - d->m_wants_to_finish |= (flush == TDEFL_FINISH); - - if ((d->m_output_flush_remaining) || (d->m_finished)) - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); - - #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) - && ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) - && ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) - == 0)) - { - if (!tdefl_compress_fast(d)) - return d->m_prev_return_status; - } - else - #endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ - { - if (!tdefl_compress_normal(d)) - return d->m_prev_return_status; - } - - if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) - d->m_adler32 = (mz_uint32) mz_adler32(d->m_adler32, (const mz_uint8*) pIn_buf, - d->m_pSrc - (const mz_uint8*) pIn_buf); - - if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) - { - if (tdefl_flush_block(d, flush) < 0) - return d->m_prev_return_status; - d->m_finished = (flush == TDEFL_FINISH); - if (flush == TDEFL_FULL_FLUSH) - { - MZ_CLEAR_ARR(d->m_hash); - MZ_CLEAR_ARR(d->m_next); - d->m_dict_size = 0; - } - } - - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); -} - -tdefl_status tdefl_compress_buffer(tdefl_compressor* d, - const void* pIn_buf, - size_t in_buf_size, - tdefl_flush flush) { - MZ_ASSERT(d->m_pPut_buf_func); - return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); -} - -tdefl_status tdefl_init(tdefl_compressor* d, - tdefl_put_buf_func_ptr pPut_buf_func, - void* pPut_buf_user, - int flags) { - d->m_pPut_buf_func = pPut_buf_func; - d->m_pPut_buf_user = pPut_buf_user; - d->m_flags = (mz_uint) (flags); - d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; - d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; - d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; - if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) - MZ_CLEAR_ARR(d->m_hash); - d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = - d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; - d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = - d->m_bit_buffer = d->m_wants_to_finish = 0; - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - *d->m_pLZ_flags = 0; - d->m_num_flags_left = 8; - d->m_pOutput_buf = d->m_output_buf; - d->m_pOutput_buf_end = d->m_output_buf; - d->m_prev_return_status = TDEFL_STATUS_OKAY; - d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; - d->m_adler32 = 1; - d->m_pIn_buf = NULL; - d->m_pOut_buf = NULL; - d->m_pIn_buf_size = NULL; - d->m_pOut_buf_size = NULL; - d->m_flush = TDEFL_NO_FLUSH; - d->m_pSrc = NULL; - d->m_src_buf_left = 0; - d->m_out_buf_ofs = 0; - if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) - MZ_CLEAR_ARR(d->m_dict); - memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - 
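tdefl_init above seeds m_adler32 with 1, the RFC 1950 start value, and tdefl_compress later folds every consumed input byte through mz_adler32. For reference, a from-scratch Adler-32 over modulus 65521; the well-known "Wikipedia" test vector should print 11E60398.

    #include <cstdio>
    #include <cstring>

    // RFC 1950 Adler-32: s1 = 1 + sum(bytes) mod 65521,
    // s2 = running sum of s1 values mod 65521; result is s2:s1.
    unsigned adler32(const unsigned char* p, size_t n) {
        unsigned s1 = 1, s2 = 0;
        for (size_t i = 0; i < n; ++i) {
            s1 = (s1 + p[i]) % 65521;
            s2 = (s2 + s1) % 65521;
        }
        return (s2 << 16) | s1;
    }

    int main() {
        const char* msg = "Wikipedia";
        std::printf("%08X\n", adler32((const unsigned char*) msg, std::strlen(msg)));
        return 0;
    }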
memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - return TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor* d) { return d->m_prev_return_status; } - -mz_uint32 tdefl_get_adler32(tdefl_compressor* d) { return d->m_adler32; } - -mz_bool tdefl_compress_mem_to_output(const void* pBuf, - size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void* pPut_buf_user, - int flags) { - tdefl_compressor* pComp; - mz_bool succeeded; - if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) - return MZ_FALSE; - pComp = (tdefl_compressor*) MZ_MALLOC(sizeof(tdefl_compressor)); - if (!pComp) - return MZ_FALSE; - succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); - succeeded = - succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); - MZ_FREE(pComp); - return succeeded; -} - -typedef struct { - size_t m_size, m_capacity; - mz_uint8* m_pBuf; - mz_bool m_expandable; -} tdefl_output_buffer; - -static mz_bool tdefl_output_buffer_putter(const void* pBuf, int len, void* pUser) { - tdefl_output_buffer* p = (tdefl_output_buffer*) pUser; - size_t new_size = p->m_size + len; - if (new_size > p->m_capacity) - { - size_t new_capacity = p->m_capacity; - mz_uint8* pNew_buf; - if (!p->m_expandable) - return MZ_FALSE; - do - { - new_capacity = MZ_MAX(128U, new_capacity << 1U); - } while (new_size > new_capacity); - pNew_buf = (mz_uint8*) MZ_REALLOC(p->m_pBuf, new_capacity); - if (!pNew_buf) - return MZ_FALSE; - p->m_pBuf = pNew_buf; - p->m_capacity = new_capacity; - } - memcpy((mz_uint8*) p->m_pBuf + p->m_size, pBuf, len); - p->m_size = new_size; - return MZ_TRUE; -} - -void* tdefl_compress_mem_to_heap(const void* pSrc_buf, - size_t src_buf_len, - size_t* pOut_len, - int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_len) - return MZ_FALSE; - else - *pOut_len = 0; - out_buf.m_expandable = MZ_TRUE; - if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, - flags)) - return NULL; - *pOut_len = out_buf.m_size; - return out_buf.m_pBuf; -} - -size_t tdefl_compress_mem_to_mem( - void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_buf) - return 0; - out_buf.m_pBuf = (mz_uint8*) pOut_buf; - out_buf.m_capacity = out_buf_len; - if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, - flags)) - return 0; - return out_buf.m_size; -} - -/* level may actually range from [0,10] (10 is a "hidden" max level, where we - * want a bit more compression and it's fine if throughput to fall off a cliff - * on some files). */ -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) { - mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] - | ((level <= 3) ? 
TDEFL_GREEDY_PARSING_FLAG : 0); - if (window_bits > 0) - comp_flags |= TDEFL_WRITE_ZLIB_HEADER; - - if (!level) - comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; - else if (strategy == MZ_FILTERED) - comp_flags |= TDEFL_FILTER_MATCHES; - else if (strategy == MZ_HUFFMAN_ONLY) - comp_flags &= ~TDEFL_MAX_PROBES_MASK; - else if (strategy == MZ_FIXED) - comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; - else if (strategy == MZ_RLE) - comp_flags |= TDEFL_RLE_MATCHES; - - return comp_flags; -} - - #ifdef _MSC_VER - #pragma warning(push) - #pragma warning(disable: 4204) /* nonstandard extension used : non-constant \ - aggregate initializer (also supported by \ - GNU C and C99, so no big deal) */ - #endif - -/* Simple PNG writer function by Alex Evans, 2011. Released into the public - domain: https://gist.github.com/908299, more context at - http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. - This is actually a modification of Alex's original code so PNG files generated - by this function pass pngcheck. */ -void* tdefl_write_image_to_png_file_in_memory_ex( - const void* pImage, int w, int h, int num_chans, size_t* pLen_out, mz_uint level, mz_bool flip) { - /* Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was - * defined. */ - static const mz_uint s_tdefl_png_num_probes[11] = {0, 1, 6, 32, 16, 32, - 128, 256, 512, 768, 1500}; - tdefl_compressor* pComp = (tdefl_compressor*) MZ_MALLOC(sizeof(tdefl_compressor)); - tdefl_output_buffer out_buf; - int i, bpl = w * num_chans, y, z; - mz_uint32 c; - *pLen_out = 0; - if (!pComp) - return NULL; - MZ_CLEAR_OBJ(out_buf); - out_buf.m_expandable = MZ_TRUE; - out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); - if (NULL == (out_buf.m_pBuf = (mz_uint8*) MZ_MALLOC(out_buf.m_capacity))) - { - MZ_FREE(pComp); - return NULL; - } - /* write dummy header */ - for (z = 41; z; --z) - tdefl_output_buffer_putter(&z, 1, &out_buf); - /* compress image data */ - tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, - s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); - for (y = 0; y < h; ++y) - { - tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); - tdefl_compress_buffer(pComp, (const mz_uint8*) pImage + (flip ? 
(h - 1 - y) : y) * bpl, bpl, - TDEFL_NO_FLUSH); - } - if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) - { - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - /* write real header */ - *pLen_out = out_buf.m_size - 41; - { - static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; - mz_uint8 pnghdr[41] = {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, - 0x0d, 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x49, 0x44, 0x41, 0x54}; - pnghdr[18] = (mz_uint8) (w >> 8); - pnghdr[19] = (mz_uint8) w; - pnghdr[22] = (mz_uint8) (h >> 8); - pnghdr[23] = (mz_uint8) h; - pnghdr[25] = chans[num_chans]; - pnghdr[33] = (mz_uint8) (*pLen_out >> 24); - pnghdr[34] = (mz_uint8) (*pLen_out >> 16); - pnghdr[35] = (mz_uint8) (*pLen_out >> 8); - pnghdr[36] = (mz_uint8) *pLen_out; - c = (mz_uint32) mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); - for (i = 0; i < 4; ++i, c <<= 8) - ((mz_uint8*) (pnghdr + 29))[i] = (mz_uint8) (c >> 24); - memcpy(out_buf.m_pBuf, pnghdr, 41); - } - /* write footer (IDAT CRC-32, followed by IEND chunk) */ - if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, - &out_buf)) - { - *pLen_out = 0; - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - c = (mz_uint32) mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4); - for (i = 0; i < 4; ++i, c <<= 8) - (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8) (c >> 24); - /* compute final size of file, grab compressed data buffer and return */ - *pLen_out += 57; - MZ_FREE(pComp); - return out_buf.m_pBuf; -} -void* tdefl_write_image_to_png_file_in_memory( - const void* pImage, int w, int h, int num_chans, size_t* pLen_out) { - /* Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we - * can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's - * where #defined out) */ - return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, - MZ_FALSE); -} - - #ifndef MINIZ_NO_MALLOC -/* Allocate the tdefl_compressor and tinfl_decompressor structures in C so that - */ -/* non-C language bindings to tdefL_ and tinfl_ API don't need to worry about */ -/* structure size and allocation mechanism. */ -tdefl_compressor* tdefl_compressor_alloc(void) { - return (tdefl_compressor*) MZ_MALLOC(sizeof(tdefl_compressor)); -} - -void tdefl_compressor_free(tdefl_compressor* pComp) { MZ_FREE(pComp); } - #endif - - #ifdef _MSC_VER - #pragma warning(pop) - #endif - - #ifdef __cplusplus -} - #endif - - #endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/ -/************************************************************************** - * - * Copyright 2013-2014 RAD Game Tools and Valve Software - * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
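The hard-coded footer above ends with the bytes AE 42 60 82, which are just the CRC-32 of the four tag bytes "IEND". A minimal bitwise CRC-32 over the reflected polynomial 0xEDB88320 that reproduces those bytes (function name hypothetical):

    #include <cstdio>

    unsigned crc32_bitwise(const unsigned char* p, size_t n) {
        unsigned crc = 0xFFFFFFFFu;
        for (size_t i = 0; i < n; ++i) {
            crc ^= p[i];
            for (int b = 0; b < 8; ++b)
                crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1)));
        }
        return ~crc;
    }

    int main() {
        const char* tag = "IEND";
        // Matches the AE 42 60 82 trailer in the hard-coded footer above.
        std::printf("%08X\n", crc32_bitwise((const unsigned char*) tag, 4));
        return 0;
    }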
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - **************************************************************************/ - - #ifndef MINIZ_NO_INFLATE_APIS - - #ifdef __cplusplus -extern "C" { - #endif - - /* ------------------- Low-level Decompression (completely independent from all - * compression API's) */ - - #define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) - #define TINFL_MEMSET(p, c, l) memset(p, c, l) - - #define TINFL_CR_BEGIN \ - switch (r->m_state) \ - { \ - case 0 : - #define TINFL_CR_RETURN(state_index, result) \ - do \ - { \ - status = result; \ - r->m_state = state_index; \ - goto common_exit; \ - case state_index :; \ - } \ - MZ_MACRO_END - #define TINFL_CR_RETURN_FOREVER(state_index, result) \ - do \ - { \ - for (;;) \ - { \ - TINFL_CR_RETURN(state_index, result); \ - } \ - } \ - MZ_MACRO_END - #define TINFL_CR_FINISH } - - #define TINFL_GET_BYTE(state_index, c) \ - do \ - { \ - while (pIn_buf_cur >= pIn_buf_end) \ - { \ - TINFL_CR_RETURN(state_index, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) \ - ? TINFL_STATUS_NEEDS_MORE_INPUT \ - : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \ - } \ - c = *pIn_buf_cur++; \ - } \ - MZ_MACRO_END - - #define TINFL_NEED_BITS(state_index, n) \ - do \ - { \ - mz_uint c; \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t) c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < (mz_uint) (n)) - #define TINFL_SKIP_BITS(state_index, n) \ - do \ - { \ - if (num_bits < (mz_uint) (n)) \ - { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END - #define TINFL_GET_BITS(state_index, b, n) \ - do \ - { \ - if (num_bits < (mz_uint) (n)) \ - { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - b = bit_buf & ((1 << (n)) - 1); \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END - - /* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes - * remaining in the input buffer falls below 2. */ - /* It reads just enough bytes from the input stream that are needed to decode - * the next Huffman code (and absolutely no more). It works by trying to fully - * decode a */ - /* Huffman code by using whatever bits are currently present in the bit buffer. - * If this fails, it reads another byte, and tries again until it succeeds or - * until the */ - /* bit buffer contains >=15 bits (deflate's max. Huffman code size). */ - #define TINFL_HUFF_BITBUF_FILL(state_index, pLookUp, pTree) \ - do \ - { \ - temp = pLookUp[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ - if (temp >= 0) \ - { \ - code_len = temp >> 9; \ - if ((code_len) && (num_bits >= code_len)) \ - break; \ - } \ - else if (num_bits > TINFL_FAST_LOOKUP_BITS) \ - { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do \ - { \ - temp = pTree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while ((temp < 0) && (num_bits >= (code_len + 1))); \ - if (temp >= 0) \ - break; \ - } \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t) c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < 15); - - /* TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. 
It's more complex - * than you would initially expect because the zlib API expects the decompressor - * to never read */ - /* beyond the final byte of the deflate stream. (In other words, when this macro - * wants to read another byte from the input, it REALLY needs another byte in - * order to fully */ - /* decode the next Huffman code.) Handling this properly is particularly - * important on raw deflate (non-zlib) streams, which aren't followed by a byte - * aligned adler-32. */ - /* The slow path is only executed at the very end of the input buffer. */ - /* v1.16: The original macro handled the case at the very end of the passed-in - * input buffer, but we also need to handle the case where the user passes in - * 1+zillion bytes */ - /* following the deflate data and our non-conservative read-ahead path won't - * kick in here on this code. This is much trickier. */ - #define TINFL_HUFF_DECODE(state_index, sym, pLookUp, pTree) \ - do \ - { \ - int temp; \ - mz_uint code_len, c; \ - if (num_bits < 15) \ - { \ - if ((pIn_buf_end - pIn_buf_cur) < 2) \ - { \ - TINFL_HUFF_BITBUF_FILL(state_index, pLookUp, pTree); \ - } \ - else \ - { \ - bit_buf |= (((tinfl_bit_buf_t) pIn_buf_cur[0]) << num_bits) \ - | (((tinfl_bit_buf_t) pIn_buf_cur[1]) << (num_bits + 8)); \ - pIn_buf_cur += 2; \ - num_bits += 16; \ - } \ - } \ - if ((temp = pLookUp[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ - code_len = temp >> 9, temp &= 511; \ - else \ - { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do \ - { \ - temp = pTree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while (temp < 0); \ - } \ - sym = temp; \ - bit_buf >>= code_len; \ - num_bits -= code_len; \ - } \ - MZ_MACRO_END - -static void tinfl_clear_tree(tinfl_decompressor* r) { - if (r->m_type == 0) - MZ_CLEAR_ARR(r->m_tree_0); - else if (r->m_type == 1) - MZ_CLEAR_ARR(r->m_tree_1); - else - MZ_CLEAR_ARR(r->m_tree_2); -} - -tinfl_status tinfl_decompress(tinfl_decompressor* r, - const mz_uint8* pIn_buf_next, - size_t* pIn_buf_size, - mz_uint8* pOut_buf_start, - mz_uint8* pOut_buf_next, - size_t* pOut_buf_size, - const mz_uint32 decomp_flags) { - static const mz_uint16 s_length_base[31] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, - 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, - 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const mz_uint8 s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0}; - static const mz_uint16 s_dist_base[32] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; - static const mz_uint8 s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; - static const mz_uint8 s_length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, - 11, 4, 12, 3, 13, 2, 14, 1, 15}; - static const mz_uint16 s_min_table_sizes[3] = {257, 1, 4}; - - mz_int16* pTrees[3]; - mz_uint8* pCode_sizes[3]; - - tinfl_status status = TINFL_STATUS_FAILED; - mz_uint32 num_bits, dist, counter, num_extra; - tinfl_bit_buf_t bit_buf; - const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; - mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = - pOut_buf_next ? pOut_buf_next + *pOut_buf_size : NULL; - size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) - ? 
(size_t) -1 - : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, - dist_from_out_buf_start; - - /* Ensure the output buffer's size is a power of 2, unless the output buffer - * is large enough to hold the entire output file (in which case it doesn't - * matter). */ - if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) - { - *pIn_buf_size = *pOut_buf_size = 0; - return TINFL_STATUS_BAD_PARAM; - } - - pTrees[0] = r->m_tree_0; - pTrees[1] = r->m_tree_1; - pTrees[2] = r->m_tree_2; - pCode_sizes[0] = r->m_code_size_0; - pCode_sizes[1] = r->m_code_size_1; - pCode_sizes[2] = r->m_code_size_2; - - num_bits = r->m_num_bits; - bit_buf = r->m_bit_buf; - dist = r->m_dist; - counter = r->m_counter; - num_extra = r->m_num_extra; - dist_from_out_buf_start = r->m_dist_from_out_buf_start; - TINFL_CR_BEGIN - - bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; - r->m_z_adler32 = r->m_check_adler32 = 1; - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) - { - TINFL_GET_BYTE(1, r->m_zhdr0); - TINFL_GET_BYTE(2, r->m_zhdr1); - counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) - || ((r->m_zhdr0 & 15) != 8)); - if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) - counter |= - (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) - || ((out_buf_size_mask + 1) < (size_t) ((size_t) 1 << (8U + (r->m_zhdr0 >> 4))))); - if (counter) - { - TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); - } - } - - do - { - TINFL_GET_BITS(3, r->m_final, 3); - r->m_type = r->m_final >> 1; - if (r->m_type == 0) - { - TINFL_SKIP_BITS(5, num_bits & 7); - for (counter = 0; counter < 4; ++counter) - { - if (num_bits) - TINFL_GET_BITS(6, r->m_raw_header[counter], 8); - else - TINFL_GET_BYTE(7, r->m_raw_header[counter]); - } - if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) - != (mz_uint) (0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) - { - TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); - } - while ((counter) && (num_bits)) - { - TINFL_GET_BITS(51, dist, 8); - while (pOut_buf_cur >= pOut_buf_end) - { - TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8) dist; - counter--; - } - while (counter) - { - size_t n; - while (pOut_buf_cur >= pOut_buf_end) - { - TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); - } - while (pIn_buf_cur >= pIn_buf_end) - { - TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) - ? 
TINFL_STATUS_NEEDS_MORE_INPUT - : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); - } - n = MZ_MIN(MZ_MIN((size_t) (pOut_buf_end - pOut_buf_cur), - (size_t) (pIn_buf_end - pIn_buf_cur)), - counter); - TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); - pIn_buf_cur += n; - pOut_buf_cur += n; - counter -= (mz_uint) n; - } - } - else if (r->m_type == 3) - { - TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); - } - else - { - if (r->m_type == 1) - { - mz_uint8* p = r->m_code_size_0; - mz_uint i; - r->m_table_sizes[0] = 288; - r->m_table_sizes[1] = 32; - TINFL_MEMSET(r->m_code_size_1, 5, 32); - for (i = 0; i <= 143; ++i) - *p++ = 8; - for (; i <= 255; ++i) - *p++ = 9; - for (; i <= 279; ++i) - *p++ = 7; - for (; i <= 287; ++i) - *p++ = 8; - } - else - { - for (counter = 0; counter < 3; counter++) - { - TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); - r->m_table_sizes[counter] += s_min_table_sizes[counter]; - } - MZ_CLEAR_ARR(r->m_code_size_2); - for (counter = 0; counter < r->m_table_sizes[2]; counter++) - { - mz_uint s; - TINFL_GET_BITS(14, s, 3); - r->m_code_size_2[s_length_dezigzag[counter]] = (mz_uint8) s; - } - r->m_table_sizes[2] = 19; - } - for (; (int) r->m_type >= 0; r->m_type--) - { - int tree_next, tree_cur; - mz_int16* pLookUp; - mz_int16* pTree; - mz_uint8* pCode_size; - mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; - pLookUp = r->m_look_up[r->m_type]; - pTree = pTrees[r->m_type]; - pCode_size = pCode_sizes[r->m_type]; - MZ_CLEAR_ARR(total_syms); - TINFL_MEMSET(pLookUp, 0, sizeof(r->m_look_up[0])); - tinfl_clear_tree(r); - for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) - total_syms[pCode_size[i]]++; - used_syms = 0, total = 0; - next_code[0] = next_code[1] = 0; - for (i = 1; i <= 15; ++i) - { - used_syms += total_syms[i]; - next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); - } - if ((65536 != total) && (used_syms > 1)) - { - TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); - } - for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; - ++sym_index) - { - mz_uint rev_code = 0, l, cur_code, code_size = pCode_size[sym_index]; - if (!code_size) - continue; - cur_code = next_code[code_size]++; - for (l = code_size; l > 0; l--, cur_code >>= 1) - rev_code = (rev_code << 1) | (cur_code & 1); - if (code_size <= TINFL_FAST_LOOKUP_BITS) - { - mz_int16 k = (mz_int16) ((code_size << 9) | sym_index); - while (rev_code < TINFL_FAST_LOOKUP_SIZE) - { - pLookUp[rev_code] = k; - rev_code += (1 << code_size); - } - continue; - } - if (0 == (tree_cur = pLookUp[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) - { - pLookUp[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16) tree_next; - tree_cur = tree_next; - tree_next -= 2; - } - rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); - for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) - { - tree_cur -= ((rev_code >>= 1) & 1); - if (!pTree[-tree_cur - 1]) - { - pTree[-tree_cur - 1] = (mz_int16) tree_next; - tree_cur = tree_next; - tree_next -= 2; - } - else - tree_cur = pTree[-tree_cur - 1]; - } - tree_cur -= ((rev_code >>= 1) & 1); - pTree[-tree_cur - 1] = (mz_int16) sym_index; - } - if (r->m_type == 2) - { - for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) - { - mz_uint s; - TINFL_HUFF_DECODE(16, dist, r->m_look_up[2], r->m_tree_2); - if (dist < 16) - { - r->m_len_codes[counter++] = (mz_uint8) dist; - continue; - } - if ((dist == 16) && (!counter)) - { - TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); - } - num_extra = "\02\03\07"[dist - 16]; - 
TINFL_GET_BITS(18, s, num_extra); - s += "\03\03\013"[dist - 16]; - TINFL_MEMSET(r->m_len_codes + counter, - (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); - counter += s; - } - if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) - { - TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); - } - TINFL_MEMCPY(r->m_code_size_0, r->m_len_codes, r->m_table_sizes[0]); - TINFL_MEMCPY(r->m_code_size_1, r->m_len_codes + r->m_table_sizes[0], - r->m_table_sizes[1]); - } - } - for (;;) - { - mz_uint8* pSrc; - for (;;) - { - if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) - { - TINFL_HUFF_DECODE(23, counter, r->m_look_up[0], r->m_tree_0); - if (counter >= 256) - break; - while (pOut_buf_cur >= pOut_buf_end) - { - TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8) counter; - } - else - { - int sym2; - mz_uint code_len; - #if TINFL_USE_64BIT_BITBUF - if (num_bits < 30) - { - bit_buf |= (((tinfl_bit_buf_t) MZ_READ_LE32(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 4; - num_bits += 32; - } - #else - if (num_bits < 15) - { - bit_buf |= (((tinfl_bit_buf_t) MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } - #endif - if ((sym2 = r->m_look_up[0][bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) - code_len = sym2 >> 9; - else - { - code_len = TINFL_FAST_LOOKUP_BITS; - do - { - sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - counter = sym2; - bit_buf >>= code_len; - num_bits -= code_len; - if (counter & 256) - break; - - #if !TINFL_USE_64BIT_BITBUF - if (num_bits < 15) - { - bit_buf |= (((tinfl_bit_buf_t) MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } - #endif - if ((sym2 = r->m_look_up[0][bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) - code_len = sym2 >> 9; - else - { - code_len = TINFL_FAST_LOOKUP_BITS; - do - { - sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - bit_buf >>= code_len; - num_bits -= code_len; - - pOut_buf_cur[0] = (mz_uint8) counter; - if (sym2 & 256) - { - pOut_buf_cur++; - counter = sym2; - break; - } - pOut_buf_cur[1] = (mz_uint8) sym2; - pOut_buf_cur += 2; - } - } - if ((counter &= 511) == 256) - break; - - num_extra = s_length_extra[counter - 257]; - counter = s_length_base[counter - 257]; - if (num_extra) - { - mz_uint extra_bits; - TINFL_GET_BITS(25, extra_bits, num_extra); - counter += extra_bits; - } - - TINFL_HUFF_DECODE(26, dist, r->m_look_up[1], r->m_tree_1); - num_extra = s_dist_extra[dist]; - dist = s_dist_base[dist]; - if (num_extra) - { - mz_uint extra_bits; - TINFL_GET_BITS(27, extra_bits, num_extra); - dist += extra_bits; - } - - dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; - if ((dist == 0 || dist > dist_from_out_buf_start || dist_from_out_buf_start == 0) - && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) - { - TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); - } - - pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); - - if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) - { - while (counter--) - { - while (pOut_buf_cur >= pOut_buf_end) - { - TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = - pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; - } - continue; - } - #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - else if ((counter >= 9) && (counter <= dist)) - { - const mz_uint8* pSrc_end = pSrc + (counter & ~7); - do - { - #ifdef MINIZ_UNALIGNED_USE_MEMCPY - 
memcpy(pOut_buf_cur, pSrc, sizeof(mz_uint32) * 2); - #else - ((mz_uint32*) pOut_buf_cur)[0] = ((const mz_uint32*) pSrc)[0]; - ((mz_uint32*) pOut_buf_cur)[1] = ((const mz_uint32*) pSrc)[1]; - #endif - pOut_buf_cur += 8; - } while ((pSrc += 8) < pSrc_end); - if ((counter &= 7) < 3) - { - if (counter) - { - pOut_buf_cur[0] = pSrc[0]; - if (counter > 1) - pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - continue; - } - } - #endif - while (counter > 2) - { - pOut_buf_cur[0] = pSrc[0]; - pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur[2] = pSrc[2]; - pOut_buf_cur += 3; - pSrc += 3; - counter -= 3; - } - if (counter > 0) - { - pOut_buf_cur[0] = pSrc[0]; - if (counter > 1) - pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - } - } - } while (!(r->m_final & 1)); - - /* Ensure byte alignment and put back any bytes from the bitbuf if we've - * looked ahead too far on gzip, or other Deflate streams followed by - * arbitrary data. */ - /* I'm being super conservative here. A number of simplifications can be made - * to the byte alignment part, and the Adler32 check shouldn't ever need to - * worry about reading from the bitbuf now. */ - TINFL_SKIP_BITS(32, num_bits & 7); - while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) - { - --pIn_buf_cur; - num_bits -= 8; - } - bit_buf &= ~(~(tinfl_bit_buf_t) 0 << num_bits); - MZ_ASSERT(!num_bits); /* if this assert fires then we've read beyond the end - of non-deflate/zlib streams with following data (such - as gzip streams). */ - - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) - { - for (counter = 0; counter < 4; ++counter) - { - mz_uint s; - if (num_bits) - TINFL_GET_BITS(41, s, 8); - else - TINFL_GET_BYTE(42, s); - r->m_z_adler32 = (r->m_z_adler32 << 8) | s; - } - } - TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); - - TINFL_CR_FINISH - -common_exit: - /* As long as we aren't telling the caller that we NEED more input to make - * forward progress: */ - /* Put back any bytes from the bitbuf in case we've looked ahead too far on - * gzip, or other Deflate streams followed by arbitrary data. */ - /* We need to be very careful here to NOT push back any bytes we definitely - * know we need to make forward progress, though, or we'll lock the caller up - * into an inf loop. 
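(For readers skimming the deleted inflater: the put-back rule the comment above describes is easy to state on its own. A minimal standalone sketch, independent of miniz and using hypothetical names, of returning whole undecoded bytes from a bit buffer to the caller:

    /* Sketch only: give back any whole bytes that were pulled into the bit
     * buffer but never decoded, so the caller's "consumed input" count stays
     * exact. `cur` points one past the last byte read into the bit buffer. */
    static const unsigned char* put_back_whole_bytes(const unsigned char* begin,
                                                     const unsigned char* cur,
                                                     unsigned* num_bits) {
        while (cur > begin && *num_bits >= 8) {
            --cur;          /* this byte is still intact in the bit buffer */
            *num_bits -= 8; /* drop it from the bit accounting as well */
        }
        return cur; /* new one-past-the-end of consumed input */
    }

tinfl skips the put-back exactly when it is about to report that it needs more input, which is how it avoids the infinite-loop hazard the comment warns about.)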
*/ - if ((status != TINFL_STATUS_NEEDS_MORE_INPUT) - && (status != TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS)) - { - while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) - { - --pIn_buf_cur; - num_bits -= 8; - } - } - r->m_num_bits = num_bits; - r->m_bit_buf = bit_buf & ~(~(tinfl_bit_buf_t) 0 << num_bits); - r->m_dist = dist; - r->m_counter = counter; - r->m_num_extra = num_extra; - r->m_dist_from_out_buf_start = dist_from_out_buf_start; - *pIn_buf_size = pIn_buf_cur - pIn_buf_next; - *pOut_buf_size = pOut_buf_cur - pOut_buf_next; - if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) - && (status >= 0)) - { - const mz_uint8* ptr = pOut_buf_next; - size_t buf_len = *pOut_buf_size; - mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; - size_t block_len = buf_len % 5552; - while (buf_len) - { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) - { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) - s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - r->m_check_adler32 = (s2 << 16) + s1; - if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) - && (r->m_check_adler32 != r->m_z_adler32)) - status = TINFL_STATUS_ADLER32_MISMATCH; - } - return status; -} - -/* Higher level helper functions. */ -void* tinfl_decompress_mem_to_heap(const void* pSrc_buf, - size_t src_buf_len, - size_t* pOut_len, - int flags) { - tinfl_decompressor decomp; - void * pBuf = NULL, *pNew_buf; - size_t src_buf_ofs = 0, out_buf_capacity = 0; - *pOut_len = 0; - tinfl_init(&decomp); - for (;;) - { - size_t src_buf_size = src_buf_len - src_buf_ofs, - dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; - tinfl_status status = tinfl_decompress( - &decomp, (const mz_uint8*) pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*) pBuf, - pBuf ? (mz_uint8*) pBuf + *pOut_len : NULL, &dst_buf_size, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) - { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - src_buf_ofs += src_buf_size; - *pOut_len += dst_buf_size; - if (status == TINFL_STATUS_DONE) - break; - new_out_buf_capacity = out_buf_capacity * 2; - if (new_out_buf_capacity < 128) - new_out_buf_capacity = 128; - pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); - if (!pNew_buf) - { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - pBuf = pNew_buf; - out_buf_capacity = new_out_buf_capacity; - } - return pBuf; -} - -size_t tinfl_decompress_mem_to_mem( - void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags) { - tinfl_decompressor decomp; - tinfl_status status; - tinfl_init(&decomp); - status = tinfl_decompress(&decomp, (const mz_uint8*) pSrc_buf, &src_buf_len, - (mz_uint8*) pOut_buf, (mz_uint8*) pOut_buf, &out_buf_len, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) - | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; -} - -int tinfl_decompress_mem_to_callback(const void* pIn_buf, - size_t* pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void* pPut_buf_user, - int flags) { - int result = 0; - tinfl_decompressor decomp; - mz_uint8* pDict = (mz_uint8*) MZ_MALLOC(TINFL_LZ_DICT_SIZE); - size_t in_buf_ofs = 0, dict_ofs = 0; - if (!pDict) - return TINFL_STATUS_FAILED; - memset(pDict, 0, TINFL_LZ_DICT_SIZE); - tinfl_init(&decomp); - for (;;) - { - size_t in_buf_size = *pIn_buf_size - in_buf_ofs, - dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; - tinfl_status status = tinfl_decompress( - &decomp, (const mz_uint8*) pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, - &dst_buf_size, - (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); - in_buf_ofs += in_buf_size; - if ((dst_buf_size) - && (!(*pPut_buf_func)(pDict + dict_ofs, (int) dst_buf_size, pPut_buf_user))) - break; - if (status != TINFL_STATUS_HAS_MORE_OUTPUT) - { - result = (status == TINFL_STATUS_DONE); - break; - } - dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); - } - MZ_FREE(pDict); - *pIn_buf_size = in_buf_ofs; - return result; -} - - #ifndef MINIZ_NO_MALLOC -tinfl_decompressor* tinfl_decompressor_alloc(void) { - tinfl_decompressor* pDecomp = (tinfl_decompressor*) MZ_MALLOC(sizeof(tinfl_decompressor)); - if (pDecomp) - tinfl_init(pDecomp); - return pDecomp; -} - -void tinfl_decompressor_free(tinfl_decompressor* pDecomp) { MZ_FREE(pDecomp); } - #endif - - #ifdef __cplusplus -} - #endif - - #endif /*#ifndef MINIZ_NO_INFLATE_APIS*/ -/************************************************************************** - * - * Copyright 2013-2014 RAD Game Tools and Valve Software - * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC - * Copyright 2016 Martin Raiber - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- *
- **************************************************************************/
-
- #ifndef MINIZ_NO_ARCHIVE_APIS
-
- #ifdef __cplusplus
-extern "C" {
- #endif
-
- /* ------------------- .ZIP archive reading */
-
- #ifdef MINIZ_NO_STDIO
- #define MZ_FILE void*
- #else
- #include <stdio.h>
-
- #if defined(_MSC_VER) || defined(__MINGW64__) || defined(__MINGW32__)
-
- #ifndef WIN32_LEAN_AND_MEAN
- #define WIN32_LEAN_AND_MEAN
- #endif
- #ifndef __cplusplus
- #define MICROSOFT_WINDOWS_WINBASE_H_DEFINE_INTERLOCKED_CPLUSPLUS_OVERLOADS 0
- #endif
- #ifndef NOMINMAX
- #define NOMINMAX
- #endif
- #include <windows.h>
-
-static WCHAR* mz_utf8z_to_widechar(const char* str) {
-    int    reqChars = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
-    WCHAR* wStr = (WCHAR*) malloc(reqChars * sizeof(WCHAR));
-    MultiByteToWideChar(CP_UTF8, 0, str, -1, wStr, reqChars);
-    return wStr;
-}
-
-static FILE* mz_fopen(const char* pFilename, const char* pMode) {
-    WCHAR* wFilename = mz_utf8z_to_widechar(pFilename);
-    WCHAR* wMode = mz_utf8z_to_widechar(pMode);
-    FILE*  pFile = NULL;
- #ifdef ZIP_ENABLE_SHARABLE_FILE_OPEN
-    pFile = _wfopen(wFilename, wMode);
- #else
-    errno_t err = _wfopen_s(&pFile, wFilename, wMode);
- #endif
-    free(wFilename);
-    free(wMode);
- #ifdef ZIP_ENABLE_SHARABLE_FILE_OPEN
-    return pFile;
- #else
-    return err ? NULL : pFile;
- #endif
-}
-
-static FILE* mz_freopen(const char* pPath, const char* pMode, FILE* pStream) {
-    WCHAR* wPath = mz_utf8z_to_widechar(pPath);
-    WCHAR* wMode = mz_utf8z_to_widechar(pMode);
-    FILE*  pFile = NULL;
- #ifdef ZIP_ENABLE_SHARABLE_FILE_OPEN
-    pFile = _wfreopen(wPath, wMode, pStream);
- #else
-    errno_t err = _wfreopen_s(&pFile, wPath, wMode, pStream);
- #endif
-    free(wPath);
-    free(wMode);
- #ifdef ZIP_ENABLE_SHARABLE_FILE_OPEN
-    return pFile;
- #else
-    return err ? NULL : pFile;
- #endif
-}
-
- #if defined(__MINGW32__)
-static int mz_stat(const char* path, struct _stat* buffer) {
-    WCHAR* wPath = mz_utf8z_to_widechar(path);
-    int    res = _wstat(wPath, buffer);
-    free(wPath);
-    return res;
-}
- #else
-static int mz_stat64(const char* path, struct __stat64* buffer) {
-    WCHAR* wPath = mz_utf8z_to_widechar(path);
-    int    res = _wstat64(wPath, buffer);
-    free(wPath);
-    return res;
-}
- #endif
-
-static int mz_mkdir(const char* pDirname) {
-    WCHAR* wDirname = mz_utf8z_to_widechar(pDirname);
-    int    res = _wmkdir(wDirname);
-    free(wDirname);
-    return res;
-}
-
- #ifndef MINIZ_NO_TIME
- #include <sys/utime.h>
- #endif
- #define MZ_FOPEN mz_fopen
- #define MZ_FCLOSE fclose
- #define MZ_FREAD fread
- #define MZ_FWRITE fwrite
- #define MZ_FTELL64 _ftelli64
- #define MZ_FSEEK64 _fseeki64
- #if defined(__MINGW32__)
- #define MZ_FILE_STAT_STRUCT _stat
- #define MZ_FILE_STAT mz_stat
- #else
- #define MZ_FILE_STAT_STRUCT _stat64
- #define MZ_FILE_STAT mz_stat64
- #endif
- #define MZ_FFLUSH fflush
- #define MZ_FREOPEN mz_freopen
- #define MZ_DELETE_FILE remove
- #define MZ_MKDIR(d) mz_mkdir(d)
-
- #elif defined(__MINGW32__) || defined(__WATCOMC__)
- #ifndef MINIZ_NO_TIME
- #include <sys/utime.h>
- #endif
- #define MZ_FOPEN(f, m) fopen(f, m)
- #define MZ_FCLOSE fclose
- #define MZ_FREAD fread
- #define MZ_FWRITE fwrite
- #define MZ_FTELL64 _ftelli64
- #define MZ_FSEEK64 _fseeki64
- #define MZ_FILE_STAT_STRUCT stat
- #define MZ_FILE_STAT stat
- #define MZ_FFLUSH fflush
- #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
- #define MZ_DELETE_FILE remove
- #define MZ_MKDIR(d) _mkdir(d)
-
- #elif defined(__TINYC__)
- #ifndef MINIZ_NO_TIME
- #include <sys/utime.h>
- #endif
- #define MZ_FOPEN(f, m) fopen(f, m)
- #define MZ_FCLOSE fclose
- #define MZ_FREAD fread
- #define MZ_FWRITE fwrite
- #define MZ_FTELL64 ftell
- #define MZ_FSEEK64 fseek
- #define MZ_FILE_STAT_STRUCT stat
- #define MZ_FILE_STAT stat
- #define MZ_FFLUSH fflush
- #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
- #define MZ_DELETE_FILE remove
- #if defined(_WIN32) || defined(_WIN64)
- #define MZ_MKDIR(d) _mkdir(d)
- #else
- #define MZ_MKDIR(d) mkdir(d, 0755)
- #endif
-
- #elif defined(__USE_LARGEFILE64) /* gcc, clang */
- #ifndef MINIZ_NO_TIME
- #include <utime.h>
- #endif
- #define MZ_FOPEN(f, m) fopen64(f, m)
- #define MZ_FCLOSE fclose
- #define MZ_FREAD fread
- #define MZ_FWRITE fwrite
- #define MZ_FTELL64 ftello64
- #define MZ_FSEEK64 fseeko64
- #define MZ_FILE_STAT_STRUCT stat64
- #define MZ_FILE_STAT stat64
- #define MZ_FFLUSH fflush
- #define MZ_FREOPEN(p, m, s) freopen64(p, m, s)
- #define MZ_DELETE_FILE remove
- #define MZ_MKDIR(d) mkdir(d, 0755)
-
- #elif defined(__APPLE__) || defined(__FreeBSD__) \
-     || (defined(__linux__) && defined(__x86_64__))
- #ifndef MINIZ_NO_TIME
- #include <utime.h>
- #endif
- #define MZ_FOPEN(f, m) fopen(f, m)
- #define MZ_FCLOSE fclose
- #define MZ_FREAD fread
- #define MZ_FWRITE fwrite
- #define MZ_FTELL64 ftello
- #define MZ_FSEEK64 fseeko
- #define MZ_FILE_STAT_STRUCT stat
- #define MZ_FILE_STAT stat
- #define MZ_FFLUSH fflush
- #define MZ_FREOPEN(p, m, s) freopen(p, m, s)
- #define MZ_DELETE_FILE remove
- #define MZ_MKDIR(d) mkdir(d, 0755)
-
- #else
- #pragma message( \
-     "Using fopen, ftello, fseeko, stat() etc. path for file I/O - this path may not support large files.")
- #ifndef MINIZ_NO_TIME
- #include <utime.h>
- #endif
- #define MZ_FOPEN(f, m) fopen(f, m)
- #define MZ_FCLOSE fclose
- #define MZ_FREAD fread
- #define MZ_FWRITE fwrite
- #ifdef __STRICT_ANSI__
- #define MZ_FTELL64 ftell
- #define MZ_FSEEK64 fseek
- #else
- #define MZ_FTELL64 ftello
- #define MZ_FSEEK64 fseeko
- #endif
- #define MZ_FILE_STAT_STRUCT stat
- #define MZ_FILE_STAT stat
- #define MZ_FFLUSH fflush
- #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
- #define MZ_DELETE_FILE remove
- #define MZ_MKDIR(d) mkdir(d, 0755)
- #endif /* #ifdef _MSC_VER */
- #endif /* #ifdef MINIZ_NO_STDIO */
-
- #ifndef CHMOD
- // Upon successful completion, a value of 0 is returned.
- // Otherwise, a value of -1 is returned and errno is set to indicate the error.
- // int chmod(const char *path, mode_t mode);
- #define CHMOD(f, m) chmod(f, m)
- #endif
-
- #define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c))
-
-/* Various ZIP archive enums. To completely avoid cross platform compiler
- * alignment and platform endian issues, miniz.c doesn't use structs for any of
- * this stuff. */
-enum {
-    /* ZIP archive identifiers and record sizes */
-    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50,
-    MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50,
-    MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
-    MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30,
-    MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46,
-    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,
-
-    /* ZIP64 archive identifier and record sizes */
-    MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06064b50,
-    MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50,
-    MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE = 56,
-    MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE = 20,
-    MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID = 0x0001,
-    MZ_ZIP_DATA_DESCRIPTOR_ID = 0x08074b50,
-    MZ_ZIP_DATA_DESCRIPTER_SIZE64 = 24,
-    MZ_ZIP_DATA_DESCRIPTER_SIZE32 = 16,
-
-    /* Central directory header record offsets */
-    MZ_ZIP_CDH_SIG_OFS = 0,
-    MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4,
-    MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6,
-    MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
-    MZ_ZIP_CDH_METHOD_OFS = 10,
-    MZ_ZIP_CDH_FILE_TIME_OFS = 12,
-    MZ_ZIP_CDH_FILE_DATE_OFS = 14,
-    MZ_ZIP_CDH_CRC32_OFS = 16,
-    MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20,
-    MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24,
-    MZ_ZIP_CDH_FILENAME_LEN_OFS = 28,
-    MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
-    MZ_ZIP_CDH_COMMENT_LEN_OFS = 32,
-    MZ_ZIP_CDH_DISK_START_OFS = 34,
-    MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36,
-    MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38,
-    MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,
-
-    /* Local directory header offsets */
-    MZ_ZIP_LDH_SIG_OFS = 0,
-    MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4,
-    MZ_ZIP_LDH_BIT_FLAG_OFS = 6,
-    MZ_ZIP_LDH_METHOD_OFS = 8,
-    MZ_ZIP_LDH_FILE_TIME_OFS = 10,
-    MZ_ZIP_LDH_FILE_DATE_OFS = 12,
-    MZ_ZIP_LDH_CRC32_OFS = 14,
-    MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18,
-    MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
-    MZ_ZIP_LDH_FILENAME_LEN_OFS = 26,
-    MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
-    MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR = 1 << 3,
-
-    /* End of central directory offsets */
-    MZ_ZIP_ECDH_SIG_OFS = 0,
-    MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4,
-    MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6,
-    MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
-    MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10,
-    MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12,
-    MZ_ZIP_ECDH_CDIR_OFS_OFS = 16,
-    MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,
-
-    /* ZIP64 End of central directory locator offsets */
-    MZ_ZIP64_ECDL_SIG_OFS = 0,                   /* 4 bytes */
-    MZ_ZIP64_ECDL_NUM_DISK_CDIR_OFS = 4,         /* 4 bytes */
-    MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS = 8, /* 8 bytes
*/ - MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS = 16, /* 4 bytes */ - - /* ZIP64 End of central directory header offsets */ - MZ_ZIP64_ECDH_SIG_OFS = 0, /* 4 bytes */ - MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS = 4, /* 8 bytes */ - MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS = 12, /* 2 bytes */ - MZ_ZIP64_ECDH_VERSION_NEEDED_OFS = 14, /* 2 bytes */ - MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS = 16, /* 4 bytes */ - MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS = 20, /* 4 bytes */ - MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 24, /* 8 bytes */ - MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS = 32, /* 8 bytes */ - MZ_ZIP64_ECDH_CDIR_SIZE_OFS = 40, /* 8 bytes */ - MZ_ZIP64_ECDH_CDIR_OFS_OFS = 48, /* 8 bytes */ - MZ_ZIP_VERSION_MADE_BY_DOS_FILESYSTEM_ID = 0, - MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG = 0x10, - MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED = 1, - MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG = 32, - MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION = 64, - MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED = 8192, - MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 = 1 << 11 -}; - -typedef struct { - void* m_p; - size_t m_size, m_capacity; - mz_uint m_element_size; -} mz_zip_array; - -struct mz_zip_internal_state_tag { - mz_zip_array m_central_dir; - mz_zip_array m_central_dir_offsets; - mz_zip_array m_sorted_central_dir_offsets; - - /* The flags passed in when the archive is initially opened. */ - mz_uint32 m_init_flags; - - /* MZ_TRUE if the archive has a zip64 end of central directory headers, etc. - */ - mz_bool m_zip64; - - /* MZ_TRUE if we found zip64 extended info in the central directory (m_zip64 - * will also be slammed to true too, even if we didn't find a zip64 end of - * central dir header, etc.) */ - mz_bool m_zip64_has_extended_info_fields; - - /* These fields are used by the file, FILE, memory, and memory/heap read/write - * helpers. 
*/ - MZ_FILE* m_pFile; - mz_uint64 m_file_archive_start_ofs; - - void* m_pMem; - size_t m_mem_size; - size_t m_mem_capacity; -}; - - #define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \ - (array_ptr)->m_element_size = element_size - - #if defined(DEBUG) || defined(_DEBUG) -static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array* pArray, mz_uint index) { - MZ_ASSERT(index < pArray->m_size); - return index; -} - #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ - ((element_type*) ((array_ptr)->m_p))[mz_zip_array_range_check(array_ptr, index)] - #else - #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ - ((element_type*) ((array_ptr)->m_p))[index] - #endif - -static MZ_FORCEINLINE void mz_zip_array_init(mz_zip_array* pArray, mz_uint32 element_size) { - memset(pArray, 0, sizeof(mz_zip_array)); - pArray->m_element_size = element_size; -} - -static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive* pZip, mz_zip_array* pArray) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); - memset(pArray, 0, sizeof(mz_zip_array)); -} - -static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive* pZip, - mz_zip_array* pArray, - size_t min_new_capacity, - mz_uint growing) { - void* pNew_p; - size_t new_capacity = min_new_capacity; - MZ_ASSERT(pArray->m_element_size); - if (pArray->m_capacity >= min_new_capacity) - return MZ_TRUE; - if (growing) - { - new_capacity = MZ_MAX(1, pArray->m_capacity); - while (new_capacity < min_new_capacity) - new_capacity *= 2; - } - if (NULL - == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, - new_capacity))) - return MZ_FALSE; - pArray->m_p = pNew_p; - pArray->m_capacity = new_capacity; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive* pZip, - mz_zip_array* pArray, - size_t new_capacity, - mz_uint growing) { - if (new_capacity > pArray->m_capacity) - { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) - return MZ_FALSE; - } - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive* pZip, - mz_zip_array* pArray, - size_t new_size, - mz_uint growing) { - if (new_size > pArray->m_capacity) - { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) - return MZ_FALSE; - } - pArray->m_size = new_size; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive* pZip, - mz_zip_array* pArray, - size_t n) { - return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive* pZip, - mz_zip_array* pArray, - const void* pElements, - size_t n) { - size_t orig_size = pArray->m_size; - if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) - return MZ_FALSE; - if (n > 0) - memcpy((mz_uint8*) pArray->m_p + orig_size * pArray->m_element_size, pElements, - n * pArray->m_element_size); - return MZ_TRUE; -} - - #ifndef MINIZ_NO_TIME -static MZ_TIME_T mz_zip_dos_to_time_t(int dos_time, int dos_date) { - struct tm tm; - memset(&tm, 0, sizeof(tm)); - tm.tm_isdst = -1; - tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; - tm.tm_mon = ((dos_date >> 5) & 15) - 1; - tm.tm_mday = dos_date & 31; - tm.tm_hour = (dos_time >> 11) & 31; - tm.tm_min = (dos_time >> 5) & 63; - tm.tm_sec = (dos_time << 1) & 62; - return mktime(&tm); -} - - #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS -static void mz_zip_time_t_to_dos_time(MZ_TIME_T time, mz_uint16* pDOS_time, mz_uint16* pDOS_date) { - 
#ifdef _MSC_VER - struct tm tm_struct; - struct tm* tm = &tm_struct; - errno_t err = localtime_s(tm, &time); - if (err) - { - *pDOS_date = 0; - *pDOS_time = 0; - return; - } - #else - struct tm* tm = localtime(&time); - #endif /* #ifdef _MSC_VER */ - - *pDOS_time = (mz_uint16) (((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); - *pDOS_date = - (mz_uint16) (((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); -} - #endif /* MINIZ_NO_ARCHIVE_WRITING_APIS */ - - #ifndef MINIZ_NO_STDIO - #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS -static mz_bool mz_zip_get_file_modified_time(const char* pFilename, MZ_TIME_T* pTime) { - struct MZ_FILE_STAT_STRUCT file_stat; - - /* On Linux with x86 glibc, this call will fail on large files (I think >= - * 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. */ - if (MZ_FILE_STAT(pFilename, &file_stat) != 0) - return MZ_FALSE; - - *pTime = file_stat.st_mtime; - - return MZ_TRUE; -} - #endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS*/ - -static mz_bool -mz_zip_set_file_times(const char* pFilename, MZ_TIME_T access_time, MZ_TIME_T modified_time) { - struct utimbuf t; - - memset(&t, 0, sizeof(t)); - t.actime = access_time; - t.modtime = modified_time; - - return !utime(pFilename, &t); -} - #endif /* #ifndef MINIZ_NO_STDIO */ - #endif /* #ifndef MINIZ_NO_TIME */ - -static MZ_FORCEINLINE mz_bool mz_zip_set_error(mz_zip_archive* pZip, mz_zip_error err_num) { - if (pZip) - pZip->m_last_error = err_num; - return MZ_FALSE; -} - -static mz_bool mz_zip_reader_init_internal(mz_zip_archive* pZip, mz_uint flags) { - (void) flags; - if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (!pZip->m_pAlloc) - pZip->m_pAlloc = miniz_def_alloc_func; - if (!pZip->m_pFree) - pZip->m_pFree = miniz_def_free_func; - if (!pZip->m_pRealloc) - pZip->m_pRealloc = miniz_def_realloc_func; - - pZip->m_archive_size = 0; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - pZip->m_last_error = MZ_ZIP_NO_ERROR; - - if (NULL - == (pZip->m_pState = (mz_zip_internal_state*) pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); - pZip->m_pState->m_init_flags = flags; - pZip->m_pState->m_zip64 = MZ_FALSE; - pZip->m_pState->m_zip64_has_extended_info_fields = MZ_FALSE; - - pZip->m_zip_mode = MZ_ZIP_MODE_READING; - - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array* pCentral_dir_array, - const mz_zip_array* pCentral_dir_offsets, - mz_uint l_index, - mz_uint r_index) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), - *pE; - const mz_uint8* pR = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), - r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + 
MZ_MIN(l_len, r_len); - while (pL < pE) - { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) - break; - pL++; - pR++; - } - return (pL == pE) ? (l_len < r_len) : (l < r); -} - - #define MZ_SWAP_UINT32(a, b) \ - do \ - { \ - mz_uint32 t = a; \ - a = b; \ - b = t; \ - } \ - MZ_MACRO_END - -/* Heap sort of lowercased filenames, used to help accelerate plain central - * directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), - * but it could allocate memory.) */ -static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive* pZip) { - mz_zip_internal_state* pState = pZip->m_pState; - const mz_zip_array* pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array* pCentral_dir = &pState->m_central_dir; - mz_uint32* pIndices; - mz_uint32 start, end; - const mz_uint32 size = pZip->m_total_files; - - if (size <= 1U) - return; - - pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); - - start = (size - 2U) >> 1U; - for (;;) - { - mz_uint64 child, root = start; - for (;;) - { - if ((child = (root << 1U) + 1U) >= size) - break; - child += (((child + 1U) < size) - && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1U]))); - if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], - pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - if (!start) - break; - start--; - } - - end = size - 1; - while (end > 0) - { - mz_uint64 child, root = 0; - MZ_SWAP_UINT32(pIndices[end], pIndices[0]); - for (;;) - { - if ((child = (root << 1U) + 1U) >= end) - break; - child += (((child + 1U) < end) - && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1U])); - if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], - pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - end--; - } -} - -static mz_bool mz_zip_reader_locate_header_sig(mz_zip_archive* pZip, - mz_uint32 record_sig, - mz_uint32 record_size, - mz_int64* pOfs) { - mz_int64 cur_file_ofs; - mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; - mz_uint8* pBuf = (mz_uint8*) buf_u32; - - /* Basic sanity checks - reject files which are too small */ - if (pZip->m_archive_size < record_size) - return MZ_FALSE; - - /* Find the record by scanning the file from the end towards the beginning. 
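(The EOCD record sits at the very end of the archive, followed only by a variable-length comment, which is why the search runs backwards. A simplified in-memory analogue of the chunked scan below, with a hypothetical helper name rather than the actual miniz I/O path:

    #include <stddef.h>
    #include <stdint.h>

    /* Find the last occurrence of a 4-byte little-endian signature that still
     * leaves `record_size` bytes to its right. Returns -1 if absent. */
    static ptrdiff_t find_record_backwards(const uint8_t* buf, size_t len,
                                           uint32_t sig, size_t record_size) {
        if (record_size < 4 || len < record_size)
            return -1;
        for (size_t i = len - record_size + 1; i-- > 0;) {
            uint32_t s = (uint32_t) buf[i] | ((uint32_t) buf[i + 1] << 8)
                       | ((uint32_t) buf[i + 2] << 16) | ((uint32_t) buf[i + 3] << 24);
            if (s == sig)
                return (ptrdiff_t) i;
        }
        return -1;
    }

With sig = MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG and record_size = MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE this mirrors the search below; miniz additionally abandons the scan after roughly 64 KiB, the maximum possible comment length.)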
*/ - cur_file_ofs = MZ_MAX((mz_int64) pZip->m_archive_size - (mz_int64) sizeof(buf_u32), 0); - for (;;) - { - int i, n = (int) MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); - - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint) n) - return MZ_FALSE; - - for (i = n - 4; i >= 0; --i) - { - mz_uint s = MZ_READ_LE32(pBuf + i); - if (s == record_sig) - { - if ((pZip->m_archive_size - (cur_file_ofs + i)) >= record_size) - break; - } - } - - if (i >= 0) - { - cur_file_ofs += i; - break; - } - - /* Give up if we've searched the entire file, or we've gone back "too far" - * (~64kb) */ - if ((!cur_file_ofs) - || ((pZip->m_archive_size - cur_file_ofs) - >= ((mz_uint64) (MZ_UINT16_MAX) + record_size))) - return MZ_FALSE; - - cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); - } - - *pOfs = cur_file_ofs; - return MZ_TRUE; -} - -static mz_bool mz_zip_reader_eocd64_valid(mz_zip_archive* pZip, uint64_t offset, uint8_t* buf) { - if (pZip->m_pRead(pZip->m_pIO_opaque, offset, buf, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) - == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) - { - if (MZ_READ_LE32(buf + MZ_ZIP64_ECDH_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG) - { - return MZ_TRUE; - } - } - - return MZ_FALSE; -} - -static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive* pZip, mz_uint flags) { - mz_uint cdir_size = 0, cdir_entries_on_this_disk = 0, num_this_disk = 0, cdir_disk_index = 0; - mz_uint64 cdir_ofs = 0, eocd_ofs = 0, archive_ofs = 0; - mz_int64 cur_file_ofs = 0; - const mz_uint8* p; - - mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; - mz_uint8* pBuf = (mz_uint8*) buf_u32; - mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); - mz_uint32 zip64_end_of_central_dir_locator_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE - + sizeof(mz_uint32) - 1) - / sizeof(mz_uint32)]; - mz_uint8* pZip64_locator = (mz_uint8*) zip64_end_of_central_dir_locator_u32; - - mz_uint32 zip64_end_of_central_dir_header_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - + sizeof(mz_uint32) - 1) - / sizeof(mz_uint32)]; - mz_uint8* pZip64_end_of_central_dir = (mz_uint8*) zip64_end_of_central_dir_header_u32; - - mz_uint64 zip64_end_of_central_dir_ofs = 0; - - /* Basic sanity checks - reject files which are too small, and check the first - * 4 bytes of the file to make sure a local header is there. */ - if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - - if (!mz_zip_reader_locate_header_sig(pZip, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE, &cur_file_ofs)) - return mz_zip_set_error(pZip, MZ_ZIP_FAILED_FINDING_CENTRAL_DIR); - - eocd_ofs = cur_file_ofs; - /* Read and verify the end of central directory record. 
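(The record verified here is 22 bytes of fixed little-endian fields plus an optional trailing comment; the MZ_ZIP_ECDH_* offsets above name each field. A hypothetical decoder for just the fields the reader needs, assuming `p` points at a record whose signature has already been checked:

    #include <stdint.h>

    static uint16_t rd_le16(const uint8_t* p) { return (uint16_t) (p[0] | (p[1] << 8)); }
    static uint32_t rd_le32(const uint8_t* p) {
        return (uint32_t) p[0] | ((uint32_t) p[1] << 8) | ((uint32_t) p[2] << 16)
             | ((uint32_t) p[3] << 24);
    }

    /* Sketch: pull the central-directory geometry out of a 22-byte EOCD. */
    static void read_eocd(const uint8_t* p, uint32_t* total_entries,
                          uint32_t* cdir_size, uint32_t* cdir_ofs) {
        *total_entries = rd_le16(p + 10); /* MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS */
        *cdir_size     = rd_le32(p + 12); /* MZ_ZIP_ECDH_CDIR_SIZE_OFS */
        *cdir_ofs      = rd_le32(p + 16); /* MZ_ZIP_ECDH_CDIR_OFS_OFS */
    }

Any of these fields stored as all-ones (0xFFFF or 0xFFFFFFFF) is the cue to consult the ZIP64 records instead, which is exactly what the code below goes on to do.)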
*/ - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - - if (MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - - if (cur_file_ofs - >= (MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)) - { - if (pZip->m_pRead(pZip->m_pIO_opaque, - cur_file_ofs - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE, pZip64_locator, - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) - == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) - { - if (MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_SIG_OFS) - == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG) - { - pZip->m_pState->m_zip64 = MZ_TRUE; - } - } - } - - if (pZip->m_pState->m_zip64) - { - /* Try locating the EOCD64 right before the EOCD64 locator. This works even - * when the effective start of the zip header is not yet known. */ - if (cur_file_ofs - < MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - - zip64_end_of_central_dir_ofs = cur_file_ofs - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE - - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE; - - if (!mz_zip_reader_eocd64_valid(pZip, zip64_end_of_central_dir_ofs, - pZip64_end_of_central_dir)) - { - /* That failed, try reading where the locator tells us to. */ - zip64_end_of_central_dir_ofs = - MZ_READ_LE64(pZip64_locator + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS); - - if (zip64_end_of_central_dir_ofs - > (pZip->m_archive_size - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - - if (!mz_zip_reader_eocd64_valid(pZip, zip64_end_of_central_dir_ofs, - pZip64_end_of_central_dir)) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - } - } - - pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS); - cdir_entries_on_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS); - num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); - cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); - cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS); - cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); - - if (pZip->m_pState->m_zip64) - { - mz_uint32 zip64_total_num_of_disks = - MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS); - mz_uint64 zip64_cdir_total_entries = - MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS); - mz_uint64 zip64_cdir_total_entries_on_this_disk = - MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS); - mz_uint64 zip64_size_of_end_of_central_dir_record = - MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS); - mz_uint64 zip64_size_of_central_directory = - MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_SIZE_OFS); - - if (zip64_size_of_end_of_central_dir_record - < (MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - 12)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if (zip64_total_num_of_disks != 1U) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); - - /* Check for miniz's practical limits */ - if (zip64_cdir_total_entries > MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); - - pZip->m_total_files = (mz_uint32) zip64_cdir_total_entries; - - if (zip64_cdir_total_entries_on_this_disk 
> MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); - - cdir_entries_on_this_disk = (mz_uint32) zip64_cdir_total_entries_on_this_disk; - - /* Check for miniz's current practical limits (sorry, this should be enough - * for millions of files) */ - if (zip64_size_of_central_directory > MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); - - cdir_size = (mz_uint32) zip64_size_of_central_directory; - - num_this_disk = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS); - - cdir_disk_index = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS); - - cdir_ofs = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_OFS_OFS); - } - - if (pZip->m_total_files != cdir_entries_on_this_disk) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); - - if (((num_this_disk | cdir_disk_index) != 0) - && ((num_this_disk != 1) || (cdir_disk_index != 1))) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); - - if (cdir_size < (mz_uint64) pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if ((cdir_ofs + (mz_uint64) cdir_size) > pZip->m_archive_size) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if (eocd_ofs < cdir_ofs + cdir_size) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - /* The end of central dir follows the central dir, unless the zip file has - * some trailing data (e.g. it is appended to an executable file). */ - archive_ofs = eocd_ofs - (cdir_ofs + cdir_size); - if (pZip->m_pState->m_zip64) - { - if (archive_ofs - < MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - archive_ofs -= - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE; - } - - /* Update the archive start position, but only if not specified. */ - if ((pZip->m_zip_type == MZ_ZIP_TYPE_FILE || pZip->m_zip_type == MZ_ZIP_TYPE_CFILE) - && pZip->m_pState->m_file_archive_start_ofs == 0) - { - pZip->m_pState->m_file_archive_start_ofs = archive_ofs; - pZip->m_archive_size -= archive_ofs; - } - - pZip->m_central_directory_file_ofs = cdir_ofs; - - if (pZip->m_total_files) - { - mz_uint i, n; - /* Read the entire central directory into a heap block, and allocate another - * heap block to hold the unsorted central dir file record offsets, and - * possibly another to hold the sorted indices. 
*/ - if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) - || (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, - pZip->m_total_files, MZ_FALSE))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - if (sort_central_dir) - { - if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, - pZip->m_total_files, MZ_FALSE)) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - } - - if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, - cdir_size) - != cdir_size) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - - /* Now create an index into the central directory file records, do some - * basic sanity checking on each record */ - p = (const mz_uint8*) pZip->m_pState->m_central_dir.m_p; - for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) - { - mz_uint total_header_size, disk_index, bit_flags, filename_size, ext_data_size; - mz_uint64 comp_size, decomp_size, local_header_ofs; - - if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) - || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = - (mz_uint32) (p - (const mz_uint8*) pZip->m_pState->m_central_dir.m_p); - - if (sort_central_dir) - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = - i; - - comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - filename_size = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - ext_data_size = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS); - - if ((!pZip->m_pState->m_zip64_has_extended_info_fields) && (ext_data_size) - && (MZ_MAX(MZ_MAX(comp_size, decomp_size), local_header_ofs) == MZ_UINT32_MAX)) - { - /* Attempt to find zip64 extended information field in the entry's extra - * data */ - mz_uint32 extra_size_remaining = ext_data_size; - - if (extra_size_remaining) - { - const mz_uint8* pExtra_data; - void* buf = NULL; - - if (MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + ext_data_size > n) - { - buf = MZ_MALLOC(ext_data_size); - if (buf == NULL) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - if (pZip->m_pRead(pZip->m_pIO_opaque, - cdir_ofs + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size, - buf, ext_data_size) - != ext_data_size) - { - MZ_FREE(buf); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - } - - pExtra_data = (mz_uint8*) buf; - } - else - { - pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size; - } - - do - { - mz_uint32 field_id; - mz_uint32 field_data_size; - - if (extra_size_remaining < (sizeof(mz_uint16) * 2)) - { - MZ_FREE(buf); - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - } - - field_id = MZ_READ_LE16(pExtra_data); - field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); - - if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining) - { - MZ_FREE(buf); - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - } - - if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) - { - /* Ok, the archive didn't have any zip64 headers but it uses a - * zip64 extended information field so mark it as zip64 anyway - * (this can occur with infozip's zip util when it reads - * compresses files from stdin). 
*/ - pZip->m_pState->m_zip64 = MZ_TRUE; - pZip->m_pState->m_zip64_has_extended_info_fields = MZ_TRUE; - break; - } - - pExtra_data += sizeof(mz_uint16) * 2 + field_data_size; - extra_size_remaining = - extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size; - } while (extra_size_remaining); - - MZ_FREE(buf); - } - } - - /* I've seen archives that aren't marked as zip64 that uses zip64 ext - * data, argh */ - if ((comp_size != MZ_UINT32_MAX) && (decomp_size != MZ_UINT32_MAX)) - { - if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) - || (decomp_size && !comp_size)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - } - - disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); - if ((disk_index == MZ_UINT16_MAX) - || ((disk_index != num_this_disk) && (disk_index != 1))) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); - - if (comp_size != MZ_UINT32_MAX) - { - if (((mz_uint64) MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) - + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) - > pZip->m_archive_size) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - } - - bit_flags = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - if (bit_flags & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); - - if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE - + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) - + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) - + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) - > n) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - n -= total_header_size; - p += total_header_size; - } - } - - if (sort_central_dir) - mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); - - return MZ_TRUE; -} - -void mz_zip_zero_struct(mz_zip_archive* pZip) { - if (pZip) - MZ_CLEAR_PTR(pZip); -} - -static mz_bool mz_zip_reader_end_internal(mz_zip_archive* pZip, mz_bool set_last_error) { - mz_bool status = MZ_TRUE; - - if (!pZip) - return MZ_FALSE; - - if ((!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) - || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - { - if (set_last_error) - pZip->m_last_error = MZ_ZIP_INVALID_PARAMETER; - - return MZ_FALSE; - } - - if (pZip->m_pState) - { - mz_zip_internal_state* pState = pZip->m_pState; - pZip->m_pState = NULL; - - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - - #ifndef MINIZ_NO_STDIO - if (pState->m_pFile) - { - if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) - { - if (MZ_FCLOSE(pState->m_pFile) == EOF) - { - if (set_last_error) - pZip->m_last_error = MZ_ZIP_FILE_CLOSE_FAILED; - status = MZ_FALSE; - } - } - pState->m_pFile = NULL; - } - #endif /* #ifndef MINIZ_NO_STDIO */ - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - } - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - - return status; -} - -mz_bool mz_zip_reader_end(mz_zip_archive* pZip) { - return mz_zip_reader_end_internal(pZip, MZ_TRUE); -} -mz_bool mz_zip_reader_init(mz_zip_archive* pZip, mz_uint64 size, mz_uint flags) { - if ((!pZip) || (!pZip->m_pRead)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (!mz_zip_reader_init_internal(pZip, flags)) - return MZ_FALSE; - - pZip->m_zip_type = MZ_ZIP_TYPE_USER; - pZip->m_archive_size = size; - - if (!mz_zip_reader_read_central_dir(pZip, flags)) - { - mz_zip_reader_end_internal(pZip, MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - 
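All of the reader setup above disappears because a zstd frame needs no central directory: the replacement path just feeds bytes through a decompression context. A minimal sketch of that streaming loop against the vendored zstd.h (the function name, include path, and error handling here are illustrative, not the exact code in network.cpp):

    #include <cstdio>
    #include <stdexcept>
    #include <string>
    #include <vector>
    #include "zstd.h"  // vendored header under src/external/

    // Sketch: stream a .zst file into a std::string with the
    // decompress-only zstd build this patch vendors.
    std::string decompress_to_string(const char* path) {
        std::FILE* f = std::fopen(path, "rb");
        if (!f)
            throw std::runtime_error("open failed");

        ZSTD_DCtx*        dctx = ZSTD_createDCtx();
        std::vector<char> inBuf(ZSTD_DStreamInSize());
        std::vector<char> outBuf(ZSTD_DStreamOutSize());
        std::string       result;

        size_t readBytes;
        while ((readBytes = std::fread(inBuf.data(), 1, inBuf.size(), f)) > 0)
        {
            ZSTD_inBuffer input = {inBuf.data(), readBytes, 0};
            while (input.pos < input.size)
            {
                ZSTD_outBuffer output = {outBuf.data(), outBuf.size(), 0};
                size_t         ret    = ZSTD_decompressStream(dctx, &output, &input);
                if (ZSTD_isError(ret))
                {
                    ZSTD_freeDCtx(dctx);
                    std::fclose(f);
                    throw std::runtime_error(ZSTD_getErrorName(ret));
                }
                result.append(outBuf.data(), output.pos);
            }
        }
        // A robust caller would also verify the frame ended cleanly
        // (last ZSTD_decompressStream return value of 0).
        ZSTD_freeDCtx(dctx);
        std::fclose(f);
        return result;
    }

ZSTD_DStreamInSize() and ZSTD_DStreamOutSize() return the buffer sizes the library recommends; other sizes work, these just avoid internal copying.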
-static size_t mz_zip_mem_read_func(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n) { - mz_zip_archive* pZip = (mz_zip_archive*) pOpaque; - size_t s = - (file_ofs >= pZip->m_archive_size) ? 0 : (size_t) MZ_MIN(pZip->m_archive_size - file_ofs, n); - memcpy(pBuf, (const mz_uint8*) pZip->m_pState->m_pMem + file_ofs, s); - return s; -} - -mz_bool mz_zip_reader_init_mem(mz_zip_archive* pZip, const void* pMem, size_t size, mz_uint flags) { - if (!pMem) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - - if (!mz_zip_reader_init_internal(pZip, flags)) - return MZ_FALSE; - - pZip->m_zip_type = MZ_ZIP_TYPE_MEMORY; - pZip->m_archive_size = size; - pZip->m_pRead = mz_zip_mem_read_func; - pZip->m_pIO_opaque = pZip; - pZip->m_pNeeds_keepalive = NULL; - - #ifdef __cplusplus - pZip->m_pState->m_pMem = const_cast<void*>(pMem); - #else - pZip->m_pState->m_pMem = (void*) pMem; - #endif - - pZip->m_pState->m_mem_size = size; - - if (!mz_zip_reader_read_central_dir(pZip, flags)) - { - mz_zip_reader_end_internal(pZip, MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - - #ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_read_func(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n) { - mz_zip_archive* pZip = (mz_zip_archive*) pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - - file_ofs += pZip->m_pState->m_file_archive_start_ofs; - - if (((mz_int64) file_ofs < 0) - || (((cur_ofs != (mz_int64) file_ofs)) - && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64) file_ofs, SEEK_SET)))) - return 0; - - return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_reader_init_file(mz_zip_archive* pZip, const char* pFilename, mz_uint32 flags) { - return mz_zip_reader_init_file_v2(pZip, pFilename, flags, 0, 0); -} - -mz_bool mz_zip_reader_init_file_v2(mz_zip_archive* pZip, - const char* pFilename, - mz_uint flags, - mz_uint64 file_start_ofs, - mz_uint64 archive_size) { - mz_uint64 file_size; - MZ_FILE* pFile; - - if ((!pZip) || (!pFilename) - || ((archive_size) && (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - pFile = MZ_FOPEN(pFilename, "rb"); - if (!pFile) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); - - file_size = archive_size; - if (!file_size) - { - if (MZ_FSEEK64(pFile, 0, SEEK_END)) - { - MZ_FCLOSE(pFile); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); - } - - file_size = MZ_FTELL64(pFile); - } - - /* TODO: Better sanity check archive_size and the # of actual remaining bytes - */ - - if (file_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - { - MZ_FCLOSE(pFile); - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - } - - if (!mz_zip_reader_init_internal(pZip, flags)) - { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - - pZip->m_zip_type = MZ_ZIP_TYPE_FILE; - pZip->m_pRead = mz_zip_file_read_func; - pZip->m_pIO_opaque = pZip; - pZip->m_pState->m_pFile = pFile; - pZip->m_archive_size = file_size; - pZip->m_pState->m_file_archive_start_ofs = file_start_ofs; - - if (!mz_zip_reader_read_central_dir(pZip, flags)) - { - mz_zip_reader_end_internal(pZip, MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - -mz_bool mz_zip_reader_init_cfile(mz_zip_archive* pZip, - MZ_FILE* pFile, - mz_uint64 archive_size, - mz_uint flags) { - mz_uint64 cur_file_ofs; - - if ((!pZip) || (!pFile)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); - - cur_file_ofs =
MZ_FTELL64(pFile); - - if (!archive_size) - { - if (MZ_FSEEK64(pFile, 0, SEEK_END)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); - - archive_size = MZ_FTELL64(pFile) - cur_file_ofs; - - if (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); - } - - if (!mz_zip_reader_init_internal(pZip, flags)) - return MZ_FALSE; - - pZip->m_zip_type = MZ_ZIP_TYPE_CFILE; - pZip->m_pRead = mz_zip_file_read_func; - - pZip->m_pIO_opaque = pZip; - pZip->m_pState->m_pFile = pFile; - pZip->m_archive_size = archive_size; - pZip->m_pState->m_file_archive_start_ofs = cur_file_ofs; - - if (!mz_zip_reader_read_central_dir(pZip, flags)) - { - mz_zip_reader_end_internal(pZip, MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - - #endif /* #ifndef MINIZ_NO_STDIO */ - -static MZ_FORCEINLINE const mz_uint8* mz_zip_get_cdh(mz_zip_archive* pZip, mz_uint file_index) { - if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files)) - return NULL; - return &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); -} - -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive* pZip, mz_uint file_index) { - mz_uint m_bit_flag; - const mz_uint8* p = mz_zip_get_cdh(pZip, file_index); - if (!p) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - return MZ_FALSE; - } - - m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - return (m_bit_flag - & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) - != 0; -} - -mz_bool mz_zip_reader_is_file_supported(mz_zip_archive* pZip, mz_uint file_index) { - mz_uint bit_flag; - mz_uint method; - - const mz_uint8* p = mz_zip_get_cdh(pZip, file_index); - if (!p) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - return MZ_FALSE; - } - - method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); - bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - - if ((method != 0) && (method != MZ_DEFLATED)) - { - mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); - return MZ_FALSE; - } - - if (bit_flag - & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) - { - mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); - return MZ_FALSE; - } - - if (bit_flag & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG) - { - mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive* pZip, mz_uint file_index) { - mz_uint filename_len, attribute_mapping_id, external_attr; - const mz_uint8* p = mz_zip_get_cdh(pZip, file_index); - if (!p) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - return MZ_FALSE; - } - - filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_len) - { - if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') - return MZ_TRUE; - } - - /* Bugfix: This code was also checking if the internal attribute was non-zero, - * which wasn't correct. */ - /* Most/all zip writers (hopefully) set DOS file/directory attributes in the - * low 16-bits, so check for the DOS directory flag and ignore the source OS - * ID in the created by field. */ - /* FIXME: Remove this check? Is it necessary - we already check the filename. 
- */ - attribute_mapping_id = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS) >> 8; - (void) attribute_mapping_id; - - external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - if ((external_attr & MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG) != 0) - { - return MZ_TRUE; - } - - return MZ_FALSE; -} - -static mz_bool mz_zip_file_stat_internal(mz_zip_archive* pZip, - mz_uint file_index, - const mz_uint8* pCentral_dir_header, - mz_zip_archive_file_stat* pStat, - mz_bool* pFound_zip64_extra_data) { - mz_uint n; - const mz_uint8* p = pCentral_dir_header; - - if (pFound_zip64_extra_data) - *pFound_zip64_extra_data = MZ_FALSE; - - if ((!p) || (!pStat)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - /* Extract fields from the central directory record. */ - pStat->m_file_index = file_index; - pStat->m_central_dir_ofs = - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); - pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); - pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); - pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); - #ifndef MINIZ_NO_TIME - pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), - MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); - #endif - pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); - pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); - pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - - /* Copy as much of the filename and comment as possible. */ - n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); - memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pStat->m_filename[n] = '\0'; - - n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); - pStat->m_comment_size = n; - memcpy(pStat->m_comment, - p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) - + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), - n); - pStat->m_comment[n] = '\0'; - - /* Set some flags for convienance */ - pStat->m_is_directory = mz_zip_reader_is_file_a_directory(pZip, file_index); - pStat->m_is_encrypted = mz_zip_reader_is_file_encrypted(pZip, file_index); - pStat->m_is_supported = mz_zip_reader_is_file_supported(pZip, file_index); - - /* See if we need to read any zip64 extended information fields. */ - /* Confusingly, these zip64 fields can be present even on non-zip64 archives - * (Debian zip on a huge files from stdin piped to stdout creates them). 
*/ - if (MZ_MAX(MZ_MAX(pStat->m_comp_size, pStat->m_uncomp_size), pStat->m_local_header_ofs) - == MZ_UINT32_MAX) - { - /* Attempt to find zip64 extended information field in the entry's extra - * data */ - mz_uint32 extra_size_remaining = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS); - - if (extra_size_remaining) - { - const mz_uint8* pExtra_data = - p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - - do - { - mz_uint32 field_id; - mz_uint32 field_data_size; - - if (extra_size_remaining < (sizeof(mz_uint16) * 2)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - field_id = MZ_READ_LE16(pExtra_data); - field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); - - if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) - { - const mz_uint8* pField_data = pExtra_data + sizeof(mz_uint16) * 2; - mz_uint32 field_data_remaining = field_data_size; - - if (pFound_zip64_extra_data) - *pFound_zip64_extra_data = MZ_TRUE; - - if (pStat->m_uncomp_size == MZ_UINT32_MAX) - { - if (field_data_remaining < sizeof(mz_uint64)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - pStat->m_uncomp_size = MZ_READ_LE64(pField_data); - pField_data += sizeof(mz_uint64); - field_data_remaining -= sizeof(mz_uint64); - } - - if (pStat->m_comp_size == MZ_UINT32_MAX) - { - if (field_data_remaining < sizeof(mz_uint64)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - pStat->m_comp_size = MZ_READ_LE64(pField_data); - pField_data += sizeof(mz_uint64); - field_data_remaining -= sizeof(mz_uint64); - } - - if (pStat->m_local_header_ofs == MZ_UINT32_MAX) - { - if (field_data_remaining < sizeof(mz_uint64)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - pStat->m_local_header_ofs = MZ_READ_LE64(pField_data); - pField_data += sizeof(mz_uint64); - field_data_remaining -= sizeof(mz_uint64); - } - - break; - } - - pExtra_data += sizeof(mz_uint16) * 2 + field_data_size; - extra_size_remaining = - extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size; - } while (extra_size_remaining); - } - } - - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_string_equal(const char* pA, - const char* pB, - mz_uint len, - mz_uint flags) { - mz_uint i; - if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) - return 0 == memcmp(pA, pB, len); - for (i = 0; i < len; ++i) - if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) - return MZ_FALSE; - return MZ_TRUE; -} - -static MZ_FORCEINLINE int mz_zip_filename_compare(const mz_zip_array* pCentral_dir_array, - const mz_zip_array* pCentral_dir_offsets, - mz_uint l_index, - const char* pR, - mz_uint r_len) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), - *pE; - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) - { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) - break; - pL++; - pR++; - } - return (pL == pE) ? 
(int) (l_len - r_len) : (l - r); -} - -static mz_bool -mz_zip_locate_file_binary_search(mz_zip_archive* pZip, const char* pFilename, mz_uint32* pIndex) { - mz_zip_internal_state* pState = pZip->m_pState; - const mz_zip_array* pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array* pCentral_dir = &pState->m_central_dir; - mz_uint32* pIndices = - &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const mz_uint32 size = pZip->m_total_files; - const mz_uint filename_len = (mz_uint) strlen(pFilename); - - if (pIndex) - *pIndex = 0; - - if (size) - { - /* yes I could use uint32_t's, but then we would have to add some special - * case checks in the loop, argh, and */ - /* honestly the major expense here on 32-bit CPU's will still be the - * filename compare */ - mz_int64 l = 0, h = (mz_int64) size - 1; - - while (l <= h) - { - mz_int64 m = l + ((h - l) >> 1); - mz_uint32 file_index = pIndices[(mz_uint32) m]; - - int comp = mz_zip_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, - pFilename, filename_len); - if (!comp) - { - if (pIndex) - *pIndex = file_index; - return MZ_TRUE; - } - else if (comp < 0) - l = m + 1; - else - h = m - 1; - } - } - - return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); -} - -int mz_zip_reader_locate_file(mz_zip_archive* pZip, - const char* pName, - const char* pComment, - mz_uint flags) { - mz_uint32 index; - if (!mz_zip_reader_locate_file_v2(pZip, pName, pComment, flags, &index)) - return -1; - else - return (int) index; -} - -mz_bool mz_zip_reader_locate_file_v2( - mz_zip_archive* pZip, const char* pName, const char* pComment, mz_uint flags, mz_uint32* pIndex) { - mz_uint file_index; - size_t name_len, comment_len; - - if (pIndex) - *pIndex = 0; - - if ((!pZip) || (!pZip->m_pState) || (!pName)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - /* See if we can use a binary search */ - if (((pZip->m_pState->m_init_flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0) - && (pZip->m_zip_mode == MZ_ZIP_MODE_READING) - && ((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) - && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) - { - return mz_zip_locate_file_binary_search(pZip, pName, pIndex); - } - - /* Locate the entry by scanning the entire central directory */ - name_len = strlen(pName); - if (name_len > MZ_UINT16_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - comment_len = pComment ? 
strlen(pComment) : 0; - if (comment_len > MZ_UINT16_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - for (file_index = 0; file_index < pZip->m_total_files; file_index++) - { - const mz_uint8* pHeader = &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); - mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); - const char* pFilename = (const char*) pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - if (filename_len < name_len) - continue; - if (comment_len) - { - mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), - file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); - const char* pFile_comment = pFilename + filename_len + file_extra_len; - if ((file_comment_len != comment_len) - || (!mz_zip_string_equal(pComment, pFile_comment, file_comment_len, flags))) - continue; - } - if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) - { - int ofs = filename_len - 1; - do - { - if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) - break; - } while (--ofs >= 0); - ofs++; - pFilename += ofs; - filename_len -= ofs; - } - if ((filename_len == name_len) - && (mz_zip_string_equal(pName, pFilename, filename_len, flags))) - { - if (pIndex) - *pIndex = file_index; - return MZ_TRUE; - } - } - - return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); -} - -static mz_bool mz_zip_reader_extract_to_mem_no_alloc1(mz_zip_archive* pZip, - mz_uint file_index, - void* pBuf, - size_t buf_size, - mz_uint flags, - void* pUser_read_buf, - size_t user_read_buf_size, - const mz_zip_archive_file_stat* st) { - int status = TINFL_STATUS_DONE; - mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, - read_buf_ofs = 0, read_buf_avail; - mz_zip_archive_file_stat file_stat; - void* pRead_buf; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; - mz_uint8* pLocal_header = (mz_uint8*) local_header_u32; - tinfl_decompressor inflator; - - if ((!pZip) || (!pZip->m_pState) || ((buf_size) && (!pBuf)) - || ((user_read_buf_size) && (!pUser_read_buf)) || (!pZip->m_pRead)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (st) - { - file_stat = *st; - } - else if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) - return MZ_FALSE; - - /* A directory or zero length file */ - if ((file_stat.m_is_directory) || (!file_stat.m_comp_size)) - return MZ_TRUE; - - /* Encryption and patch files are not supported. */ - if (file_stat.m_bit_flag - & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); - - /* This function only supports decompressing stored and deflate. */ - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) - && (file_stat.m_method != MZ_DEFLATED)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); - - /* Ensure supplied output buffer is large enough. */ - needed_size = - (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; - if (buf_size < needed_size) - return mz_zip_set_error(pZip, MZ_ZIP_BUF_TOO_SMALL); - - /* Read and parse the local directory entry. 
*/ - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - cur_file_ofs += (mz_uint64) (MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) - + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) - { - /* The file is stored or the caller has requested the compressed data. */ - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t) needed_size) - != needed_size) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) == 0) - { - if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8*) pBuf, (size_t) file_stat.m_uncomp_size) - != file_stat.m_crc32) - return mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED); - } - #endif - - return MZ_TRUE; - } - - /* Decompress the file either directly from memory or from a file input - * buffer. */ - tinfl_init(&inflator); - - if (pZip->m_pState->m_pMem) - { - /* Read directly from the archive in memory. */ - pRead_buf = (mz_uint8*) pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } - else if (pUser_read_buf) - { - /* Use a user provided read buffer. */ - if (!user_read_buf_size) - return MZ_FALSE; - pRead_buf = (mz_uint8*) pUser_read_buf; - read_buf_size = user_read_buf_size; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - else - { - /* Temporarily allocate a read buffer. */ - read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64) MZ_ZIP_MAX_IO_BUF_SIZE); - if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t) read_buf_size))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - do - { - /* The size_t cast here should be OK because we've verified that the output - * buffer is >= file_stat.m_uncomp_size above */ - size_t in_buf_size, out_buf_size = (size_t) (file_stat.m_uncomp_size - out_buf_ofs); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) - { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t) read_buf_avail) - != read_buf_avail) - { - status = TINFL_STATUS_FAILED; - mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); - break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - in_buf_size = (size_t) read_buf_avail; - status = tinfl_decompress(&inflator, (mz_uint8*) pRead_buf + read_buf_ofs, &in_buf_size, - (mz_uint8*) pBuf, (mz_uint8*) pBuf + out_buf_ofs, &out_buf_size, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF - | (comp_remaining ? 
TINFL_FLAG_HAS_MORE_INPUT : 0)); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - out_buf_ofs += out_buf_size; - } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); - - if (status == TINFL_STATUS_DONE) - { - /* Make sure the entire file was decompressed, and check its CRC. */ - if (out_buf_ofs != file_stat.m_uncomp_size) - { - mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); - status = TINFL_STATUS_FAILED; - } - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - else if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8*) pBuf, (size_t) file_stat.m_uncomp_size) - != file_stat.m_crc32) - { - mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED); - status = TINFL_STATUS_FAILED; - } - #endif - } - - if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive* pZip, - mz_uint file_index, - void* pBuf, - size_t buf_size, - mz_uint flags, - void* pUser_read_buf, - size_t user_read_buf_size) { - return mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, buf_size, flags, - pUser_read_buf, user_read_buf_size, NULL); -} - -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive* pZip, - const char* pFilename, - void* pBuf, - size_t buf_size, - mz_uint flags, - void* pUser_read_buf, - size_t user_read_buf_size) { - mz_uint32 file_index; - if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) - return MZ_FALSE; - return mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, buf_size, flags, - pUser_read_buf, user_read_buf_size, NULL); -} - -mz_bool mz_zip_reader_extract_to_mem( - mz_zip_archive* pZip, mz_uint file_index, void* pBuf, size_t buf_size, mz_uint flags) { - return mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, buf_size, flags, NULL, 0, - NULL); -} - -mz_bool mz_zip_reader_extract_file_to_mem( - mz_zip_archive* pZip, const char* pFilename, void* pBuf, size_t buf_size, mz_uint flags) { - return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, - 0); -} - -void* mz_zip_reader_extract_to_heap(mz_zip_archive* pZip, - mz_uint file_index, - size_t* pSize, - mz_uint flags) { - mz_zip_archive_file_stat file_stat; - mz_uint64 alloc_size; - void* pBuf; - - if (pSize) - *pSize = 0; - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) - return NULL; - - alloc_size = - (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? 
file_stat.m_comp_size : file_stat.m_uncomp_size; - if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) - { - mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - return NULL; - } - - if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t) alloc_size))) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - return NULL; - } - - if (!mz_zip_reader_extract_to_mem_no_alloc1(pZip, file_index, pBuf, (size_t) alloc_size, flags, - NULL, 0, &file_stat)) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return NULL; - } - - if (pSize) - *pSize = (size_t) alloc_size; - return pBuf; -} - -void* mz_zip_reader_extract_file_to_heap(mz_zip_archive* pZip, - const char* pFilename, - size_t* pSize, - mz_uint flags) { - mz_uint32 file_index; - if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) - { - if (pSize) - *pSize = 0; - return MZ_FALSE; - } - return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); -} - -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive* pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void* pOpaque, - mz_uint flags) { - int status = TINFL_STATUS_DONE; - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - mz_uint file_crc32 = MZ_CRC32_INIT; - #endif - mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, - cur_file_ofs; - mz_zip_archive_file_stat file_stat; - void* pRead_buf = NULL; - void* pWrite_buf = NULL; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; - mz_uint8* pLocal_header = (mz_uint8*) local_header_u32; - - if ((!pZip) || (!pZip->m_pState) || (!pCallback) || (!pZip->m_pRead)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) - return MZ_FALSE; - - /* A directory or zero length file */ - if ((file_stat.m_is_directory) || (!file_stat.m_comp_size)) - return MZ_TRUE; - - /* Encryption and patch files are not supported. */ - if (file_stat.m_bit_flag - & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); - - /* This function only supports decompressing stored and deflate. */ - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) - && (file_stat.m_method != MZ_DEFLATED)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); - - /* Read and do some minimal validation of the local directory entry (this - * doesn't crack the zip64 stuff, which we already have from the central dir) - */ - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - cur_file_ofs += (mz_uint64) (MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) - + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - /* Decompress the file either directly from memory or from a file input - * buffer. 
*/ - if (pZip->m_pState->m_pMem) - { - pRead_buf = (mz_uint8*) pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } - else - { - read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64) MZ_ZIP_MAX_IO_BUF_SIZE); - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t) read_buf_size))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) - { - /* The file is stored or the caller has requested the compressed data. */ - if (pZip->m_pState->m_pMem) - { - if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > MZ_UINT32_MAX)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t) file_stat.m_comp_size) - != file_stat.m_comp_size) - { - mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED); - status = TINFL_STATUS_FAILED; - } - else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - { - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - file_crc32 = (mz_uint32) mz_crc32(file_crc32, (const mz_uint8*) pRead_buf, - (size_t) file_stat.m_comp_size); - #endif - } - - cur_file_ofs += file_stat.m_comp_size; - out_buf_ofs += file_stat.m_comp_size; - comp_remaining = 0; - } - else - { - while (comp_remaining) - { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t) read_buf_avail) - != read_buf_avail) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - status = TINFL_STATUS_FAILED; - break; - } - - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - { - file_crc32 = (mz_uint32) mz_crc32(file_crc32, (const mz_uint8*) pRead_buf, - (size_t) read_buf_avail); - } - #endif - - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t) read_buf_avail) - != read_buf_avail) - { - mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED); - status = TINFL_STATUS_FAILED; - break; - } - - cur_file_ofs += read_buf_avail; - out_buf_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - } - } - } - else - { - tinfl_decompressor inflator; - tinfl_init(&inflator); - - if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - status = TINFL_STATUS_FAILED; - } - else - { - do - { - mz_uint8* pWrite_buf_cur = - (mz_uint8*) pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - size_t in_buf_size, - out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) - { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t) read_buf_avail) - != read_buf_avail) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - - in_buf_size = (size_t) read_buf_avail; - status = - tinfl_decompress(&inflator, (const mz_uint8*) pRead_buf + read_buf_ofs, - &in_buf_size, (mz_uint8*) pWrite_buf, pWrite_buf_cur, - &out_buf_size, comp_remaining ? 
TINFL_FLAG_HAS_MORE_INPUT : 0); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - - if (out_buf_size) - { - if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) - != out_buf_size) - { - mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED); - status = TINFL_STATUS_FAILED; - break; - } - - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - file_crc32 = (mz_uint32) mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); - #endif - if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) - { - mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); - status = TINFL_STATUS_FAILED; - break; - } - } - } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) - || (status == TINFL_STATUS_HAS_MORE_OUTPUT)); - } - } - - if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) - { - /* Make sure the entire file was decompressed, and check its CRC. */ - if (out_buf_ofs != file_stat.m_uncomp_size) - { - mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); - status = TINFL_STATUS_FAILED; - } - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - else if (file_crc32 != file_stat.m_crc32) - { - mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); - status = TINFL_STATUS_FAILED; - } - #endif - } - - if (!pZip->m_pState->m_pMem) - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - - if (pWrite_buf) - pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive* pZip, - const char* pFilename, - mz_file_write_func pCallback, - void* pOpaque, - mz_uint flags) { - mz_uint32 file_index; - if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) - return MZ_FALSE; - - return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags); -} - -mz_zip_reader_extract_iter_state* -mz_zip_reader_extract_iter_new(mz_zip_archive* pZip, mz_uint file_index, mz_uint flags) { - mz_zip_reader_extract_iter_state* pState; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; - mz_uint8* pLocal_header = (mz_uint8*) local_header_u32; - - /* Argument sanity check */ - if ((!pZip) || (!pZip->m_pState)) - return NULL; - - /* Allocate an iterator status structure */ - pState = (mz_zip_reader_extract_iter_state*) pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_reader_extract_iter_state)); - if (!pState) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - return NULL; - } - - /* Fetch file details */ - if (!mz_zip_reader_file_stat(pZip, file_index, &pState->file_stat)) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - - /* Encryption and patch files are not supported. */ - if (pState->file_stat.m_bit_flag - & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION - | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) - { - mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - - /* This function only supports decompressing stored and deflate. 
*/ - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (pState->file_stat.m_method != 0) - && (pState->file_stat.m_method != MZ_DEFLATED)) - { - mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - - /* Init state - save args */ - pState->pZip = pZip; - pState->flags = flags; - - /* Init state - reset variables to defaults */ - pState->status = TINFL_STATUS_DONE; - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - pState->file_crc32 = MZ_CRC32_INIT; - #endif - pState->read_buf_ofs = 0; - pState->out_buf_ofs = 0; - pState->pRead_buf = NULL; - pState->pWrite_buf = NULL; - pState->out_blk_remain = 0; - - /* Read and parse the local directory entry. */ - pState->cur_file_ofs = pState->file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, pState->cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - - pState->cur_file_ofs += (mz_uint64) (MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) - + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((pState->cur_file_ofs + pState->file_stat.m_comp_size) > pZip->m_archive_size) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - - /* Decompress the file either directly from memory or from a file input - * buffer. */ - if (pZip->m_pState->m_pMem) - { - pState->pRead_buf = (mz_uint8*) pZip->m_pState->m_pMem + pState->cur_file_ofs; - pState->read_buf_size = pState->read_buf_avail = pState->file_stat.m_comp_size; - pState->comp_remaining = pState->file_stat.m_comp_size; - } - else - { - if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method))) - { - /* Decompression required, therefore intermediate read buffer required */ - pState->read_buf_size = - MZ_MIN(pState->file_stat.m_comp_size, (mz_uint64) MZ_ZIP_MAX_IO_BUF_SIZE); - if (NULL - == (pState->pRead_buf = - pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t) pState->read_buf_size))) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - } - else - { - /* Decompression not required - we will be reading directly into user - * buffer, no temp buf required */ - pState->read_buf_size = 0; - } - pState->read_buf_avail = 0; - pState->comp_remaining = pState->file_stat.m_comp_size; - } - - if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method))) - { - /* Decompression required, init decompressor */ - tinfl_init(&pState->inflator); - - /* Allocate write buffer */ - if (NULL - == (pState->pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - if (pState->pRead_buf) - pZip->m_pFree(pZip->m_pAlloc_opaque, pState->pRead_buf); - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - return NULL; - } - } - - return pState; -} - -mz_zip_reader_extract_iter_state* -mz_zip_reader_extract_file_iter_new(mz_zip_archive* pZip, const char* pFilename, mz_uint flags) { - mz_uint32 file_index; - - /* Locate file index by name */ - if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, 
flags, &file_index)) - return NULL; - - /* Construct iterator */ - return mz_zip_reader_extract_iter_new(pZip, file_index, flags); -} - -size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, - void* pvBuf, - size_t buf_size) { - size_t copied_to_caller = 0; - - /* Argument sanity check */ - if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState) || (!pvBuf)) - return 0; - - if ((pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method)) - { - /* The file is stored or the caller has requested the compressed data, calc - * amount to return. */ - copied_to_caller = (size_t) MZ_MIN(buf_size, pState->comp_remaining); - - /* Zip is in memory....or requires reading from a file? */ - if (pState->pZip->m_pState->m_pMem) - { - /* Copy data to caller's buffer */ - memcpy(pvBuf, pState->pRead_buf, copied_to_caller); - pState->pRead_buf = ((mz_uint8*) pState->pRead_buf) + copied_to_caller; - } - else - { - /* Read directly into caller's buffer */ - if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pvBuf, - copied_to_caller) - != copied_to_caller) - { - /* Failed to read all that was asked for, flag failure and alert user */ - mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED); - pState->status = TINFL_STATUS_FAILED; - copied_to_caller = 0; - } - } - - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - /* Compute CRC if not returning compressed data only */ - if (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - pState->file_crc32 = - (mz_uint32) mz_crc32(pState->file_crc32, (const mz_uint8*) pvBuf, copied_to_caller); - #endif - - /* Advance offsets, dec counters */ - pState->cur_file_ofs += copied_to_caller; - pState->out_buf_ofs += copied_to_caller; - pState->comp_remaining -= copied_to_caller; - } - else - { - do - { - /* Calc ptr to write buffer - given current output pos and block size */ - mz_uint8* pWrite_buf_cur = - (mz_uint8*) pState->pWrite_buf + (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - - /* Calc max output size - given current output pos and block size */ - size_t in_buf_size, - out_buf_size = TINFL_LZ_DICT_SIZE - (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - - if (!pState->out_blk_remain) - { - /* Read more data from file if none available (and reading from file) */ - if ((!pState->read_buf_avail) && (!pState->pZip->m_pState->m_pMem)) - { - /* Calc read size */ - pState->read_buf_avail = MZ_MIN(pState->read_buf_size, pState->comp_remaining); - if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, - pState->pRead_buf, (size_t) pState->read_buf_avail) - != pState->read_buf_avail) - { - mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED); - pState->status = TINFL_STATUS_FAILED; - break; - } - - /* Advance offsets, dec counters */ - pState->cur_file_ofs += pState->read_buf_avail; - pState->comp_remaining -= pState->read_buf_avail; - pState->read_buf_ofs = 0; - } - - /* Perform decompression */ - in_buf_size = (size_t) pState->read_buf_avail; - pState->status = tinfl_decompress( - &pState->inflator, (const mz_uint8*) pState->pRead_buf + pState->read_buf_ofs, - &in_buf_size, (mz_uint8*) pState->pWrite_buf, pWrite_buf_cur, &out_buf_size, - pState->comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); - pState->read_buf_avail -= in_buf_size; - pState->read_buf_ofs += in_buf_size; - - /* Update current output block size remaining */ - pState->out_blk_remain = out_buf_size; - } - - if (pState->out_blk_remain) - { - /* Calc amount to return. 
*/ - size_t to_copy = MZ_MIN((buf_size - copied_to_caller), pState->out_blk_remain); - - /* Copy data to caller's buffer */ - memcpy((mz_uint8*) pvBuf + copied_to_caller, pWrite_buf_cur, to_copy); - - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - /* Perform CRC */ - pState->file_crc32 = - (mz_uint32) mz_crc32(pState->file_crc32, pWrite_buf_cur, to_copy); - #endif - - /* Decrement data consumed from block */ - pState->out_blk_remain -= to_copy; - - /* Inc output offset, while performing sanity check */ - if ((pState->out_buf_ofs += to_copy) > pState->file_stat.m_uncomp_size) - { - mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED); - pState->status = TINFL_STATUS_FAILED; - break; - } - - /* Increment counter of data copied to caller */ - copied_to_caller += to_copy; - } - } while ((copied_to_caller < buf_size) - && ((pState->status == TINFL_STATUS_NEEDS_MORE_INPUT) - || (pState->status == TINFL_STATUS_HAS_MORE_OUTPUT))); - } - - /* Return how many bytes were copied into user buffer */ - return copied_to_caller; -} - -mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState) { - int status; - - /* Argument sanity check */ - if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState)) - return MZ_FALSE; - - /* Was decompression completed and requested? */ - if ((pState->status == TINFL_STATUS_DONE) && (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) - { - /* Make sure the entire file was decompressed, and check its CRC. */ - if (pState->out_buf_ofs != pState->file_stat.m_uncomp_size) - { - mz_zip_set_error(pState->pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); - pState->status = TINFL_STATUS_FAILED; - } - #ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS - else if (pState->file_crc32 != pState->file_stat.m_crc32) - { - mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED); - pState->status = TINFL_STATUS_FAILED; - } - #endif - } - - /* Free buffers */ - if (!pState->pZip->m_pState->m_pMem) - pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pRead_buf); - if (pState->pWrite_buf) - pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pWrite_buf); - - /* Save status */ - status = pState->status; - - /* Free context */ - pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState); - - return status == TINFL_STATUS_DONE; -} - - #ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_callback(void* pOpaque, mz_uint64 ofs, const void* pBuf, size_t n) { - (void) ofs; - - return MZ_FWRITE(pBuf, 1, n, (MZ_FILE*) pOpaque); -} - -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive* pZip, - mz_uint file_index, - const char* pDst_filename, - mz_uint flags) { - mz_bool status; - mz_zip_archive_file_stat file_stat; - MZ_FILE* pFile; - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) - return MZ_FALSE; - - if ((file_stat.m_is_directory) || (!file_stat.m_is_supported)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); - - pFile = MZ_FOPEN(pDst_filename, "wb"); - if (!pFile) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); - - status = - mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); - - if (MZ_FCLOSE(pFile) == EOF) - { - if (status) - mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED); - - status = MZ_FALSE; - } - - #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO) - if (status) - mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); - #endif - - return status; -} - -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive* pZip, - const char* 
pArchive_filename, - const char* pDst_filename, - mz_uint flags) { - mz_uint32 file_index; - if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index)) - return MZ_FALSE; - - return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); -} - -mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive* pZip, - mz_uint file_index, - MZ_FILE* pFile, - mz_uint flags) { - mz_zip_archive_file_stat file_stat; - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) - return MZ_FALSE; - - if ((file_stat.m_is_directory) || (!file_stat.m_is_supported)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); - - return mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, - flags); -} - -mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive* pZip, - const char* pArchive_filename, - MZ_FILE* pFile, - mz_uint flags) { - mz_uint32 file_index; - if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index)) - return MZ_FALSE; - - return mz_zip_reader_extract_to_cfile(pZip, file_index, pFile, flags); -} - #endif /* #ifndef MINIZ_NO_STDIO */ - -static size_t -mz_zip_compute_crc32_callback(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n) { - mz_uint32* p = (mz_uint32*) pOpaque; - (void) file_ofs; - *p = (mz_uint32) mz_crc32(*p, (const mz_uint8*) pBuf, n); - return n; -} - -mz_bool mz_zip_validate_file(mz_zip_archive* pZip, mz_uint file_index, mz_uint flags) { - mz_zip_archive_file_stat file_stat; - mz_zip_internal_state* pState; - const mz_uint8* pCentral_dir_header; - mz_bool found_zip64_ext_data_in_cdir = MZ_FALSE; - mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; - mz_uint8* pLocal_header = (mz_uint8*) local_header_u32; - mz_uint64 local_header_ofs = 0; - mz_uint32 local_header_filename_len, local_header_extra_len, local_header_crc32; - mz_uint64 local_header_comp_size, local_header_uncomp_size; - mz_uint32 uncomp_crc32 = MZ_CRC32_INIT; - mz_bool has_data_descriptor; - mz_uint32 local_header_bit_flags; - - mz_zip_array file_data_array; - mz_zip_array_init(&file_data_array, 1); - - if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (file_index > pZip->m_total_files) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - pState = pZip->m_pState; - - pCentral_dir_header = mz_zip_get_cdh(pZip, file_index); - - if (!mz_zip_file_stat_internal(pZip, file_index, pCentral_dir_header, &file_stat, - &found_zip64_ext_data_in_cdir)) - return MZ_FALSE; - - /* A directory or zero length file */ - if ((file_stat.m_is_directory) || (!file_stat.m_uncomp_size)) - return MZ_TRUE; - - /* Encryption and patch files are not supported. */ - if (file_stat.m_is_encrypted) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); - - /* This function only supports stored and deflate. */ - if ((file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); - - if (!file_stat.m_is_supported) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); - - /* Read and parse the local directory entry. 
*/ - local_header_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - local_header_filename_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS); - local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS); - local_header_uncomp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS); - local_header_crc32 = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_CRC32_OFS); - local_header_bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); - has_data_descriptor = (local_header_bit_flags & 8) != 0; - - if (local_header_filename_len != strlen(file_stat.m_filename)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if ((local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len - + local_header_extra_len + file_stat.m_comp_size) - > pZip->m_archive_size) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - - if (!mz_zip_array_resize(pZip, &file_data_array, - MZ_MAX(local_header_filename_len, local_header_extra_len), MZ_FALSE)) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - goto handle_failure; - } - - if (local_header_filename_len) - { - if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE, - file_data_array.m_p, local_header_filename_len) - != local_header_filename_len) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - goto handle_failure; - } - - /* I've seen 1 archive that had the same pathname, but used backslashes in - * the local dir and forward slashes in the central dir. Do we care about - * this? For now, this case will fail validation. 
*/ - if (memcmp(file_stat.m_filename, file_data_array.m_p, local_header_filename_len) != 0) - { - mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); - goto handle_failure; - } - } - - if ((local_header_extra_len) - && ((local_header_comp_size == MZ_UINT32_MAX) - || (local_header_uncomp_size == MZ_UINT32_MAX))) - { - mz_uint32 extra_size_remaining = local_header_extra_len; - const mz_uint8* pExtra_data = (const mz_uint8*) file_data_array.m_p; - - if (pZip->m_pRead(pZip->m_pIO_opaque, - local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE - + local_header_filename_len, - file_data_array.m_p, local_header_extra_len) - != local_header_extra_len) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - goto handle_failure; - } - - do - { - mz_uint32 field_id, field_data_size, field_total_size; - - if (extra_size_remaining < (sizeof(mz_uint16) * 2)) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - goto handle_failure; - } - - field_id = MZ_READ_LE16(pExtra_data); - field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); - field_total_size = field_data_size + sizeof(mz_uint16) * 2; - - if (field_total_size > extra_size_remaining) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - goto handle_failure; - } - - if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) - { - const mz_uint8* pSrc_field_data = pExtra_data + sizeof(mz_uint32); - - if (field_data_size < sizeof(mz_uint64) * 2) - { - mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); - goto handle_failure; - } - - local_header_uncomp_size = MZ_READ_LE64(pSrc_field_data); - local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64)); - - found_zip64_ext_data_in_ldir = MZ_TRUE; - break; - } - - pExtra_data += field_total_size; - extra_size_remaining -= field_total_size; - } while (extra_size_remaining); - } - - /* TODO: parse local header extra data when local_header_comp_size is - * 0xFFFFFFFF! (big_descriptor.zip) */ - /* I've seen zips in the wild with the data descriptor bit set, but proper - * local header values and bogus data descriptors */ - if ((has_data_descriptor) && (!local_header_comp_size) && (!local_header_crc32)) - { - mz_uint8 descriptor_buf[32]; - mz_bool has_id; - const mz_uint8* pSrc; - mz_uint32 file_crc32; - mz_uint64 comp_size = 0, uncomp_size = 0; - - mz_uint32 num_descriptor_uint32s = - ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) ? 6 : 4; - - if (pZip->m_pRead(pZip->m_pIO_opaque, - local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE - + local_header_filename_len + local_header_extra_len - + file_stat.m_comp_size, - descriptor_buf, sizeof(mz_uint32) * num_descriptor_uint32s) - != (sizeof(mz_uint32) * num_descriptor_uint32s)) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - goto handle_failure; - } - - has_id = (MZ_READ_LE32(descriptor_buf) == MZ_ZIP_DATA_DESCRIPTOR_ID); - pSrc = has_id ? 
(descriptor_buf + sizeof(mz_uint32)) : descriptor_buf; - - file_crc32 = MZ_READ_LE32(pSrc); - - if ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) - { - comp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32)); - uncomp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32) + sizeof(mz_uint64)); - } - else - { - comp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32)); - uncomp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32) + sizeof(mz_uint32)); - } - - if ((file_crc32 != file_stat.m_crc32) || (comp_size != file_stat.m_comp_size) - || (uncomp_size != file_stat.m_uncomp_size)) - { - mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); - goto handle_failure; - } - } - else - { - if ((local_header_crc32 != file_stat.m_crc32) - || (local_header_comp_size != file_stat.m_comp_size) - || (local_header_uncomp_size != file_stat.m_uncomp_size)) - { - mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); - goto handle_failure; - } - } - - mz_zip_array_clear(pZip, &file_data_array); - - if ((flags & MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY) == 0) - { - if (!mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_compute_crc32_callback, - &uncomp_crc32, 0)) - return MZ_FALSE; - - /* 1 more check to be sure, although the extract checks too. */ - if (uncomp_crc32 != file_stat.m_crc32) - { - mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); - return MZ_FALSE; - } - } - - return MZ_TRUE; - -handle_failure: - mz_zip_array_clear(pZip, &file_data_array); - return MZ_FALSE; -} - -mz_bool mz_zip_validate_archive(mz_zip_archive* pZip, mz_uint flags) { - mz_zip_internal_state* pState; - mz_uint32 i; - - if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - pState = pZip->m_pState; - - /* Basic sanity checks */ - if (!pState->m_zip64) - { - if (pZip->m_total_files > MZ_UINT16_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - - if (pZip->m_archive_size > MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - } - else - { - if (pState->m_central_dir.m_size >= MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - } - - for (i = 0; i < pZip->m_total_files; i++) - { - if (MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG & flags) - { - mz_uint32 found_index; - mz_zip_archive_file_stat stat; - - if (!mz_zip_reader_file_stat(pZip, i, &stat)) - return MZ_FALSE; - - if (!mz_zip_reader_locate_file_v2(pZip, stat.m_filename, NULL, 0, &found_index)) - return MZ_FALSE; - - /* This check can fail if there are duplicate filenames in the archive - * (which we don't check for when writing - that's up to the user) */ - if (found_index != i) - return mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); - } - - if (!mz_zip_validate_file(pZip, i, flags)) - return MZ_FALSE; - } - - return MZ_TRUE; -} - -mz_bool -mz_zip_validate_mem_archive(const void* pMem, size_t size, mz_uint flags, mz_zip_error* pErr) { - mz_bool success = MZ_TRUE; - mz_zip_archive zip; - mz_zip_error actual_err = MZ_ZIP_NO_ERROR; - - if ((!pMem) || (!size)) - { - if (pErr) - *pErr = MZ_ZIP_INVALID_PARAMETER; - return MZ_FALSE; - } - - mz_zip_zero_struct(&zip); - - if (!mz_zip_reader_init_mem(&zip, pMem, size, flags)) - { - if (pErr) - *pErr = zip.m_last_error; - return MZ_FALSE; - } - - if (!mz_zip_validate_archive(&zip, flags)) - { - actual_err = zip.m_last_error; - success = MZ_FALSE; - } - - if (!mz_zip_reader_end_internal(&zip, success)) - { - if (!actual_err) - actual_err = zip.m_last_error; - success = MZ_FALSE; - } - - if (pErr) - *pErr = 
actual_err; - - return success; -} - - #ifndef MINIZ_NO_STDIO -mz_bool mz_zip_validate_file_archive(const char* pFilename, mz_uint flags, mz_zip_error* pErr) { - mz_bool success = MZ_TRUE; - mz_zip_archive zip; - mz_zip_error actual_err = MZ_ZIP_NO_ERROR; - - if (!pFilename) - { - if (pErr) - *pErr = MZ_ZIP_INVALID_PARAMETER; - return MZ_FALSE; - } - - mz_zip_zero_struct(&zip); - - if (!mz_zip_reader_init_file_v2(&zip, pFilename, flags, 0, 0)) - { - if (pErr) - *pErr = zip.m_last_error; - return MZ_FALSE; - } - - if (!mz_zip_validate_archive(&zip, flags)) - { - actual_err = zip.m_last_error; - success = MZ_FALSE; - } - - if (!mz_zip_reader_end_internal(&zip, success)) - { - if (!actual_err) - actual_err = zip.m_last_error; - success = MZ_FALSE; - } - - if (pErr) - *pErr = actual_err; - - return success; -} - #endif /* #ifndef MINIZ_NO_STDIO */ - - /* ------------------- .ZIP archive writing */ - - #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -static MZ_FORCEINLINE void mz_write_le16(mz_uint8* p, mz_uint16 v) { - p[0] = (mz_uint8) v; - p[1] = (mz_uint8) (v >> 8); -} -static MZ_FORCEINLINE void mz_write_le32(mz_uint8* p, mz_uint32 v) { - p[0] = (mz_uint8) v; - p[1] = (mz_uint8) (v >> 8); - p[2] = (mz_uint8) (v >> 16); - p[3] = (mz_uint8) (v >> 24); -} -static MZ_FORCEINLINE void mz_write_le64(mz_uint8* p, mz_uint64 v) { - mz_write_le32(p, (mz_uint32) v); - mz_write_le32(p + sizeof(mz_uint32), (mz_uint32) (v >> 32)); -} - - #define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8*) (p), (mz_uint16) (v)) - #define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8*) (p), (mz_uint32) (v)) - #define MZ_WRITE_LE64(p, v) mz_write_le64((mz_uint8*) (p), (mz_uint64) (v)) - -static size_t -mz_zip_heap_write_func(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n) { - mz_zip_archive* pZip = (mz_zip_archive*) pOpaque; - mz_zip_internal_state* pState = pZip->m_pState; - mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); - - if (!n) - return 0; - - /* An allocation this big is likely to just fail on 32-bit systems, so don't - * even go there. 
*/ - if ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); - return 0; - } - - if (new_size > pState->m_mem_capacity) - { - void* pNew_block; - size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); - - while (new_capacity < new_size) - new_capacity *= 2; - - if (NULL - == (pNew_block = - pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) - { - mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - return 0; - } - - pState->m_pMem = pNew_block; - pState->m_mem_capacity = new_capacity; - } - memcpy((mz_uint8*) pState->m_pMem + file_ofs, pBuf, n); - pState->m_mem_size = (size_t) new_size; - return n; -} - -static mz_bool mz_zip_writer_end_internal(mz_zip_archive* pZip, mz_bool set_last_error) { - mz_zip_internal_state* pState; - mz_bool status = MZ_TRUE; - - if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) - || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) - && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) - { - if (set_last_error) - mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - return MZ_FALSE; - } - - pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - - #ifndef MINIZ_NO_STDIO - if (pState->m_pFile) - { - if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) - { - if (MZ_FCLOSE(pState->m_pFile) == EOF) - { - if (set_last_error) - mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED); - status = MZ_FALSE; - } - } - - pState->m_pFile = NULL; - } - #endif /* #ifndef MINIZ_NO_STDIO */ - - if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); - pState->m_pMem = NULL; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - return status; -} - -mz_bool mz_zip_writer_init_v2(mz_zip_archive* pZip, mz_uint64 existing_size, mz_uint flags) { - mz_bool zip64 = (flags & MZ_ZIP_FLAG_WRITE_ZIP64) != 0; - - if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) - || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) - { - if (!pZip->m_pRead) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - } - - if (pZip->m_file_offset_alignment) - { - /* Ensure user specified file offset alignment is a power of 2. 
*/ - if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - } - - if (!pZip->m_pAlloc) - pZip->m_pAlloc = miniz_def_alloc_func; - if (!pZip->m_pFree) - pZip->m_pFree = miniz_def_free_func; - if (!pZip->m_pRealloc) - pZip->m_pRealloc = miniz_def_realloc_func; - - pZip->m_archive_size = existing_size; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL - == (pZip->m_pState = (mz_zip_internal_state*) pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); - - pZip->m_pState->m_zip64 = zip64; - pZip->m_pState->m_zip64_has_extended_info_fields = zip64; - - pZip->m_zip_type = MZ_ZIP_TYPE_USER; - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_init(mz_zip_archive* pZip, mz_uint64 existing_size) { - return mz_zip_writer_init_v2(pZip, existing_size, 0); -} - -mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive* pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size, - mz_uint flags) { - pZip->m_pWrite = mz_zip_heap_write_func; - pZip->m_pNeeds_keepalive = NULL; - - if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) - pZip->m_pRead = mz_zip_mem_read_func; - - pZip->m_pIO_opaque = pZip; - - if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags)) - return MZ_FALSE; - - pZip->m_zip_type = MZ_ZIP_TYPE_HEAP; - - if (0 - != (initial_allocation_size = - MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) - { - if (NULL - == (pZip->m_pState->m_pMem = - pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size))) - { - mz_zip_writer_end_internal(pZip, MZ_FALSE); - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - } - pZip->m_pState->m_mem_capacity = initial_allocation_size; - } - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_init_heap(mz_zip_archive* pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size) { - return mz_zip_writer_init_heap_v2(pZip, size_to_reserve_at_beginning, initial_allocation_size, - 0); -} - - #ifndef MINIZ_NO_STDIO -static size_t -mz_zip_file_write_func(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n) { - mz_zip_archive* pZip = (mz_zip_archive*) pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - - file_ofs += pZip->m_pState->m_file_archive_start_ofs; - - if (((mz_int64) file_ofs < 0) - || (((cur_ofs != (mz_int64) file_ofs)) - && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64) file_ofs, SEEK_SET)))) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); - return 0; - } - - return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_writer_init_file(mz_zip_archive* pZip, - const char* pFilename, - mz_uint64 size_to_reserve_at_beginning) { - return mz_zip_writer_init_file_v2(pZip, pFilename, size_to_reserve_at_beginning, 0); -} - -mz_bool mz_zip_writer_init_file_v2(mz_zip_archive* pZip, - const char* pFilename, - mz_uint64 size_to_reserve_at_beginning, - mz_uint flags) { - MZ_FILE* pFile; - - pZip->m_pWrite = mz_zip_file_write_func; - pZip->m_pNeeds_keepalive = NULL; - - if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) 
- pZip->m_pRead = mz_zip_file_read_func; - - pZip->m_pIO_opaque = pZip; - - if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags)) - return MZ_FALSE; - - if (NULL - == (pFile = MZ_FOPEN(pFilename, (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) ? "w+b" : "wb"))) - { - mz_zip_writer_end(pZip); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); - } - - pZip->m_pState->m_pFile = pFile; - pZip->m_zip_type = MZ_ZIP_TYPE_FILE; - - if (size_to_reserve_at_beginning) - { - mz_uint64 cur_ofs = 0; - char buf[4096]; - - MZ_CLEAR_ARR(buf); - - do - { - size_t n = (size_t) MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) - { - mz_zip_writer_end(pZip); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - cur_ofs += n; - size_to_reserve_at_beginning -= n; - } while (size_to_reserve_at_beginning); - } - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_init_cfile(mz_zip_archive* pZip, MZ_FILE* pFile, mz_uint flags) { - pZip->m_pWrite = mz_zip_file_write_func; - pZip->m_pNeeds_keepalive = NULL; - - if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) - pZip->m_pRead = mz_zip_file_read_func; - - pZip->m_pIO_opaque = pZip; - - if (!mz_zip_writer_init_v2(pZip, 0, flags)) - return MZ_FALSE; - - pZip->m_pState->m_pFile = pFile; - pZip->m_pState->m_file_archive_start_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - pZip->m_zip_type = MZ_ZIP_TYPE_CFILE; - - return MZ_TRUE; -} - #endif /* #ifndef MINIZ_NO_STDIO */ - -mz_bool -mz_zip_writer_init_from_reader_v2(mz_zip_archive* pZip, const char* pFilename, mz_uint flags) { - mz_zip_internal_state* pState; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (flags & MZ_ZIP_FLAG_WRITE_ZIP64) - { - /* We don't support converting a non-zip64 file to zip64 - this seems like - * more trouble than it's worth. (What about the existing 32-bit data - * descriptors that could follow the compressed data?) */ - if (!pZip->m_pState->m_zip64) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - } - - /* No sense in trying to write to an archive that's already at the supported max - * size */ - if (pZip->m_pState->m_zip64) - { - if (pZip->m_total_files == MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); - } - else - { - if (pZip->m_total_files == MZ_UINT16_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); - - if ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - > MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); - } - - pState = pZip->m_pState; - - if (pState->m_pFile) - { - #ifdef MINIZ_NO_STDIO - (void) pFilename; - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - #else - if (pZip->m_pIO_opaque != pZip) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) - { - if (!pFilename) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - /* Archive is being read from stdio and was originally opened only for - * reading. Try to reopen as writable. */ - if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) - { - /* The mz_zip_archive is now in a bogus state because pState->m_pFile is - * NULL, so just close it.
*/ - mz_zip_reader_end_internal(pZip, MZ_FALSE); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); - } - } - - pZip->m_pWrite = mz_zip_file_write_func; - pZip->m_pNeeds_keepalive = NULL; - #endif /* #ifdef MINIZ_NO_STDIO */ - } - else if (pState->m_pMem) - { - /* Archive lives in a memory block. Assume it's from the heap that we can - * resize using the realloc callback. */ - if (pZip->m_pIO_opaque != pZip) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - pState->m_mem_capacity = pState->m_mem_size; - pZip->m_pWrite = mz_zip_heap_write_func; - pZip->m_pNeeds_keepalive = NULL; - } - /* Archive is being read via a user provided read function - make sure the - user has specified a write function too. */ - else if (!pZip->m_pWrite) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - /* Start writing new files at the archive's current central directory - * location. */ - /* TODO: We could add a flag that lets the user start writing immediately - * AFTER the existing central dir - this would be safer. */ - pZip->m_archive_size = pZip->m_central_directory_file_ofs; - pZip->m_central_directory_file_ofs = 0; - - /* Clear the sorted central dir offsets, they aren't useful or maintained now. - */ - /* Even though we're now in write mode, files can still be extracted and - * verified, but file locates will be slow. */ - /* TODO: We could easily maintain the sorted central directory offsets. */ - mz_zip_array_clear(pZip, &pZip->m_pState->m_sorted_central_dir_offsets); - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive* pZip, const char* pFilename) { - return mz_zip_writer_init_from_reader_v2(pZip, pFilename, 0); -} - -/* TODO: pArchive_name is a terrible name here! 
*/ -mz_bool mz_zip_writer_add_mem(mz_zip_archive* pZip, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - mz_uint level_and_flags) { - return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, - 0, 0); -} - -typedef struct { - mz_zip_archive* m_pZip; - mz_uint64 m_cur_archive_file_ofs; - mz_uint64 m_comp_size; -} mz_zip_writer_add_state; - -static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void* pUser) { - mz_zip_writer_add_state* pState = (mz_zip_writer_add_state*) pUser; - if ((int) pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, - pBuf, len) - != len) - return MZ_FALSE; - - pState->m_cur_archive_file_ofs += len; - pState->m_comp_size += len; - return MZ_TRUE; -} - - #define MZ_ZIP64_MAX_LOCAL_EXTRA_FIELD_SIZE \ - (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 2) - #define MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE \ - (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 3) -static mz_uint32 mz_zip_writer_create_zip64_extra_data(mz_uint8* pBuf, - mz_uint64* pUncomp_size, - mz_uint64* pComp_size, - mz_uint64* pLocal_header_ofs) { - mz_uint8* pDst = pBuf; - mz_uint32 field_size = 0; - - MZ_WRITE_LE16(pDst + 0, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID); - MZ_WRITE_LE16(pDst + 2, 0); - pDst += sizeof(mz_uint16) * 2; - - if (pUncomp_size) - { - MZ_WRITE_LE64(pDst, *pUncomp_size); - pDst += sizeof(mz_uint64); - field_size += sizeof(mz_uint64); - } - - if (pComp_size) - { - MZ_WRITE_LE64(pDst, *pComp_size); - pDst += sizeof(mz_uint64); - field_size += sizeof(mz_uint64); - } - - if (pLocal_header_ofs) - { - MZ_WRITE_LE64(pDst, *pLocal_header_ofs); - pDst += sizeof(mz_uint64); - field_size += sizeof(mz_uint64); - } - - MZ_WRITE_LE16(pBuf + 2, field_size); - - return (mz_uint32) (pDst - pBuf); -} - -static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive* pZip, - mz_uint8* pDst, - mz_uint16 filename_size, - mz_uint16 extra_size, - mz_uint64 uncomp_size, - mz_uint64 comp_size, - mz_uint32 uncomp_crc32, - mz_uint16 method, - mz_uint16 bit_flags, - mz_uint16 dos_time, - mz_uint16 dos_date) { - (void) pZip; - memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive* pZip, - mz_uint8* pDst, - mz_uint16 filename_size, - mz_uint16 extra_size, - mz_uint16 comment_size, - mz_uint64 uncomp_size, - mz_uint64 comp_size, - mz_uint32 uncomp_crc32, - mz_uint16 method, - mz_uint16 bit_flags, - mz_uint16 dos_time, - mz_uint16 dos_date, - mz_uint64 local_header_ofs, - mz_uint32 ext_attributes) { - (void) pZip; - memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_MADE_BY_OFS, 0x031E); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_MIN(local_header_ofs, MZ_UINT32_MAX)); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive* pZip, - const char* pFilename, - mz_uint16 filename_size, - const void* pExtra, - mz_uint16 extra_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint64 uncomp_size, - mz_uint64 comp_size, - mz_uint32 uncomp_crc32, - mz_uint16 method, - mz_uint16 bit_flags, - mz_uint16 dos_time, - mz_uint16 dos_date, - mz_uint64 local_header_ofs, - mz_uint32 ext_attributes, - const char* user_extra_data, - mz_uint user_extra_data_len) { - mz_zip_internal_state* pState = pZip->m_pState; - mz_uint32 central_dir_ofs = (mz_uint32) pState->m_central_dir.m_size; - size_t orig_central_dir_size = pState->m_central_dir.m_size; - mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - - if (!pZip->m_pState->m_zip64) - { - if (local_header_ofs > 0xFFFFFFFF) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); - } - - /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ - if (((mz_uint64) pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size - + extra_size + user_extra_data_len + comment_size) - >= MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); - - if (!mz_zip_writer_create_central_dir_header( - pZip, central_dir_header, filename_size, (mz_uint16) (extra_size + user_extra_data_len), - comment_size, uncomp_size, comp_size, 
uncomp_crc32, method, bit_flags, dos_time, dos_date, - local_header_ofs, ext_attributes)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) - || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) - || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) - || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, user_extra_data, - user_extra_data_len)) - || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) - || (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &central_dir_ofs, 1))) - { - /* Try to resize the central directory array back into its original state. - */ - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - } - - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_validate_archive_name(const char* pArchive_name) { - /* Basic ZIP archive filename validity checks: Valid filenames cannot start - * with a forward slash, cannot contain a drive letter, and cannot use - * DOS-style backward slashes. */ - if (*pArchive_name == '/') - return MZ_FALSE; - - /* Making sure the name does not contain drive letters or DOS style backward - * slashes is the responsibility of the program using miniz*/ - - return MZ_TRUE; -} - -static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive* pZip) { - mz_uint32 n; - if (!pZip->m_file_offset_alignment) - return 0; - n = (mz_uint32) (pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); - return (mz_uint) ((pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1)); -} - -static mz_bool -mz_zip_writer_write_zeros(mz_zip_archive* pZip, mz_uint64 cur_file_ofs, mz_uint32 n) { - char buf[4096]; - memset(buf, 0, MZ_MIN(sizeof(buf), n)); - while (n) - { - mz_uint32 s = MZ_MIN(sizeof(buf), n); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_file_ofs += s; - n -= s; - } - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive* pZip, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32) { - return mz_zip_writer_add_mem_ex_v2(pZip, pArchive_name, pBuf, buf_size, pComment, comment_size, - level_and_flags, uncomp_size, uncomp_crc32, NULL, NULL, 0, - NULL, 0); -} - -mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive* pZip, - const char* pArchive_name, - const void* pBuf, - size_t buf_size, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32, - MZ_TIME_T* last_modified, - const char* user_extra_data, - mz_uint user_extra_data_len, - const char* user_extra_data_central, - mz_uint user_extra_data_central_len) { - mz_uint16 method = 0, dos_time = 0, dos_date = 0; - mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; - mz_uint64 local_dir_header_ofs = pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - tdefl_compressor* pComp = NULL; - mz_bool store_data_uncompressed; - mz_zip_internal_state* pState; - mz_uint8* pExtra_data = NULL; - mz_uint32 extra_size = 0; - mz_uint8
extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; - mz_uint16 bit_flags = 0; - - if ((int) level_and_flags < 0) - level_and_flags = MZ_DEFAULT_LEVEL; - - if (uncomp_size || (buf_size && !(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) - bit_flags |= MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; - - if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME)) - bit_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; - - level = level_and_flags & 0xF; - store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) - || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) - || (level > MZ_UBER_COMPRESSION)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - pState = pZip->m_pState; - - if (pState->m_zip64) - { - if (pZip->m_total_files == MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); - } - else - { - if (pZip->m_total_files == MZ_UINT16_MAX) - { - pState->m_zip64 = MZ_TRUE; - /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ - } - if (((mz_uint64) buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) - { - pState->m_zip64 = MZ_TRUE; - /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ - } - } - - if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (!mz_zip_writer_validate_archive_name(pArchive_name)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); - - #ifndef MINIZ_NO_TIME - if (last_modified != NULL) - { - mz_zip_time_t_to_dos_time(*last_modified, &dos_time, &dos_date); - } - else - { - MZ_TIME_T cur_time; - time(&cur_time); - mz_zip_time_t_to_dos_time(cur_time, &dos_time, &dos_date); - } - #else - (void) last_modified; - #endif /* #ifndef MINIZ_NO_TIME */ - - if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - { - uncomp_crc32 = (mz_uint32) mz_crc32(MZ_CRC32_INIT, (const mz_uint8*) pBuf, buf_size); - uncomp_size = buf_size; - if (uncomp_size <= 3) - { - level = 0; - store_data_uncompressed = MZ_TRUE; - } - } - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > MZ_UINT16_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); - - num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ - if (((mz_uint64) pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE - + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) - >= MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); - - if (!pState->m_zip64) - { - /* Bail early if the archive would obviously become too large */ - if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE - + archive_name_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size - + user_extra_data_len + pState->m_central_dir.m_size - + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + user_extra_data_central_len - + MZ_ZIP_DATA_DESCRIPTER_SIZE32) - > 0xFFFFFFFF) - { - pState->m_zip64 = MZ_TRUE; - /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ - } - } - - if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) - { - /* Set DOS Subdirectory attribute bit. */ - ext_attributes |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; - - /* Subdirectories cannot contain data. 
*/ - if ((buf_size) || (uncomp_size)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - } - - /* Try to do any allocations before writing to the archive, so if an - * allocation fails the file remains unmodified. (A good idea if we're doing - * an in-place modification.) */ - if ((!mz_zip_array_ensure_room( - pZip, &pState->m_central_dir, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size - + (pState->m_zip64 ? MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE : 0))) - || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - - if ((!store_data_uncompressed) && (buf_size)) - { - if (NULL - == (pComp = (tdefl_compressor*) pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - sizeof(tdefl_compressor)))) - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - } - - if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) - { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); - } - cur_archive_file_ofs += num_alignment_padding_bytes; - - MZ_CLEAR_ARR(local_dir_header); - - if (!store_data_uncompressed || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - { - method = MZ_DEFLATED; - } - - if (pState->m_zip64) - { - if (uncomp_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) - { - pExtra_data = extra_data; - extra_size = mz_zip_writer_create_zip64_extra_data( - extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, - (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, - (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); - } - - if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, - (mz_uint16) archive_name_size, - (mz_uint16) (extra_size + user_extra_data_len), - 0, 0, 0, method, bit_flags, dos_time, dos_date)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) - != sizeof(local_dir_header)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += sizeof(local_dir_header); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) - != archive_name_size) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - cur_archive_file_ofs += archive_name_size; - - if (pExtra_data != NULL) - { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) - != extra_size) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += extra_size; - } - } - else - { - if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX)) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16) archive_name_size, - (mz_uint16) user_extra_data_len, 0, 0, 0, method, bit_flags, dos_time, dos_date)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) - != sizeof(local_dir_header)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += sizeof(local_dir_header); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, 
pArchive_name, - archive_name_size) - != archive_name_size) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - cur_archive_file_ofs += archive_name_size; - } - - if (user_extra_data_len > 0) - { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data, - user_extra_data_len) - != user_extra_data_len) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += user_extra_data_len; - } - - if (store_data_uncompressed) - { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - - cur_archive_file_ofs += buf_size; - comp_size = buf_size; - } - else if (buf_size) - { - mz_zip_writer_add_state state; - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) - != TDEFL_STATUS_OKAY) - || (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED); - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pComp = NULL; - - if (uncomp_size) - { - mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64]; - mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32; - - MZ_ASSERT(bit_flags & MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR); - - MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID); - MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32); - if (pExtra_data == NULL) - { - if (comp_size > MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - - MZ_WRITE_LE32(local_dir_footer + 8, comp_size); - MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size); - } - else - { - MZ_WRITE_LE64(local_dir_footer + 8, comp_size); - MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size); - local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64; - } - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, - local_dir_footer_size) - != local_dir_footer_size) - return MZ_FALSE; - - cur_archive_file_ofs += local_dir_footer_size; - } - - if (pExtra_data != NULL) - { - extra_size = mz_zip_writer_create_zip64_extra_data( - extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, - (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, - (local_dir_header_ofs >= MZ_UINT32_MAX) ? 
&local_dir_header_ofs : NULL); - } - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16) archive_name_size, pExtra_data, (mz_uint16) extra_size, - pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, - dos_date, local_dir_header_ofs, ext_attributes, user_extra_data_central, - user_extra_data_central_len)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive* pZip, - const char* pArchive_name, - mz_file_read_func read_callback, - void* callback_opaque, - mz_uint64 max_size, - const MZ_TIME_T* pFile_time, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint32 ext_attributes, - const char* user_extra_data, - mz_uint user_extra_data_len, - const char* user_extra_data_central, - mz_uint user_extra_data_central_len) { - mz_uint16 gen_flags; - mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; - mz_uint16 method = 0, dos_time = 0, dos_date = 0; - mz_uint64 local_dir_header_ofs, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, - comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - mz_uint8* pExtra_data = NULL; - mz_uint32 extra_size = 0; - mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; - mz_zip_internal_state* pState; - mz_uint64 file_ofs = 0, cur_archive_header_file_ofs; - - if ((int) level_and_flags < 0) - level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - - gen_flags = - (level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE) ? 0 : MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; - - if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME)) - gen_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; - - /* Sanity checks */ - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) - || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - pState = pZip->m_pState; - - if ((!pState->m_zip64) && (max_size > MZ_UINT32_MAX)) - { - /* Source file is too large for non-zip64 */ - /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ - pState->m_zip64 = MZ_TRUE; - } - - /* We could support this, but why? 
*/ - if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); - - if (!mz_zip_writer_validate_archive_name(pArchive_name)) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); - - if (pState->m_zip64) - { - if (pZip->m_total_files == MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); - } - else - { - if (pZip->m_total_files == MZ_UINT16_MAX) - { - pState->m_zip64 = MZ_TRUE; - /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ - } - } - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > MZ_UINT16_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); - - num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ - if (((mz_uint64) pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE - + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) - >= MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); - - if (!pState->m_zip64) - { - /* Bail early if the archive would obviously become too large */ - if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE - + archive_name_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size - + user_extra_data_len + pState->m_central_dir.m_size - + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 1024 + MZ_ZIP_DATA_DESCRIPTER_SIZE32 - + user_extra_data_central_len) - > 0xFFFFFFFF) - { - pState->m_zip64 = MZ_TRUE; - /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ - } - } - - #ifndef MINIZ_NO_TIME - if (pFile_time) - { - mz_zip_time_t_to_dos_time(*pFile_time, &dos_time, &dos_date); - } - #else - (void) pFile_time; - #endif - - if (max_size <= 3) - level = 0; - - if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) - { - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - - cur_archive_file_ofs += num_alignment_padding_bytes; - local_dir_header_ofs = cur_archive_file_ofs; - - if (pZip->m_file_offset_alignment) - { - MZ_ASSERT((cur_archive_file_ofs & (pZip->m_file_offset_alignment - 1)) == 0); - } - - if (max_size && level) - { - method = MZ_DEFLATED; - } - - MZ_CLEAR_ARR(local_dir_header); - if (pState->m_zip64) - { - if (max_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) - { - pExtra_data = extra_data; - if (level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE) - extra_size = mz_zip_writer_create_zip64_extra_data( - extra_data, (max_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, - (max_size >= MZ_UINT32_MAX) ? &comp_size : NULL, - (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); - else - extra_size = mz_zip_writer_create_zip64_extra_data( - extra_data, NULL, NULL, - (local_dir_header_ofs >= MZ_UINT32_MAX) ? 
&local_dir_header_ofs : NULL); - } - - if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, - (mz_uint16) archive_name_size, - (mz_uint16) (extra_size + user_extra_data_len), - 0, 0, 0, method, gen_flags, dos_time, dos_date)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, - sizeof(local_dir_header)) - != sizeof(local_dir_header)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += sizeof(local_dir_header); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) - != archive_name_size) - { - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - - cur_archive_file_ofs += archive_name_size; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) - != extra_size) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += extra_size; - } - else - { - if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX)) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16) archive_name_size, - (mz_uint16) user_extra_data_len, 0, 0, 0, method, gen_flags, dos_time, dos_date)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, - sizeof(local_dir_header)) - != sizeof(local_dir_header)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += sizeof(local_dir_header); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) - != archive_name_size) - { - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - - cur_archive_file_ofs += archive_name_size; - } - - if (user_extra_data_len > 0) - { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data, - user_extra_data_len) - != user_extra_data_len) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_file_ofs += user_extra_data_len; - } - - if (max_size) - { - void* pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); - if (!pRead_buf) - { - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - } - - if (!level) - { - while (1) - { - size_t n = - read_callback(callback_opaque, file_ofs, pRead_buf, MZ_ZIP_MAX_IO_BUF_SIZE); - if (n == 0) - break; - - if ((n > MZ_ZIP_MAX_IO_BUF_SIZE) || (file_ofs + n > max_size)) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - } - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - file_ofs += n; - uncomp_crc32 = (mz_uint32) mz_crc32(uncomp_crc32, (const mz_uint8*) pRead_buf, n); - cur_archive_file_ofs += n; - } - uncomp_size = file_ofs; - comp_size = uncomp_size; - } - else - { - mz_bool result = MZ_FALSE; - mz_zip_writer_add_state state; - tdefl_compressor* pComp = (tdefl_compressor*) pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - sizeof(tdefl_compressor)); - if (!pComp) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); - } - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if (tdefl_init(pComp, 
mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) - != TDEFL_STATUS_OKAY) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - } - - for (;;) - { - tdefl_status status; - tdefl_flush flush = TDEFL_NO_FLUSH; - - size_t n = - read_callback(callback_opaque, file_ofs, pRead_buf, MZ_ZIP_MAX_IO_BUF_SIZE); - if ((n > MZ_ZIP_MAX_IO_BUF_SIZE) || (file_ofs + n > max_size)) - { - mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); - break; - } - - file_ofs += n; - uncomp_crc32 = (mz_uint32) mz_crc32(uncomp_crc32, (const mz_uint8*) pRead_buf, n); - - if (pZip->m_pNeeds_keepalive != NULL - && pZip->m_pNeeds_keepalive(pZip->m_pIO_opaque)) - flush = TDEFL_FULL_FLUSH; - - if (n == 0) - flush = TDEFL_FINISH; - - status = tdefl_compress_buffer(pComp, pRead_buf, n, flush); - if (status == TDEFL_STATUS_DONE) - { - result = MZ_TRUE; - break; - } - else if (status != TDEFL_STATUS_OKAY) - { - mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED); - break; - } - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - - if (!result) - { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - return MZ_FALSE; - } - - uncomp_size = file_ofs; - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - } - - if (!(level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE)) - { - mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64]; - mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32; - - MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID); - MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32); - if (pExtra_data == NULL) - { - if (comp_size > MZ_UINT32_MAX) - return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); - - MZ_WRITE_LE32(local_dir_footer + 8, comp_size); - MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size); - } - else - { - MZ_WRITE_LE64(local_dir_footer + 8, comp_size); - MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size); - local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64; - } - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, - local_dir_footer_size) - != local_dir_footer_size) - return MZ_FALSE; - - cur_archive_file_ofs += local_dir_footer_size; - } - - if (level_and_flags & MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE) - { - if (pExtra_data != NULL) - { - extra_size = mz_zip_writer_create_zip64_extra_data( - extra_data, (max_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, - (max_size >= MZ_UINT32_MAX) ? &comp_size : NULL, - (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); - } - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16) archive_name_size, - (mz_uint16) (extra_size + user_extra_data_len), - (max_size >= MZ_UINT32_MAX) ? MZ_UINT32_MAX : uncomp_size, - (max_size >= MZ_UINT32_MAX) ? 
MZ_UINT32_MAX : comp_size, uncomp_crc32, method, - gen_flags, dos_time, dos_date)) - return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); - - cur_archive_header_file_ofs = local_dir_header_ofs; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_header_file_ofs, local_dir_header, - sizeof(local_dir_header)) - != sizeof(local_dir_header)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - if (pExtra_data != NULL) - { - cur_archive_header_file_ofs += sizeof(local_dir_header); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_header_file_ofs, pArchive_name, - archive_name_size) - != archive_name_size) - { - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - } - - cur_archive_header_file_ofs += archive_name_size; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_header_file_ofs, extra_data, - extra_size) - != extra_size) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); - - cur_archive_header_file_ofs += extra_size; - } - } - - if (pExtra_data != NULL) - { - extra_size = mz_zip_writer_create_zip64_extra_data( - extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, - (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, - (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); - } - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16) archive_name_size, pExtra_data, (mz_uint16) extra_size, - pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, gen_flags, dos_time, - dos_date, local_dir_header_ofs, ext_attributes, user_extra_data_central, - user_extra_data_central_len)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} - - #ifndef MINIZ_NO_STDIO - -static size_t mz_file_read_func_stdio(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n) { - MZ_FILE* pSrc_file = (MZ_FILE*) pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pSrc_file); - - if (((mz_int64) file_ofs < 0) - || (((cur_ofs != (mz_int64) file_ofs)) - && (MZ_FSEEK64(pSrc_file, (mz_int64) file_ofs, SEEK_SET)))) - return 0; - - return MZ_FREAD(pBuf, 1, n, pSrc_file); -} - -mz_bool mz_zip_writer_add_cfile(mz_zip_archive* pZip, - const char* pArchive_name, - MZ_FILE* pSrc_file, - mz_uint64 max_size, - const MZ_TIME_T* pFile_time, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint32 ext_attributes, - const char* user_extra_data, - mz_uint user_extra_data_len, - const char* user_extra_data_central, - mz_uint user_extra_data_central_len) { - return mz_zip_writer_add_read_buf_callback( - pZip, pArchive_name, mz_file_read_func_stdio, pSrc_file, max_size, pFile_time, pComment, - comment_size, level_and_flags, ext_attributes, user_extra_data, user_extra_data_len, - user_extra_data_central, user_extra_data_central_len); -} - -mz_bool mz_zip_writer_add_file(mz_zip_archive* pZip, - const char* pArchive_name, - const char* pSrc_filename, - const void* pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, - mz_uint32 ext_attributes) { - MZ_FILE* pSrc_file = NULL; - mz_uint64 uncomp_size = 0; - MZ_TIME_T file_modified_time; - MZ_TIME_T* pFile_time = NULL; - mz_bool status; - - memset(&file_modified_time, 0, sizeof(file_modified_time)); - - #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO) - pFile_time = &file_modified_time; - if (!mz_zip_get_file_modified_time(pSrc_filename, &file_modified_time)) - return mz_zip_set_error(pZip, MZ_ZIP_FILE_STAT_FAILED); - #endif - - pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); - if (!pSrc_file) - return 
mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
-
-    MZ_FSEEK64(pSrc_file, 0, SEEK_END);
-    uncomp_size = MZ_FTELL64(pSrc_file);
-    MZ_FSEEK64(pSrc_file, 0, SEEK_SET);
-
-    status =
-      mz_zip_writer_add_cfile(pZip, pArchive_name, pSrc_file, uncomp_size, pFile_time, pComment,
-                              comment_size, level_and_flags, ext_attributes, NULL, 0, NULL, 0);
-
-    MZ_FCLOSE(pSrc_file);
-
-    return status;
-}
-
- #endif /* #ifndef MINIZ_NO_STDIO */
-
[Several hundred further deleted miniz.h lines, summarized: mz_zip_writer_update_zip64_extension_block (rebuilds a zip64 extended-information extra field while preserving unrelated fields); mz_zip_writer_add_from_zip_reader (copies an entry between archives, upgrading the data descriptor when the destination is zip64); mz_zip_writer_finalize_archive and mz_zip_writer_finalize_heap_archive (write the central directory, the zip64 end-of-central-directory record and locator, and the end-of-central-directory record); mz_zip_writer_end; mz_zip_add_mem_to_archive_file_in_place(_v2); mz_zip_extract_archive_file_to_heap(_v2); the accessors mz_zip_get_mode and mz_zip_get_type; the error helpers mz_zip_set_last_error, mz_zip_peek_last_error, mz_zip_clear_last_error and mz_zip_get_last_error; and mz_zip_get_error_string with its message table, followed by a note that a non-zip64 archive may still carry zip64 extended-information extra fields.]
[Remaining deleted miniz.h misc utils, summarized: mz_zip_is_zip64, mz_zip_get_central_dir_size, mz_zip_reader_get_num_files, mz_zip_get_archive_size, mz_zip_get_archive_file_start_offset, mz_zip_get_cfile, mz_zip_read_archive_data, mz_zip_reader_get_filename, mz_zip_reader_file_stat, and mz_zip_end, which dispatches to the reader or writer teardown depending on the archive mode.]
-
- #ifdef __cplusplus
-}
- #endif
-
- #endif /*#ifndef MINIZ_NO_ARCHIVE_APIS*/
-
-#endif // MINIZ_HEADER_FILE_ONLY
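For orientation at this seam between the two deleted files: the miniz/zip code removed above and below was, in this engine, used to unpack the compressed .nnue network at startup. The zstd sources added by this patch replace that with one-shot frame decompression, presumably wired up in the misc.cpp and nnue/network.cpp hunks further down. The sketch below illustrates only the general pattern, not the patch's actual loader; the helper name and vector-based framing are assumptions for illustration, while the ZSTD_* calls are the real zstd API.

// Sketch (hypothetical helper, not this patch's code): decompress a whole
// zstd frame held in memory. Assumes the frame header records the
// decompressed size, which zstd writes by default for one-shot compression.
#include <stdexcept>
#include <string>
#include <vector>

#include "zstd.h"  // vendored by this patch under src/external/

std::vector<char> decompress_zstd(const std::vector<char>& compressed) {
    // The frame header stores the decompressed size (or an "unknown" marker).
    unsigned long long const contentSize =
      ZSTD_getFrameContentSize(compressed.data(), compressed.size());
    if (contentSize == ZSTD_CONTENTSIZE_ERROR)
        throw std::runtime_error("not a zstd frame");
    if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN)
        throw std::runtime_error("frame does not declare its decompressed size");

    std::vector<char> out(static_cast<size_t>(contentSize));

    // One-shot decompression: returns the number of bytes written,
    // or an error code that ZSTD_isError() recognizes.
    size_t const written =
      ZSTD_decompress(out.data(), out.size(), compressed.data(), compressed.size());
    if (ZSTD_isError(written))
        throw std::runtime_error(std::string("zstd: ") + ZSTD_getErrorName(written));

    out.resize(written);
    return out;
}

Compared with the removed path (open the archive, locate the entry, inflate it block by block, CRC-check), this is a single call over a buffer whose output size is known up front.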
diff --git a/src/external/zip.cpp b/src/external/zip.cpp
deleted file mode 100644
index 4cbeb916..00000000
--- a/src/external/zip.cpp
+++ /dev/null
@@ -1,2305 +0,0 @@
-/*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
[Deleted zip.cpp wrapper (~2,300 lines), summarized: platform shims and the zip_errlist/zip_strerror error table; path helpers zip_basename, zip_mkpath, zip_strclone, zip_strrpl and zip_name_normalize; zip_archive_truncate and zip_archive_extract, including symlink and permission handling; the in-place entry-deletion machinery (zip_entry_mark, zip_entry_markbyindex, zip_index_next, zip_sort, zip_index_update, zip_entry_finalize, zip_entry_set, zip_entry_setbyindex, zip_mem_move, zip_file_move, zip_files_move, zip_central_dir_move, zip_central_dir_delete, zip_entries_delete_mark); and the public API zip_open/zip_openwitherror, zip_close, zip_is64, zip_offset, _zip_entry_open, zip_entry_open, zip_entry_opencasesensitive, zip_entry_openbyindex, and the beginning of zip_entry_close.]
*/ - zip->entry.external_attr |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; - } - - if (!mz_zip_writer_add_to_central_dir( - pzip, zip->entry.name, entrylen, pExtra_data, (mz_uint16) extra_size, "", 0, - zip->entry.uncomp_size, zip->entry.comp_size, zip->entry.uncomp_crc32, zip->entry.method, - MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 | MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR, dos_time, - dos_date, zip->entry.header_offset, zip->entry.external_attr, NULL, 0)) - { - // Cannot write to zip central dir - err = ZIP_EWRTDIR; - goto cleanup; - } - - pzip->m_total_files++; - pzip->m_archive_size = zip->entry.dir_offset; - -cleanup: - if (zip) - { - zip->entry.m_time = 0; - zip->entry.index = -1; - CLEANUP(zip->entry.name); - } - return err; -} - -const char* zip_entry_name(struct zip_t* zip) { - if (!zip) - { - // zip_t handler is not initialized - return NULL; - } - return zip->entry.name; -} - -ssize_t zip_entry_index(struct zip_t* zip) { - if (!zip) - { - // zip_t handler is not initialized - return (ssize_t) ZIP_ENOINIT; - } - - return zip->entry.index; -} - -int zip_entry_isdir(struct zip_t* zip) { - mz_uint16 entrylen; - if (!zip) - { - // zip_t handler is not initialized - return ZIP_ENOINIT; - } - - if (zip->entry.index < (ssize_t) 0) - { - // zip entry is not opened - return ZIP_EINVIDX; - } - - entrylen = (mz_uint16) strlen(zip->entry.name); - return ISSLASH(zip->entry.name[entrylen - 1]); -} - -unsigned long long zip_entry_size(struct zip_t* zip) { return zip_entry_uncomp_size(zip); } - -unsigned long long zip_entry_uncomp_size(struct zip_t* zip) { - return zip ? zip->entry.uncomp_size : 0; -} - -unsigned long long zip_entry_comp_size(struct zip_t* zip) { return zip ? zip->entry.comp_size : 0; } - -unsigned int zip_entry_crc32(struct zip_t* zip) { return zip ? zip->entry.uncomp_crc32 : 0; } - -unsigned long long zip_entry_dir_offset(struct zip_t* zip) { - return zip ? zip->entry.dir_offset : 0; -} - -unsigned long long zip_entry_header_offset(struct zip_t* zip) { - return zip ? 
zip->entry.header_offset : 0; -} - -int zip_entry_write(struct zip_t* zip, const void* buf, size_t bufsize) { - mz_uint level; - mz_zip_archive* pzip = NULL; - tdefl_status status; - - if (!zip) - { - // zip_t handler is not initialized - return ZIP_ENOINIT; - } - - pzip = &(zip->archive); - if (buf && bufsize > 0) - { - zip->entry.uncomp_size += bufsize; - zip->entry.uncomp_crc32 = - (mz_uint32) mz_crc32(zip->entry.uncomp_crc32, (const mz_uint8*) buf, bufsize); - - level = zip->level & 0xF; - if (!level) - { - if ((pzip->m_pWrite(pzip->m_pIO_opaque, zip->entry.dir_offset, buf, bufsize) - != bufsize)) - { - // Cannot write buffer - return ZIP_EWRTENT; - } - zip->entry.dir_offset += bufsize; - zip->entry.comp_size += bufsize; - } - else - { - status = tdefl_compress_buffer(&(zip->entry.comp), buf, bufsize, TDEFL_NO_FLUSH); - if (status != TDEFL_STATUS_DONE && status != TDEFL_STATUS_OKAY) - { - // Cannot compress buffer - return ZIP_ETDEFLBUF; - } - } - } - - return 0; -} - -int zip_entry_fwrite(struct zip_t* zip, const char* filename) { - int err = 0; - size_t n = 0; - MZ_FILE* stream = NULL; - mz_uint8 buf[MZ_ZIP_MAX_IO_BUF_SIZE]; - struct MZ_FILE_STAT_STRUCT file_stat; - mz_uint16 modes; - - if (!zip) - { - // zip_t handler is not initialized - return ZIP_ENOINIT; - } - - memset(buf, 0, MZ_ZIP_MAX_IO_BUF_SIZE); - memset((void*) &file_stat, 0, sizeof(struct MZ_FILE_STAT_STRUCT)); - if (MZ_FILE_STAT(filename, &file_stat) != 0) - { - // problem getting information - check errno - return ZIP_ENOENT; - } - -#if defined(_WIN32) || defined(__WIN32__) || defined(DJGPP) - (void) modes; // unused -#else - /* Initialize with permission bits--which are not implementation-optional */ - modes = file_stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO | S_ISUID | S_ISGID | S_ISVTX); - if (S_ISDIR(file_stat.st_mode)) - modes |= UNX_IFDIR; - if (S_ISREG(file_stat.st_mode)) - modes |= UNX_IFREG; - if (S_ISLNK(file_stat.st_mode)) - modes |= UNX_IFLNK; - if (S_ISBLK(file_stat.st_mode)) - modes |= UNX_IFBLK; - if (S_ISCHR(file_stat.st_mode)) - modes |= UNX_IFCHR; - if (S_ISFIFO(file_stat.st_mode)) - modes |= UNX_IFIFO; - if (S_ISSOCK(file_stat.st_mode)) - modes |= UNX_IFSOCK; - zip->entry.external_attr = (modes << 16) | !(file_stat.st_mode & S_IWUSR); - if ((file_stat.st_mode & S_IFMT) == S_IFDIR) - { - zip->entry.external_attr |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; - } -#endif - - zip->entry.m_time = file_stat.st_mtime; - - if (!(stream = MZ_FOPEN(filename, "rb"))) - { - // Cannot open filename - return ZIP_EOPNFILE; - } - - while ((n = fread(buf, sizeof(mz_uint8), MZ_ZIP_MAX_IO_BUF_SIZE, stream)) > 0) - { - if (zip_entry_write(zip, buf, n) < 0) - { - err = ZIP_EWRTENT; - break; - } - } - fclose(stream); - - return err; -} - -ssize_t zip_entry_read(struct zip_t* zip, void** buf, size_t* bufsize) { - mz_zip_archive* pzip = NULL; - mz_uint idx; - size_t size = 0; - - if (!zip) - { - // zip_t handler is not initialized - return (ssize_t) ZIP_ENOINIT; - } - - pzip = &(zip->archive); - if (pzip->m_zip_mode != MZ_ZIP_MODE_READING || zip->entry.index < (ssize_t) 0) - { - // the entry is not found or we do not have read access - return (ssize_t) ZIP_ENOENT; - } - - idx = (mz_uint) zip->entry.index; - if (mz_zip_reader_is_file_a_directory(pzip, idx)) - { - // the entry is a directory - return (ssize_t) ZIP_EINVENTTYPE; - } - - *buf = mz_zip_reader_extract_to_heap(pzip, idx, &size, 0); - if (*buf && bufsize) - { - *bufsize = size; - } - return (ssize_t) size; -} - -ssize_t zip_entry_noallocread(struct zip_t* zip, void* buf, 
size_t bufsize) { - mz_zip_archive* pzip = NULL; - - if (!zip) - { - // zip_t handler is not initialized - return (ssize_t) ZIP_ENOINIT; - } - - pzip = &(zip->archive); - if (pzip->m_zip_mode != MZ_ZIP_MODE_READING || zip->entry.index < (ssize_t) 0) - { - // the entry is not found or we do not have read access - return (ssize_t) ZIP_ENOENT; - } - - if (!mz_zip_reader_extract_to_mem_no_alloc(pzip, (mz_uint) zip->entry.index, buf, bufsize, 0, - NULL, 0)) - { - return (ssize_t) ZIP_EMEMNOALLOC; - } - - return (ssize_t) zip->entry.uncomp_size; -} - -int zip_entry_fread(struct zip_t* zip, const char* filename) { - mz_zip_archive* pzip = NULL; - mz_uint idx; - mz_uint32 xattr = 0; - mz_zip_archive_file_stat info; - - if (!zip) - { - // zip_t handler is not initialized - return ZIP_ENOINIT; - } - - memset((void*) &info, 0, sizeof(mz_zip_archive_file_stat)); - pzip = &(zip->archive); - if (pzip->m_zip_mode != MZ_ZIP_MODE_READING || zip->entry.index < (ssize_t) 0) - { - // the entry is not found or we do not have read access - return ZIP_ENOENT; - } - - idx = (mz_uint) zip->entry.index; - if (mz_zip_reader_is_file_a_directory(pzip, idx)) - { - // the entry is a directory - return ZIP_EINVENTTYPE; - } - - if (!mz_zip_reader_extract_to_file(pzip, idx, filename, 0)) - { - return ZIP_ENOFILE; - } - -#if defined(_MSC_VER) || defined(PS4) - (void) xattr; // unused -#else - if (!mz_zip_reader_file_stat(pzip, idx, &info)) - { - // Cannot get information about zip archive; - return ZIP_ENOFILE; - } - - xattr = (info.m_external_attr >> 16) & 0xFFFF; - if (xattr > 0 && xattr <= MZ_UINT16_MAX) - { - if (CHMOD(filename, (mode_t) xattr) < 0) - { - return ZIP_ENOPERM; - } - } -#endif - - return 0; -} - -int zip_entry_extract( - struct zip_t* zip, - size_t (*on_extract)(void* arg, uint64_t offset, const void* buf, size_t bufsize), - void* arg) { - mz_zip_archive* pzip = NULL; - mz_uint idx; - - if (!zip) - { - // zip_t handler is not initialized - return ZIP_ENOINIT; - } - - pzip = &(zip->archive); - if (pzip->m_zip_mode != MZ_ZIP_MODE_READING || zip->entry.index < (ssize_t) 0) - { - // the entry is not found or we do not have read access - return ZIP_ENOENT; - } - - idx = (mz_uint) zip->entry.index; - return (mz_zip_reader_extract_to_callback(pzip, idx, on_extract, arg, 0)) ? 
0 : ZIP_EINVIDX; -} - -ssize_t zip_entries_total(struct zip_t* zip) { - if (!zip) - { - // zip_t handler is not initialized - return ZIP_ENOINIT; - } - - return (ssize_t) zip->archive.m_total_files; -} - -ssize_t zip_entries_delete(struct zip_t* zip, char* const entries[], size_t len) { - ssize_t n = 0; - ssize_t err = 0; - struct zip_entry_mark_t* entry_mark = NULL; - - if (zip == NULL || (entries == NULL && len != 0)) - { - return ZIP_ENOINIT; - } - - if (entries == NULL && len == 0) - { - return 0; - } - - n = zip_entries_total(zip); - if (n < 0) - { - return n; - } - - entry_mark = (struct zip_entry_mark_t*) calloc((size_t) n, sizeof(struct zip_entry_mark_t)); - if (!entry_mark) - { - return ZIP_EOOMEM; - } - - zip->archive.m_zip_mode = MZ_ZIP_MODE_READING; - - err = zip_entry_set(zip, entry_mark, (size_t) n, entries, len); - if (err < 0) - { - CLEANUP(entry_mark); - return err; - } - - err = zip_entries_delete_mark(zip, entry_mark, (int) n); - CLEANUP(entry_mark); - return err; -} - -ssize_t zip_entries_deletebyindex(struct zip_t* zip, size_t entries[], size_t len) { - ssize_t n = 0; - ssize_t err = 0; - struct zip_entry_mark_t* entry_mark = NULL; - - if (zip == NULL || (entries == NULL && len != 0)) - { - return ZIP_ENOINIT; - } - - if (entries == NULL && len == 0) - { - return 0; - } - - n = zip_entries_total(zip); - if (n < 0) - { - return n; - } - - entry_mark = (struct zip_entry_mark_t*) calloc((size_t) n, sizeof(struct zip_entry_mark_t)); - if (!entry_mark) - { - return ZIP_EOOMEM; - } - - zip->archive.m_zip_mode = MZ_ZIP_MODE_READING; - - err = zip_entry_setbyindex(zip, entry_mark, (size_t) n, entries, len); - if (err < 0) - { - CLEANUP(entry_mark); - return err; - } - - err = zip_entries_delete_mark(zip, entry_mark, (int) n); - CLEANUP(entry_mark); - return err; -} - -int zip_stream_extract(const char* stream, - size_t size, - const char* dir, - int (*on_extract)(const char* filename, void* arg), - void* arg) { - mz_zip_archive zip_archive; - if (!stream || !dir) - { - // Cannot parse zip archive stream - return ZIP_ENOINIT; - } - if (!memset(&zip_archive, 0, sizeof(mz_zip_archive))) - { - // Cannot memset zip archive - return ZIP_EMEMSET; - } - if (!mz_zip_reader_init_mem(&zip_archive, stream, size, 0)) - { - // Cannot initialize zip_archive reader - return ZIP_ENOINIT; - } - - return zip_archive_extract(&zip_archive, dir, on_extract, arg); -} - -struct zip_t* zip_stream_open(const char* stream, size_t size, int level, char mode) { - int errnum = 0; - return zip_stream_openwitherror(stream, size, level, mode, &errnum); -} - -struct zip_t* -zip_stream_openwitherror(const char* stream, size_t size, int level, char mode, int* errnum) { - struct zip_t* zip = (struct zip_t*) calloc((size_t) 1, sizeof(struct zip_t)); - if (!zip) - { - // out of memory - *errnum = ZIP_EOOMEM; - return NULL; - } - - if (level < 0) - { - level = MZ_DEFAULT_LEVEL; - } - if ((level & 0xF) > MZ_UBER_COMPRESSION) - { - // Wrong compression level - *errnum = ZIP_EINVLVL; - goto cleanup; - } - zip->level = (mz_uint) level; - - if ((stream != NULL) && (size > 0) && (mode == 'r')) - { - if (!mz_zip_reader_init_mem(&(zip->archive), stream, size, 0)) - { - *errnum = ZIP_ERINIT; - goto cleanup; - } - } - else if ((stream == NULL) && (size == 0) && (mode == 'w')) - { - // Create a new archive. 
- if (!mz_zip_writer_init_heap(&(zip->archive), 0, 1024)) - { - // Cannot initialize zip_archive writer - *errnum = ZIP_EWINIT; - goto cleanup; - } - } - else - { - *errnum = ZIP_EINVMODE; - goto cleanup; - } - - *errnum = 0; - return zip; - -cleanup: - CLEANUP(zip); - return NULL; -} - -ssize_t zip_stream_copy(struct zip_t* zip, void** buf, size_t* bufsize) { - size_t n; - - if (!zip) - { - return (ssize_t) ZIP_ENOINIT; - } - zip_archive_finalize(&(zip->archive)); - - n = (size_t) zip->archive.m_archive_size; - if (bufsize != NULL) - { - *bufsize = n; - } - - *buf = calloc(n, sizeof(unsigned char)); - memcpy(*buf, zip->archive.m_pState->m_pMem, n); - - return (ssize_t) n; -} - -void zip_stream_close(struct zip_t* zip) { - if (zip) - { - mz_zip_writer_end(&(zip->archive)); - mz_zip_reader_end(&(zip->archive)); - CLEANUP(zip); - } -} - -struct zip_t* zip_cstream_open(FILE* stream, int level, char mode) { - int errnum = 0; - return zip_cstream_openwitherror(stream, level, mode, &errnum); -} - -struct zip_t* zip_cstream_openwitherror(FILE* stream, int level, char mode, int* errnum) { - struct zip_t* zip = NULL; - *errnum = 0; - if (!stream) - { - // zip archive stream is NULL - *errnum = ZIP_ENOFILE; - goto cleanup; - } - - if (level < 0) - level = MZ_DEFAULT_LEVEL; - if ((level & 0xF) > MZ_UBER_COMPRESSION) - { - // Wrong compression level - *errnum = ZIP_EINVLVL; - goto cleanup; - } - - zip = (struct zip_t*) calloc((size_t) 1, sizeof(struct zip_t)); - if (!zip) - { - // out of memory - *errnum = ZIP_EOOMEM; - goto cleanup; - } - - zip->level = (mz_uint) level; - switch (mode) - { - case 'w' : - // Create a new archive. - if (!mz_zip_writer_init_cfile(&(zip->archive), stream, MZ_ZIP_FLAG_WRITE_ZIP64)) - { - // Cannot initialize zip_archive writer - *errnum = ZIP_EWINIT; - goto cleanup; - } - break; - - case 'r' : - if (!mz_zip_reader_init_cfile(&(zip->archive), stream, 0, - zip->level | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - { - // An archive file does not exist or cannot initialize - // zip_archive reader - *errnum = ZIP_ERINIT; - goto cleanup; - } - break; - - case 'a' : - case 'd' : - if (!mz_zip_reader_init_cfile(&(zip->archive), stream, 0, - zip->level | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - { - // An archive file does not exist or cannot initialize - // zip_archive reader - *errnum = ZIP_ERINIT; - goto cleanup; - } - if ((mode == 'a' || mode == 'd')) - { - if (!mz_zip_writer_init_from_reader_v2(&(zip->archive), NULL, 0)) - { - *errnum = ZIP_EWRINIT; - mz_zip_reader_end(&(zip->archive)); - goto cleanup; - } - } - break; - - default : - *errnum = ZIP_EINVMODE; - goto cleanup; - } - - return zip; - -cleanup: - CLEANUP(zip); - return NULL; -} - -void zip_cstream_close(struct zip_t* zip) { zip_close(zip); } - -int zip_create(const char* zipname, const char* filenames[], size_t len) { - int err = 0; - size_t i; - mz_zip_archive zip_archive; - struct MZ_FILE_STAT_STRUCT file_stat; - mz_uint32 ext_attributes = 0; - mz_uint16 modes; - - if (!zipname || strlen(zipname) < 1) - { - // zip_t archive name is empty or NULL - return ZIP_EINVZIPNAME; - } - - // Create a new archive. 
- if (!memset(&(zip_archive), 0, sizeof(zip_archive))) - { - // Cannot memset zip archive - return ZIP_EMEMSET; - } - - if (!mz_zip_writer_init_file(&zip_archive, zipname, 0)) - { - // Cannot initialize zip_archive writer - return ZIP_ENOINIT; - } - - if (!memset((void*) &file_stat, 0, sizeof(struct MZ_FILE_STAT_STRUCT))) - { - return ZIP_EMEMSET; - } - - for (i = 0; i < len; ++i) - { - const char* name = filenames[i]; - if (!name) - { - err = ZIP_EINVENTNAME; - break; - } - - if (MZ_FILE_STAT(name, &file_stat) != 0) - { - // problem getting information - check errno - err = ZIP_ENOFILE; - break; - } - -#if defined(_WIN32) || defined(__WIN32__) || defined(DJGPP) - (void) modes; // unused -#else - - /* Initialize with permission bits--which are not implementation-optional */ - modes = file_stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO | S_ISUID | S_ISGID | S_ISVTX); - if (S_ISDIR(file_stat.st_mode)) - modes |= UNX_IFDIR; - if (S_ISREG(file_stat.st_mode)) - modes |= UNX_IFREG; - if (S_ISLNK(file_stat.st_mode)) - modes |= UNX_IFLNK; - if (S_ISBLK(file_stat.st_mode)) - modes |= UNX_IFBLK; - if (S_ISCHR(file_stat.st_mode)) - modes |= UNX_IFCHR; - if (S_ISFIFO(file_stat.st_mode)) - modes |= UNX_IFIFO; - if (S_ISSOCK(file_stat.st_mode)) - modes |= UNX_IFSOCK; - ext_attributes = (modes << 16) | !(file_stat.st_mode & S_IWUSR); - if ((file_stat.st_mode & S_IFMT) == S_IFDIR) - { - ext_attributes |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; - } -#endif - - if (!mz_zip_writer_add_file(&zip_archive, zip_basename(name), name, "", 0, - ZIP_DEFAULT_COMPRESSION_LEVEL, ext_attributes)) - { - // Cannot add file to zip_archive - err = ZIP_ENOFILE; - break; - } - } - - mz_zip_writer_finalize_archive(&zip_archive); - mz_zip_writer_end(&zip_archive); - return err; -} - -int zip_extract(const char* zipname, - const char* dir, - int (*on_extract)(const char* filename, void* arg), - void* arg) { - mz_zip_archive zip_archive; - - if (!zipname || !dir) - { - // Cannot parse zip archive name - return ZIP_EINVZIPNAME; - } - - if (!memset(&zip_archive, 0, sizeof(mz_zip_archive))) - { - // Cannot memset zip archive - return ZIP_EMEMSET; - } - - // Now try to open the archive. - if (!mz_zip_reader_init_file(&zip_archive, zipname, 0)) - { - // Cannot initialize zip_archive reader - return ZIP_ENOINIT; - } - - return zip_archive_extract(&zip_archive, dir, on_extract, arg); -} diff --git a/src/external/zip.h b/src/external/zip.h deleted file mode 100644 index ca81a682..00000000 --- a/src/external/zip.h +++ /dev/null @@ -1,583 +0,0 @@ -/* - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#pragma once -#ifndef ZIP_H - #define ZIP_H - - #include - #include - #include - #include - - #ifndef ZIP_SHARED - #define ZIP_EXPORT - #else - #ifdef _WIN32 - #ifdef ZIP_BUILD_SHARED - #define ZIP_EXPORT __declspec(dllexport) - #else - #define ZIP_EXPORT __declspec(dllimport) - #endif - #else - #define ZIP_EXPORT __attribute__((visibility("default"))) - #endif - #endif - - #ifdef __cplusplus -extern "C" { - #endif - - #if !defined(_POSIX_C_SOURCE) && defined(_MSC_VER) - // 64-bit Windows is the only mainstream platform - // where sizeof(long) != sizeof(void*) - #ifdef _WIN64 -typedef long long ssize_t; /* byte count or error */ - #else -typedef long ssize_t; /* byte count or error */ - #endif - #endif - - /** - * @mainpage - * - * Documentation for @ref zip. - */ - - /** - * @addtogroup zip - * @{ - */ - - /** - * Default zip compression level. - */ - #define ZIP_DEFAULT_COMPRESSION_LEVEL 6 - - /** - * Error codes - */ - #define ZIP_ENOINIT -1 // not initialized - #define ZIP_EINVENTNAME -2 // invalid entry name - #define ZIP_ENOENT -3 // entry not found - #define ZIP_EINVMODE -4 // invalid zip mode - #define ZIP_EINVLVL -5 // invalid compression level - #define ZIP_ENOSUP64 -6 // no zip 64 support - #define ZIP_EMEMSET -7 // memset error - #define ZIP_EWRTENT -8 // cannot write data to entry - #define ZIP_ETDEFLINIT -9 // cannot initialize tdefl compressor - #define ZIP_EINVIDX -10 // invalid index - #define ZIP_ENOHDR -11 // header not found - #define ZIP_ETDEFLBUF -12 // cannot flush tdefl buffer - #define ZIP_ECRTHDR -13 // cannot create entry header - #define ZIP_EWRTHDR -14 // cannot write entry header - #define ZIP_EWRTDIR -15 // cannot write to central dir - #define ZIP_EOPNFILE -16 // cannot open file - #define ZIP_EINVENTTYPE -17 // invalid entry type - #define ZIP_EMEMNOALLOC -18 // extracting data using no memory allocation - #define ZIP_ENOFILE -19 // file not found - #define ZIP_ENOPERM -20 // no permission - #define ZIP_EOOMEM -21 // out of memory - #define ZIP_EINVZIPNAME -22 // invalid zip archive name - #define ZIP_EMKDIR -23 // make dir error - #define ZIP_ESYMLINK -24 // symlink error - #define ZIP_ECLSZIP -25 // close archive error - #define ZIP_ECAPSIZE -26 // capacity size too small - #define ZIP_EFSEEK -27 // fseek error - #define ZIP_EFREAD -28 // fread error - #define ZIP_EFWRITE -29 // fwrite error - #define ZIP_ERINIT -30 // cannot initialize reader - #define ZIP_EWINIT -31 // cannot initialize writer - #define ZIP_EWRINIT -32 // cannot initialize writer from reader - -/** - * Looks up the error message string corresponding to an error number. - * @param errnum error number - * @return error message string corresponding to errnum or NULL if error is not - * found. - */ -extern ZIP_EXPORT const char* zip_strerror(int errnum); - -/** - * @struct zip_t - * - * This data structure is used throughout the library to represent zip archive - - * forward declaration. - */ -struct zip_t; - -/** - * Opens zip archive with compression level using the given mode. - * - * @param zipname zip archive file name. - * @param level compression level (0-9 are the standard zlib-style levels). - * @param mode file access mode. - * - 'r': opens a file for reading/extracting (the file must exists). - * - 'w': creates an empty file for writing. - * - 'a': appends to an existing archive. 
- * - * @return the zip archive handler or NULL on error - */ -extern ZIP_EXPORT struct zip_t* zip_open(const char* zipname, int level, char mode); - -/** - * Opens zip archive with compression level using the given mode. - * The function additionally returns @param errnum - - * - * @param zipname zip archive file name. - * @param level compression level (0-9 are the standard zlib-style levels). - * @param mode file access mode. - * - 'r': opens a file for reading/extracting (the file must exists). - * - 'w': creates an empty file for writing. - * - 'a': appends to an existing archive. - * @param errnum 0 on success, negative number (< 0) on error. - * - * @return the zip archive handler or NULL on error - */ -extern ZIP_EXPORT struct zip_t* -zip_openwitherror(const char* zipname, int level, char mode, int* errnum); - -/** - * Closes the zip archive, releases resources - always finalize. - * - * @param zip zip archive handler. - */ -extern ZIP_EXPORT void zip_close(struct zip_t* zip); - -/** - * Determines if the archive has a zip64 end of central directory headers. - * - * @param zip zip archive handler. - * - * @return the return code - 1 (true), 0 (false), negative number (< 0) on - * error. - */ -extern ZIP_EXPORT int zip_is64(struct zip_t* zip); - -/** - * Returns the offset in the stream where the zip header is located. - * - * @param zip zip archive handler. - * @param offset zip header offset. - * - * @return the return code - 0 if successful, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_offset(struct zip_t* zip, uint64_t* offset); - -/** - * Opens an entry by name in the zip archive. - * - * For zip archive opened in 'w' or 'a' mode the function will append - * a new entry. In readonly mode the function tries to locate the entry - * in global dictionary. - * - * @param zip zip archive handler. - * @param entryname an entry name in local dictionary. - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_open(struct zip_t* zip, const char* entryname); - -/** - * Opens an entry by name in the zip archive. - * - * For zip archive opened in 'w' or 'a' mode the function will append - * a new entry. In readonly mode the function tries to locate the entry - * in global dictionary (case sensitive). - * - * @param zip zip archive handler. - * @param entryname an entry name in local dictionary (case sensitive). - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_opencasesensitive(struct zip_t* zip, const char* entryname); - -/** - * Opens a new entry by index in the zip archive. - * - * This function is only valid if zip archive was opened in 'r' (readonly) mode. - * - * @param zip zip archive handler. - * @param index index in local dictionary. - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_openbyindex(struct zip_t* zip, size_t index); - -/** - * Closes a zip entry, flushes buffer and releases resources. - * - * @param zip zip archive handler. - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_close(struct zip_t* zip); - -/** - * Returns a local name of the current zip entry. - * - * The main difference between user's entry name and local entry name - * is optional relative path. - * Following .ZIP File Format Specification - the path stored MUST not contain - * a drive or device letter, or a leading slash. 
- * All slashes MUST be forward slashes '/' as opposed to backwards slashes '\' - * for compatibility with Amiga and UNIX file systems etc. - * - * @param zip: zip archive handler. - * - * @return the pointer to the current zip entry name, or NULL on error. - */ -extern ZIP_EXPORT const char* zip_entry_name(struct zip_t* zip); - -/** - * Returns an index of the current zip entry. - * - * @param zip zip archive handler. - * - * @return the index on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT ssize_t zip_entry_index(struct zip_t* zip); - -/** - * Determines if the current zip entry is a directory entry. - * - * @param zip zip archive handler. - * - * @return the return code - 1 (true), 0 (false), negative number (< 0) on - * error. - */ -extern ZIP_EXPORT int zip_entry_isdir(struct zip_t* zip); - -/** - * Returns the uncompressed size of the current zip entry. - * Alias for zip_entry_uncomp_size (for backward compatibility). - * - * @param zip zip archive handler. - * - * @return the uncompressed size in bytes. - */ -extern ZIP_EXPORT unsigned long long zip_entry_size(struct zip_t* zip); - -/** - * Returns the uncompressed size of the current zip entry. - * - * @param zip zip archive handler. - * - * @return the uncompressed size in bytes. - */ -extern ZIP_EXPORT unsigned long long zip_entry_uncomp_size(struct zip_t* zip); - -/** - * Returns the compressed size of the current zip entry. - * - * @param zip zip archive handler. - * - * @return the compressed size in bytes. - */ -extern ZIP_EXPORT unsigned long long zip_entry_comp_size(struct zip_t* zip); - -/** - * Returns CRC-32 checksum of the current zip entry. - * - * @param zip zip archive handler. - * - * @return the CRC-32 checksum. - */ -extern ZIP_EXPORT unsigned int zip_entry_crc32(struct zip_t* zip); - -/** - * Returns byte offset of the current zip entry - * in the archive's central directory. - * - * @param zip zip archive handler. - * - * @return the offset in bytes. - */ -extern ZIP_EXPORT unsigned long long zip_entry_dir_offset(struct zip_t* zip); - -/** - * Returns the current zip entry's local header file offset in bytes. - * - * @param zip zip archive handler. - * - * @return the entry's local header file offset in bytes. - */ -extern ZIP_EXPORT unsigned long long zip_entry_header_offset(struct zip_t* zip); - -/** - * Compresses an input buffer for the current zip entry. - * - * @param zip zip archive handler. - * @param buf input buffer. - * @param bufsize input buffer size (in bytes). - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_write(struct zip_t* zip, const void* buf, size_t bufsize); - -/** - * Compresses a file for the current zip entry. - * - * @param zip zip archive handler. - * @param filename input file. - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_fwrite(struct zip_t* zip, const char* filename); - -/** - * Extracts the current zip entry into output buffer. - * - * The function allocates sufficient memory for a output buffer. - * - * @param zip zip archive handler. - * @param buf output buffer. - * @param bufsize output buffer size (in bytes). - * - * @note remember to release memory allocated for a output buffer. - * for large entries, please take a look at zip_entry_extract function. - * - * @return the return code - the number of bytes actually read on success. - * Otherwise a negative number (< 0) on error. 
- */ -extern ZIP_EXPORT ssize_t zip_entry_read(struct zip_t* zip, void** buf, size_t* bufsize); - -/** - * Extracts the current zip entry into a memory buffer using no memory - * allocation. - * - * @param zip zip archive handler. - * @param buf preallocated output buffer. - * @param bufsize output buffer size (in bytes). - * - * @note ensure supplied output buffer is large enough. - * zip_entry_size function (returns uncompressed size for the current - * entry) can be handy to estimate how big buffer is needed. - * For large entries, please take a look at zip_entry_extract function. - * - * @return the return code - the number of bytes actually read on success. - * Otherwise a negative number (< 0) on error (e.g. bufsize is not large - * enough). - */ -extern ZIP_EXPORT ssize_t zip_entry_noallocread(struct zip_t* zip, void* buf, size_t bufsize); - -/** - * Extracts the current zip entry into output file. - * - * @param zip zip archive handler. - * @param filename output file. - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int zip_entry_fread(struct zip_t* zip, const char* filename); - -/** - * Extracts the current zip entry using a callback function (on_extract). - * - * @param zip zip archive handler. - * @param on_extract callback function. - * @param arg opaque pointer (optional argument, which you can pass to the - * on_extract callback) - * - * @return the return code - 0 on success, negative number (< 0) on error. - */ -extern ZIP_EXPORT int -zip_entry_extract(struct zip_t* zip, - size_t (*on_extract)(void* arg, uint64_t offset, const void* data, size_t size), - void* arg); - -/** - * Returns the number of all entries (files and directories) in the zip archive. - * - * @param zip zip archive handler. - * - * @return the return code - the number of entries on success, negative number - * (< 0) on error. - */ -extern ZIP_EXPORT ssize_t zip_entries_total(struct zip_t* zip); - -/** - * Deletes zip archive entries. - * - * @param zip zip archive handler. - * @param entries array of zip archive entries to be deleted. - * @param len the number of entries to be deleted. - * @return the number of deleted entries, or negative number (< 0) on error. - */ -extern ZIP_EXPORT ssize_t zip_entries_delete(struct zip_t* zip, char* const entries[], size_t len); - -/** - * Deletes zip archive entries. - * - * @param zip zip archive handler. - * @param entries array of zip archive entries indices to be deleted. - * @param len the number of entries to be deleted. - * @return the number of deleted entries, or negative number (< 0) on error. - */ -extern ZIP_EXPORT ssize_t zip_entries_deletebyindex(struct zip_t* zip, - size_t entries[], - size_t len); - -/** - * Extracts a zip archive stream into directory. - * - * If on_extract is not NULL, the callback will be called after - * successfully extracted each zip entry. - * Returning a negative value from the callback will cause abort and return an - * error. The last argument (void *arg) is optional, which you can use to pass - * data to the on_extract callback. - * - * @param stream zip archive stream. - * @param size stream size. - * @param dir output directory. - * @param on_extract on extract callback. - * @param arg opaque pointer. - * - * @return the return code - 0 on success, negative number (< 0) on error. 
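For reference while reviewing the deletion: the read-side API documented above (zip_open / zip_entry_open / zip_entry_read) is the path the engine previously used to unpack its network file. A minimal sketch of that legacy flow, with hypothetical names ("book.zip", "net.nnue") and error handling trimmed to early returns:

    #include <cstddef>
    #include "zip.h"

    // Legacy read path being removed by this patch: open the archive
    // read-only, locate one entry by name, inflate it into a heap buffer.
    void* load_entry(std::size_t* out_size) {
        struct zip_t* zip = zip_open("book.zip", 0, 'r');  // 'r' = read/extract mode
        if (!zip)
            return nullptr;

        void*       buf  = nullptr;
        std::size_t size = 0;
        if (zip_entry_open(zip, "net.nnue") == 0) {  // 0 on success
            zip_entry_read(zip, &buf, &size);        // allocates; caller frees
            zip_entry_close(zip);
        }
        zip_close(zip);

        if (out_size)
            *out_size = size;
        return buf;                                  // free() when done
    }

Each step returns a negative ZIP_E* code on failure, which a real caller would check individually.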
- */ -extern ZIP_EXPORT int zip_stream_extract(const char* stream, - size_t size, - const char* dir, - int (*on_extract)(const char* filename, void* arg), - void* arg); - -/** - * Opens zip archive stream into memory. - * - * @param stream zip archive stream. - * @param size stream size. - * @param level compression level (0-9 are the standard zlib-style levels). - * @param mode file access mode. - * - 'r': opens a file for reading/extracting (the file must exists). - * - 'w': creates an empty file for writing. - * - 'a': appends to an existing archive. - * - * @return the zip archive handler or NULL on error - */ -extern ZIP_EXPORT struct zip_t* -zip_stream_open(const char* stream, size_t size, int level, char mode); - -/** - * Opens zip archive stream into memory. - * The function additionally returns @param errnum - - * - * @param stream zip archive stream. - * @param size stream size.* - * @param level compression level (0-9 are the standard zlib-style levels). - * @param mode file access mode. - * - 'r': opens a file for reading/extracting (the file must exists). - * - 'w': creates an empty file for writing. - * - 'a': appends to an existing archive. - * @param errnum 0 on success, negative number (< 0) on error. - * - * @return the zip archive handler or NULL on error - */ -extern ZIP_EXPORT struct zip_t* -zip_stream_openwitherror(const char* stream, size_t size, int level, char mode, int* errnum); - -/** - * Copy zip archive stream output buffer. - * - * @param zip zip archive handler. - * @param buf output buffer. User should free buf. - * @param bufsize output buffer size (in bytes). - * - * @return copy size - */ -extern ZIP_EXPORT ssize_t zip_stream_copy(struct zip_t* zip, void** buf, size_t* bufsize); - -/** - * Close zip archive releases resources. - * - * @param zip zip archive handler. - * - * @return - */ -extern ZIP_EXPORT void zip_stream_close(struct zip_t* zip); - -/** - * Opens zip archive from existing FILE stream with compression level using the - * given mode. The stream will not be closed when calling zip_close. - * - * @param stream C FILE stream. - * @param level compression level (0-9 are the standard zlib-style levels). - * @param mode file access mode. This mode should be equivalent to the mode - * provided when opening the file. - * - 'r': opens a file for reading/extracting (the file must exists). - * - 'w': creates an empty file for writing. - * - 'a': appends to an existing archive. - * - * @return the zip archive handler or NULL on error - */ -extern ZIP_EXPORT struct zip_t* zip_cstream_open(FILE* stream, int level, char mode); - -/** - * Opens zip archive from existing FILE stream with compression level using the - * given mode. The function additionally returns @param errnum - The stream will - * not be closed when calling zip_close. - * - * @param stream C FILE stream. - * @param level compression level (0-9 are the standard zlib-style levels). - * @param mode file access mode. - * - 'r': opens a file for reading/extracting (the file must exists). - * - 'w': creates an empty file for writing. - * - 'a': appends to an existing archive. - * @param errnum 0 on success, negative number (< 0) on error. - * - * @return the zip archive handler or NULL on error - */ -extern ZIP_EXPORT struct zip_t* -zip_cstream_openwitherror(FILE* stream, int level, char mode, int* errnum); - -/** - * Closes the zip archive, releases resources - always finalize. - * This function is an alias for zip_close function. - * - * @param zip zip archive handler. 
- */
-extern ZIP_EXPORT void zip_cstream_close(struct zip_t* zip);
-
-/**
- * Creates a new archive and puts files into a single zip archive.
- *
- * @param zipname zip archive file.
- * @param filenames input files.
- * @param len: number of input files.
- *
- * @return the return code - 0 on success, negative number (< 0) on error.
- */
-extern ZIP_EXPORT int zip_create(const char* zipname, const char* filenames[], size_t len);
-
-/**
- * Extracts a zip archive file into directory.
- *
- * If on_extract_entry is not NULL, the callback will be called after
- * successfully extracted each zip entry.
- * Returning a negative value from the callback will cause abort and return an
- * error. The last argument (void *arg) is optional, which you can use to pass
- * data to the on_extract_entry callback.
- *
- * @param zipname zip archive file.
- * @param dir output directory.
- * @param on_extract_entry on extract callback.
- * @param arg opaque pointer.
- *
- * @return the return code - 0 on success, negative number (< 0) on error.
- */
-extern ZIP_EXPORT int zip_extract(const char* zipname,
-                                  const char* dir,
-                                  int (*on_extract_entry)(const char* filename, void* arg),
-                                  void* arg);
- /** @} */
- #ifdef __cplusplus
-}
- #endif
-
-#endif
diff --git a/src/external/zstd.h b/src/external/zstd.h
new file mode 100644
index 00000000..70d34fdd
--- /dev/null
+++ b/src/external/zstd.h
@@ -0,0 +1,3284 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#ifndef ZSTD_H_235446
+    #define ZSTD_H_235446
+
+    /* ====== Dependencies ======*/
+    #include <limits.h> /* INT_MAX */
+    #include <stddef.h> /* size_t */
+
+
+    /* ===== ZSTDLIB_API : control library symbols visibility ===== */
+    #ifndef ZSTDLIB_VISIBLE
+        /* Backwards compatibility with old macro name */
+        #ifdef ZSTDLIB_VISIBILITY
+            #define ZSTDLIB_VISIBLE ZSTDLIB_VISIBILITY
+        #elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+            #define ZSTDLIB_VISIBLE __attribute__((visibility("default")))
+        #else
+            #define ZSTDLIB_VISIBLE
+        #endif
+    #endif
+
+    #ifndef ZSTDLIB_HIDDEN
+        #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+            #define ZSTDLIB_HIDDEN __attribute__((visibility("hidden")))
+        #else
+            #define ZSTDLIB_HIDDEN
+        #endif
+    #endif
+
+    #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT == 1)
+        #define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBLE
+    #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT == 1)
+        #define ZSTDLIB_API \
+            __declspec(dllimport) \
+            ZSTDLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+    #else
+        #define ZSTDLIB_API ZSTDLIB_VISIBLE
+    #endif
+
+    /* Deprecation warnings :
+     * Should these warnings be a problem, it is generally possible to disable them,
+     * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+     * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+    #ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+        #define ZSTD_DEPRECATED(message) /* disable deprecation warnings */
+    #else
+        #if defined(__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+            #define ZSTD_DEPRECATED(message) [[deprecated(message)]]
+        #elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) \
+          || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__)
+            #define ZSTD_DEPRECATED(message) __attribute__((deprecated(message)))
+        #elif defined(__GNUC__) && (__GNUC__ >= 3)
+            #define ZSTD_DEPRECATED(message) __attribute__((deprecated))
+        #elif defined(_MSC_VER)
+            #define ZSTD_DEPRECATED(message) __declspec(deprecated(message))
+        #else
+            #pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+            #define ZSTD_DEPRECATED(message)
+        #endif
+    #endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
+
+/*******************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
+  Compression can be done in:
+  - a single step (described as Simple API)
+  - a single step, reusing a context (described as Explicit context)
+  - unbounded multiple steps (described as Streaming compression)
+
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+  - a single step (described as Simple dictionary API)
+  - a single step, reusing a dictionary (described as Bulk-processing
+    dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
+*******************************************************************************/
+
+    /*------ Version ------*/
+    #define ZSTD_VERSION_MAJOR 1
+    #define ZSTD_VERSION_MINOR 5
+    #define ZSTD_VERSION_RELEASE 6
+    #define ZSTD_VERSION_NUMBER \
+        (ZSTD_VERSION_MAJOR * 100 * 100 + ZSTD_VERSION_MINOR * 100 + ZSTD_VERSION_RELEASE)
+
+/*! ZSTD_versionNumber() :
+ * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);
+
+    #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
+    #define ZSTD_QUOTE(str) #str
+    #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
+    #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
+
+/*! ZSTD_versionString() :
+ * Return runtime library version, like "1.4.5". Requires v1.3.0+.
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + + /* ************************************* + * Default constant + ***************************************/ + #ifndef ZSTD_CLEVEL_DEFAULT + #define ZSTD_CLEVEL_DEFAULT 3 + #endif + + /* ************************************* + * Constants + ***************************************/ + + /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ + #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ + #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ + #define ZSTD_MAGIC_SKIPPABLE_START \ + 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ + #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + + #define ZSTD_BLOCKSIZELOG_MAX 17 + #define ZSTD_BLOCKSIZE_MAX (1 << ZSTD_BLOCKSIZELOG_MAX) + + +/*************************************** +* Simple API +***************************************/ +/*! ZSTD_compress() : + * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have + * enough space to successfully compress the data. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t +ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * Multiple compressed frames can be decompressed at once with this method. + * The result will be the concatenation of all decompressed frames, back to back. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * First frame's decompressed size can be extracted using ZSTD_getFrameContentSize(). + * If maximum upper bound isn't known, prefer using streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress(void* dst, + size_t dstCapacity, + const void* src, + size_t compressedSize); + + /*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode). + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). 
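This Simple API is what replaces the zip path deleted above; its decompression half is the contract the engine's startup now relies on. A minimal sketch of one-shot decompression of a single frame already held in memory (the two ZSTD_CONTENTSIZE_* sentinels it tests are defined just below; names are illustrative):

    #include <stdexcept>
    #include <vector>
    #include "zstd.h"

    // One-shot decompression of a single complete zstd frame in memory.
    std::vector<char> decompress_frame(const void* src, size_t srcSize) {
        unsigned long long const contentSize = ZSTD_getFrameContentSize(src, srcSize);
        if (contentSize == ZSTD_CONTENTSIZE_ERROR || contentSize == ZSTD_CONTENTSIZE_UNKNOWN)
            throw std::runtime_error("no frame with a known decompressed size");

        std::vector<char> dst(static_cast<size_t>(contentSize));
        size_t const n = ZSTD_decompress(dst.data(), dst.size(), src, srcSize);
        if (ZSTD_isError(n))  // error helpers are declared further down this header
            throw std::runtime_error(ZSTD_getErrorName(n));
        dst.resize(n);
        return dst;
    }

Per note 5 below, a size read from untrusted input should additionally be checked against an application-defined limit before the allocation.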
+ * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ + #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) + #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void* src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") +ZSTDLIB_API +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + + /*====== Helper functions ======*/ + /* ZSTD_compressBound() : + * maximum compressed size in worst case single-pass scenario. + * When invoking `ZSTD_compress()` or any other one-pass compression function, + * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) + * as it eliminates one potential failure scenario, + * aka not enough room in dst buffer to write the compressed frame. + * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE . + * In which case, ZSTD_compressBound() will return an error code + * which can be tested using ZSTD_isError(). + * + * ZSTD_COMPRESSBOUND() : + * same as ZSTD_compressBound(), but as a macro. + * It can be used to produce constants, which can be useful for static allocation, + * for example to size a static array on stack. + * Will produce constant value 0 if srcSize too large. + */ + #define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t) == 8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) + #define ZSTD_COMPRESSBOUND(srcSize) \ + (((size_t) (srcSize) >= ZSTD_MAX_INPUT_SIZE) \ + ? 0 \ + : (srcSize) + ((srcSize) >> 8) \ + + (((srcSize) < (128 << 10)) \ + ? (((128 << 10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ \ + : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound( + size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +/* ZSTD_isError() : + * Most ZSTD_* functions returning a size_t value can be tested for error, + * using ZSTD_isError(). 
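ZSTD_findFrameCompressedSize pairs naturally with ZSTD_getFrameContentSize when several frames are concatenated back to back, a layout ZSTD_decompress explicitly supports. A sketch that walks such a sequence and totals the declared content sizes (illustrative only; skippable frames simply report a content size of 0):

    #include "zstd.h"

    // Walks concatenated zstd frames, summing their declared content sizes.
    // Returns 0 if any frame is malformed or has an unknown size.
    unsigned long long total_content_size(const char* src, size_t srcSize) {
        unsigned long long total = 0;
        while (srcSize > 0) {
            size_t const frameSize = ZSTD_findFrameCompressedSize(src, srcSize);
            if (ZSTD_isError(frameSize))
                return 0;
            unsigned long long const content = ZSTD_getFrameContentSize(src, frameSize);
            if (content == ZSTD_CONTENTSIZE_ERROR || content == ZSTD_CONTENTSIZE_UNKNOWN)
                return 0;
            total += content;
            src += frameSize;       // advance to the next frame
            srcSize -= frameSize;
        }
        return total;
    }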
+ * @return 1 if error, 0 otherwise + */ +ZSTDLIB_API unsigned +ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* +ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int +ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel( + void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and reuse it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to mirror `ZSTD_compress()` behavior, + * this function compresses at the requested compression level, + * __ignoring any other advanced parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and reuse it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters (see below). + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supersedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove API entry points from experimental which are redundant with this API. 
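Since this patch vendors only the decompression side of the library, the context of interest here is ZSTD_DCtx; allocating one and reusing it across loads is exactly the "Explicit context" optimization described above. A minimal RAII sketch (class name is illustrative):

    #include <stdexcept>
    #include "zstd.h"

    // Owns one ZSTD_DCtx and reuses it for every call, avoiding the
    // repeated context allocation the docs above recommend against.
    class Decompressor {
        ZSTD_DCtx* dctx;

       public:
        Decompressor() :
            dctx(ZSTD_createDCtx()) {
            if (!dctx)
                throw std::runtime_error("ZSTD_createDCtx failed");
        }
        ~Decompressor() { ZSTD_freeDCtx(dctx); }  // NULL-safe per the docs

        size_t decompress(void* dst, size_t dstCap, const void* src, size_t srcSize) {
            size_t const n = ZSTD_decompressDCtx(dctx, dst, dstCap, src, srcSize);
            if (ZSTD_isError(n))
                throw std::runtime_error(ZSTD_getErrorName(n));
            return n;
        }

        Decompressor(const Decompressor&)            = delete;
        Decompressor& operator=(const Decompressor&) = delete;
    };

One instance per thread, as the comment above requires for parallel execution.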
+ */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { + ZSTD_fast = 1, + ZSTD_dfast = 2, + ZSTD_greedy = 3, + ZSTD_lazy = 4, + ZSTD_lazy2 = 5, + ZSTD_btlazy2 = 6, + ZSTD_btopt = 7, + ZSTD_btultra = 8, + ZSTD_btultra2 = 9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel = + 100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog = 101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog = 102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog = 103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog = 104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch = 105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. 
+ * Note that currently, for all strategies < btopt, the effective minimum is 4;
+ * for all strategies > fast, the effective maximum is 6.
+ * Special: value 0 means "use default minMatchLength". */
+ ZSTD_c_targetLength = 106, /* Impact of this field depends on strategy.
+ * For strategies btopt, btultra & btultra2:
+ * Length of Match considered "good enough" to stop search.
+ * Larger values make compression stronger, and slower.
+ * For strategy fast:
+ * Distance between match sampling.
+ * Larger values make compression faster, and weaker.
+ * Special: value 0 means "use default targetLength". */
+ ZSTD_c_strategy = 107, /* See ZSTD_strategy enum definition.
+ * The higher the value of selected strategy, the more complex it is,
+ * resulting in stronger and slower compression.
+ * Special: value 0 means "use default strategy". */
+
+ ZSTD_c_targetCBlockSize = 130, /* v1.5.6+
+ * Attempts to fit compressed block size into approximately targetCBlockSize.
+ * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+ * Note that it's not a guarantee, just a convergence target (default:0).
+ * No target when targetCBlockSize == 0.
+ * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+ * when a client can make use of partial documents (a prominent example being Chrome).
+ * Note: this parameter is stable since v1.5.6.
+ * It was present as an experimental parameter in earlier versions,
+ * but using it with earlier library versions is not recommended
+ * due to massive performance regressions.
+ */
+ /* LDM mode parameters */
+ ZSTD_c_enableLongDistanceMatching = 160, /* Enable long distance matching.
+ * This parameter is designed to improve compression ratio
+ * for large inputs, by finding large matches at long distance.
+ * It increases memory usage and window size.
+ * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
+ * except when expressly set to a different value.
+ * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
+ * compression strategy >= ZSTD_btopt (== compression level 16+) */
+ ZSTD_c_ldmHashLog = 161, /* Size of the table for long distance matching, as a power of 2.
+ * Larger values increase memory usage and compression ratio,
+ * but decrease compression speed.
+ * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+ * default: windowlog - 7.
+ * Special: value 0 means "automatically determine hashlog". */
+ ZSTD_c_ldmMinMatch = 162, /* Minimum match size for long distance matcher.
+ * Values too large or too small usually decrease compression ratio.
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+ * Special: value 0 means "use default value" (default: 64). */
+ ZSTD_c_ldmBucketSizeLog =
+ 163, /* Log size of each bucket in the LDM hash table for collision resolution.
+ * Larger values improve collision resolution but decrease compression speed.
+ * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
+ * Special: value 0 means "use default value" (default: 3). */
+ ZSTD_c_ldmHashRateLog =
+ 164, /* Frequency of inserting/looking up entries into the LDM hash table.
+ * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+ * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+ * Larger values improve compression speed.
+ * Deviating far from default value will likely result in a compression ratio decrease.
+ * Special: value 0 means "automatically determine hashRateLog".
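+ *
+ * A minimal LDM sketch (illustrative only; assumes an allocated cctx,
+ * and the window size value is just an example) :
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 27);
+ * The ldm* parameters above can usually be left at 0, i.e. "use default".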
+ */
+
+ /* frame parameters */
+ ZSTD_c_contentSizeFlag =
+ 200, /* Content size will be written into frame header _whenever known_ (default:1)
+ * Content size must be known at the beginning of compression.
+ * This is automatically the case when using ZSTD_compress2().
+ * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+ ZSTD_c_checksumFlag =
+ 201, /* A 32-bit checksum of content is written at end of frame (default:0) */
+ ZSTD_c_dictIDFlag =
+ 202, /* When applicable, dictionary's ID is written into frame header (default:1) */
+
+ /* multi-threading parameters */
+ /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
+ * Otherwise, trying to set any value other than the default (0) will return an error.
+ * In a situation where it's unknown if the linked library supports multi-threading or not,
+ * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
+ */
+ ZSTD_c_nbWorkers = 400, /* Select how many threads will be spawned to compress in parallel.
+ * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
+ * ZSTD_compressStream*() consumes input and flushes output if possible, but immediately gives back control to caller,
+ * while compression is performed in parallel, within worker thread(s).
+ * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
+ * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
+ * More workers improve speed, but also increase memory usage.
+ * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
+ * compression is performed inside the caller's thread, and all invocations are blocking */
+ ZSTD_c_jobSize =
+ 401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
+ * Each compression job is completed in parallel, so this value can indirectly impact the number of active threads.
+ * 0 means default, which is dynamically determined based on compression parameters.
+ * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is larger.
+ * The minimum size is automatically and transparently enforced. */
+ ZSTD_c_overlapLog = 402, /* Control the overlap size, as a fraction of window size.
+ * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
+ * It helps preserve compression ratio, while each job is compressed in parallel.
+ * This value is enforced only when nbWorkers >= 1.
+ * Larger values increase compression ratio, but decrease speed.
+ * Possible values range from 0 to 9 :
+ * - 0 means "default" : value will be determined by the library, depending on strategy
+ * - 1 means "no overlap"
+ * - 9 means "full overlap", using a full window size.
+ * Each intermediate rank increases/decreases load size by a factor 2 :
+ * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
+ * default value varies between 6 and 9, depending on strategy */
+
+ /* note : additional experimental parameters are also available
+ * within the experimental section of the API.
+ * At the time of this writing, they include :
+ * ZSTD_c_rsyncable
+ * ZSTD_c_format
+ * ZSTD_c_forceMaxWindow
+ * ZSTD_c_forceAttachDict
+ * ZSTD_c_literalCompressionMode
+ * ZSTD_c_srcSizeHint
+ * ZSTD_c_enableDedicatedDictSearch
+ * ZSTD_c_stableInBuffer
+ * ZSTD_c_stableOutBuffer
+ * ZSTD_c_blockDelimiters
+ * ZSTD_c_validateSequences
+ * ZSTD_c_useBlockSplitter
+ * ZSTD_c_useRowMatchFinder
+ * ZSTD_c_prefetchCDictTables
+ * ZSTD_c_enableSeqProducerFallback
+ * ZSTD_c_maxBlockSize
+ * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+ * note : never ever use experimentalParam? names directly;
+ * also, the enum values themselves are unstable and can still change.
+ */
+ ZSTD_c_experimentalParam1 = 500,
+ ZSTD_c_experimentalParam2 = 10,
+ ZSTD_c_experimentalParam3 = 1000,
+ ZSTD_c_experimentalParam4 = 1001,
+ ZSTD_c_experimentalParam5 = 1002,
+ /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
+ ZSTD_c_experimentalParam7 = 1004,
+ ZSTD_c_experimentalParam8 = 1005,
+ ZSTD_c_experimentalParam9 = 1006,
+ ZSTD_c_experimentalParam10 = 1007,
+ ZSTD_c_experimentalParam11 = 1008,
+ ZSTD_c_experimentalParam12 = 1009,
+ ZSTD_c_experimentalParam13 = 1010,
+ ZSTD_c_experimentalParam14 = 1011,
+ ZSTD_c_experimentalParam15 = 1012,
+ ZSTD_c_experimentalParam16 = 1013,
+ ZSTD_c_experimentalParam17 = 1014,
+ ZSTD_c_experimentalParam18 = 1015,
+ ZSTD_c_experimentalParam19 = 1016
+} ZSTD_cParameter;
+
+typedef struct {
+ size_t error;
+ int lowerBound;
+ int upperBound;
+} ZSTD_bounds;
+
+/*! ZSTD_cParam_getBounds() :
+ * All parameters must belong to an interval with lower and upper bounds,
+ * otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ * - an error status field, which must be tested using ZSTD_isError()
+ * - lower and upper bounds, both inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
+
+/*! ZSTD_CCtx_setParameter() :
+ * Set one compression parameter, selected by enum ZSTD_cParameter.
+ * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
+ * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ * Setting a parameter is generally only possible during frame initialization (before starting compression).
+ * Exception : when using multi-threading mode (nbWorkers >= 1),
+ * the following parameters can be updated _during_ compression (within same frame):
+ * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
+ * New parameters will be active for the next job only (after a flush()).
+ * @return : an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ * Total input data size to be compressed as a single frame.
+ * Value will be written in frame header, unless explicitly forbidden using ZSTD_c_contentSizeFlag.
+ * This value will also be checked at end of frame, and will trigger an error if not respected.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
+ * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
+ * Note 2 : pledgedSrcSize is only valid once, for the next frame.
+ * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note 3 : Whenever all input data is provided and consumed in a single round,
+ * for example with ZSTD_compress2(),
+ * or by immediately invoking ZSTD_compressStream2(,,,ZSTD_e_end),
+ * this value is automatically overridden by srcSize instead.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+typedef enum {
+ ZSTD_reset_session_only = 1,
+ ZSTD_reset_parameters = 2,
+ ZSTD_reset_session_and_parameters = 3
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ * There are 2 different things that can be reset, independently or jointly :
+ * - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ * Useful after an error, or to interrupt any ongoing compression.
+ * Any internal data not yet flushed is cancelled.
+ * Compression parameters and dictionary remain unchanged.
+ * They will be used to compress next frame.
+ * Resetting session never fails.
+ * - The parameters : changes all parameters back to "default".
+ * This also removes any reference to any dictionary or external sequence producer.
+ * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing),
+ * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ * - Both : similar to resetting the session, followed by resetting parameters.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ * Behaves the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ * (note that this entry point doesn't even expose a compression level parameter).
+ * ZSTD_compress2() always starts a new frame.
+ * Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ * - The function is always blocking, returns when compression is completed.
+ * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have
+ * enough space to successfully compress the data, though it is possible it fails for other reasons.
+ * @return : compressed size written into `dst` (<= `dstCapacity`),
+ * or an error code if it fails (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t
+ZSTD_compress2(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/***********************************************
+* Advanced decompression API (Requires v1.4.0+)
+************************************************/
+
+/* The advanced API pushes parameters one by one into an existing DCtx context.
+ * Parameters are sticky, and remain valid for all following frames
+ * using the same DCtx context.
+ * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
+ * Therefore, no new decompression function is necessary.
+ */
+
+typedef enum {
+
+ ZSTD_d_windowLogMax = 100, /* Select a size limit (in power of 2) beyond which
+ * the streaming API will refuse to allocate memory buffer
+ * in order to protect the host from unreasonable memory requirements.
+ * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+ * Special: value 0 means "use default maximum windowLog". */
+
+ /* note : additional experimental parameters are also available
+ * within the experimental section of the API.
+ * At the time of this writing, they include :
+ * ZSTD_d_format
+ * ZSTD_d_stableOutBuffer
+ * ZSTD_d_forceIgnoreChecksum
+ * ZSTD_d_refMultipleDDicts
+ * ZSTD_d_disableHuffmanAssembly
+ * ZSTD_d_maxBlockSize
+ * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+ * note : never ever use experimentalParam? names directly
+ */
+ ZSTD_d_experimentalParam1 = 1000,
+ ZSTD_d_experimentalParam2 = 1001,
+ ZSTD_d_experimentalParam3 = 1002,
+ ZSTD_d_experimentalParam4 = 1003,
+ ZSTD_d_experimentalParam5 = 1004,
+ ZSTD_d_experimentalParam6 = 1005
+
+} ZSTD_dParameter;
+
+/*! ZSTD_dParam_getBounds() :
+ * All parameters must belong to an interval with lower and upper bounds,
+ * otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ * - an error status field, which must be tested using ZSTD_isError()
+ * - both lower and upper bounds, inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ * Set one decompression parameter, selected by enum ZSTD_dParameter.
+ * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ * Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ * Return a DCtx to a clean state.
+ * Session and parameters can be reset jointly or separately.
+ * Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+
+
+/****************************
+* Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+ const void* src; /**< start of input buffer */
+ size_t size; /**< size of input buffer */
+ size_t
+ pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+ void* dst; /**< start of output buffer */
+ size_t size; /**< size of output buffer */
+ size_t
+ pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
+
+/*-***********************************************************************
+* Streaming compression - HowTo
+*
+* A ZSTD_CStream object is required to track streaming operation.
+* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+* ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+* It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*
+* For parallel execution, use one separate ZSTD_CStream per thread.
+*
+* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+*
+* Parameters are sticky : when starting a new compression on the same context,
+* it will reuse the same sticky parameters as previous compression session.
+* When in doubt, it's recommended to fully initialize the context before usage.
+* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+* set more specific parameters, the pledged source size, or load a dictionary.
+*
+* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+* consume input stream. The function will automatically update both `pos`
+* fields within `input` and `output`.
+* Note that the function may not consume the entire input, for example, because
+* the output buffer is already full, in which case `input.pos < input.size`.
+* The caller must check if input has been entirely consumed.
+* If not, the caller must make some room to receive more compressed data,
+* and then present the remaining input data again.
+* note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+* but doesn't guarantee maximal forward progress. This is especially relevant
+* when compressing with multiple threads. The call won't block if it can
+* consume some input, but if it can't it will wait for some, but not all,
+* output to be flushed.
+* @return : provides a minimum amount of data remaining to be flushed from internal buffers
+* or an error code, which can be tested using ZSTD_isError().
+*
+* At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+* operation.
+* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+* block until the flush is complete or the output buffer is full.
+* @return : 0 if internal buffers are entirely flushed,
+* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+* or an error code, which can be tested using ZSTD_isError().
+*
+* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+* It will perform a flush and write frame epilogue.
+* The epilogue is required for decoders to consider a frame completed.
+* The flush operation is the same, and follows the same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+* start a new frame.
+* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+* block until the flush is complete or the output buffer is full.
+* @return : 0 if frame fully completed and fully flushed,
+* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+* or an error code, which can be tested using ZSTD_isError().
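+*
+* A minimal single-threaded loop sketch putting the above together
+* (illustrative only : `readChunk`, `writeOut`, the buffers and their sizes
+* are placeholders, and error handling is elided) :
+*     int lastChunk = 0;
+*     while (!lastChunk) {
+*         size_t const readSize = readChunk(inBuf, inBufSize, &lastChunk);
+*         ZSTD_inBuffer input = { inBuf, readSize, 0 };
+*         ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
+*         int finished = 0;
+*         while (!finished) {
+*             ZSTD_outBuffer output = { outBuf, outBufSize, 0 };
+*             size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
+*             writeOut(outBuf, output.pos);
+*             finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
+*         }
+*     }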
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+/* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+ ZSTD_e_continue =
+ 0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+ ZSTD_e_flush = 1, /* flush any data provided so far,
+ * it creates (at least) one new block, that can be decoded immediately on reception;
+ * frame will continue: any future data can still reference previously compressed data, improving compression.
+ * note : multithreaded compression will block to flush as much output as possible. */
+ ZSTD_e_end = 2 /* flush any remaining data _and_ close current frame.
+ * note that frame is only closed after compressed data is fully flushed (return value == 0).
+ * After that point, any additional data starts a new frame.
+ * note : each frame is independent (does not reference any content from previous frame).
+ * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() : Requires v1.4.0+
+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ * - Compression parameters cannot be changed once compression is started (save for a list of exceptions in multi-threading mode)
+ * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ * - endOp must be a valid directive
+ * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flushes to output whatever is available,
+ * and then immediately returns, just indicating that there is some data remaining to be flushed.
+ * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least one byte.
+ * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ * - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ * or an error code, which can be tested using ZSTD_isError().
+ * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ * Before starting a new compression job, or changing compression parameters,
+ * it is required to fully flush internal buffers.
+ * - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+ * Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state.
+ * In order to be re-employed after an error, a state must be reset,
+ * which can be done explicitly (ZSTD_CCtx_reset()),
+ * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2(ZSTD_CCtx* cctx,
+                                        ZSTD_outBuffer* output,
+                                        ZSTD_inBuffer* input,
+                                        ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossings of such an interface to an absolute minimum.
+ * It's not rare that performance ends up being spent more on the interface than on compression itself.
+ * In such cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the number of round trips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(
+  void); /**< recommended size for output buffer. Guaranteed to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * The following is a legacy streaming API, available since v1.0+ .
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
+ *
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *
+ * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API
+ * to compress with a dictionary.
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum number of bytes left to flush (if non-zero and not an error).
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs,
+                                       ZSTD_outBuffer* output,
+                                       ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+* Streaming decompression - HowTo
+*
+* A ZSTD_DStream object is required to track streaming operations.
+* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+* ZSTD_DStream objects can be reused multiple times.
+*
+* Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+* Alternatively, use advanced API to set specific properties.
+*
+* Use ZSTD_decompressStream() repetitively to consume your input.
+* The function will update both `pos` fields.
+* If `input.pos < input.size`, some input has not been consumed.
+* It's up to the caller to present the remaining data again.
+*
+* The function tries to flush all data decoded immediately, respecting output buffer size.
+* If `output.pos < output.size`, decoder has flushed everything it could.
+*
+* However, when `output.pos == output.size`, it's more difficult to know.
+* If @return > 0, the frame is not complete, meaning
+* either there is still some data left to flush within internal buffers,
+* or there is more input to read to complete the frame (or both).
+* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+* or an error code, which can be tested using ZSTD_isError(),
+* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+* the return value is a suggested next input size (just a hint for better latency)
+* that will never request more than the remaining content of the compressed frame.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
+/* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */
+
+/*===== Streaming decompression functions =====*/
+
+/*! ZSTD_initDStream() :
+ * Initialize/reset DStream state for new decompression operation.
+ * Call before new decompression operation using same DStream.
+ *
+ * Note : This function is redundant with the advanced API and equivalent to:
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+/*! ZSTD_decompressStream() :
+ * Streaming decompression function.
+ * Call it repetitively to consume the full input, updating the `pos` fields as necessary.
+ * Function will update both input and output `pos` fields exposing current state via these fields:
+ * - `input.pos < input.size`, some input remaining and caller should provide remaining input
+ * on the next call.
+ * - `output.pos < output.size`, decoder flushed internal output buffer.
+ * - `output.pos == output.size`, unflushed data potentially present in the internal buffers,
+ * check ZSTD_decompressStream() @return value,
+ * if > 0, invoke it again to flush remaining data to output.
+ * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX.
+ *
+ * @return : 0 when a frame is completely decoded and fully flushed,
+ * or an error code, which can be tested using ZSTD_isError(),
+ * or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
+ *
+ * Note: when an operation returns with an error code, the @zds state may be left in an undefined state.
+ * It's UB to invoke `ZSTD_decompressStream()` on such a state.
+ * In order to re-use such a state, it must first be reset,
+ * which can be done explicitly (`ZSTD_DCtx_reset()`),
+ * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
+ */
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(
+  void); /*!< recommended size for output buffer. Guaranteed to successfully flush at least one complete block in all circumstances. */
+
+
+/**************************
+* Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ * Compression at an explicit compression level using a Dictionary.
+ * A dictionary can be any arbitrary data segment (also called a prefix),
+ * or a buffer with specified information (see zdict.h).
+ * Note 1 : This function loads the dictionary, resulting in significant startup delay.
+ * It's intended for a dictionary used only once.
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst,
+                                           size_t dstCapacity,
+                                           const void* src,
+                                           size_t srcSize,
+                                           const void* dict,
+                                           size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ * Decompression using a known Dictionary.
+ * Dictionary must be identical to the one used during compression.
+ * Note 1 : This function loads the dictionary, resulting in significant startup delay.
+ * It's intended for a dictionary used only once.
+ * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst,
+                                             size_t dstCapacity,
+                                             const void* src,
+                                             size_t srcSize,
+                                             const void* dict,
+                                             size_t dictSize);
+
+
+/***********************************
+ * Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ * When compressing multiple messages or blocks using the same dictionary,
+ * it's recommended to digest the dictionary only once, since it's a costly operation.
+ * ZSTD_createCDict() will create a state from digesting a dictionary.
+ * The resulting state can be used for future compression operations with very limited startup cost.
+ * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ * in which case the only thing that it transports is the @compressionLevel.
+ * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict*
+ZSTD_createCDict(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ * Function frees memory allocated by ZSTD_createCDict().
+ * If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
+
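+/* A minimal "digest once, compress many times" sketch (illustrative only;
+ * assumes `dict`/`dictSize`, an allocated cctx and valid buffers, with error
+ * handling elided; ZSTD_compress_usingCDict() is declared just below) :
+ *     ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3);
+ *     size_t const csize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
+ *     ZSTD_freeCDict(cdict);
+ */
+
+/*! ZSTD_compress_usingCDict() :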
+ * Compression using a digested Dictionary.
+ * Recommended when same dictionary is used multiple times.
+ * Note : compression level is _decided at dictionary creation time_,
+ * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst,
+                                            size_t dstCapacity,
+                                            const void* src,
+                                            size_t srcSize,
+                                            const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ * Create a digested dictionary, ready to start decompression operation without startup delay.
+ * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ * Function frees memory allocated with ZSTD_createDDict().
+ * If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ * Decompression using a digested Dictionary.
+ * Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst,
+                                              size_t dstCapacity,
+                                              const void* src,
+                                              size_t srcSize,
+                                              const ZSTD_DDict* ddict);
+
+
+/********************************
+ * Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
+ * Provides the dictID stored within dictionary.
+ * If @return == 0, the dictionary is not conformant with Zstandard specification.
+ * It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
+ * Provides the dictID of the dictionary loaded into `cdict`.
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
+ * Provides the dictID of the dictionary loaded into `ddict`.
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
+ * Provides the dictID required to decompress the frame stored within `src`.
+ * If @return == 0, the dictID could not be decoded.
+ * This could be for one of the following reasons :
+ * - The frame does not require a dictionary to be decoded (most common case).
+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information.
+ *   Note : this use case also happens when using a non-conformant dictionary.
+ * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ * - This is not a Zstandard frame.
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code.
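+ * A quick sketch (illustrative; `src`/`srcSize` as above) :
+ *     unsigned const id = ZSTD_getDictID_fromFrame(src, srcSize);
+ *     // id != 0 : select the DDict registered under this id
+ *     // id == 0 : decompress without a dictionary, or inspect via ZSTD_getFrameHeader()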
*/ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API (Requires v1.4.0+) + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). + * Dictionaries are sticky, they remain valid when same context is reused, + * they only reset when the context is reset + * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. + * In contrast, Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, + * until parameters are reset, a new dictionary is loaded, or the dictionary + * is explicitly invalidated by loading a NULL dictionary. + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. + * Note 5 : This method does not benefit from LDM (long distance mode). + * If you want to employ LDM on some large dictionary content, + * prefer employing ZSTD_CCtx_refPrefix() described below. + */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used for all future compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. 
+ * Compressing with a prefix is similar in outcome to performing a diff and compressing it,
+ * but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * This method is compatible with LDM (long distance mode).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ * Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ * Its content must remain unmodified during compression.
+ * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ * ensure that the window size is large enough to contain the entire source.
+ * See ZSTD_c_windowLog.
+ * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ * It's a CPU consuming operation, with non-negligible impact on latency.
+ * If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
+ * Create an internal DDict from dict buffer, to be used to decompress all future frames.
+ * The dictionary remains valid for all future frames, until explicitly invalidated, or
+ * a new dictionary is loaded.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ * meaning "return to no-dictionary mode".
+ * Note 1 : Loading a dictionary involves building tables,
+ * which has a non-negligible impact on CPU usage and latency.
+ * It's recommended to "load once, use many times", to amortize the cost.
+ * Note 2 : `dict` content will be copied internally, so `dict` can be released after loading.
+ * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ * how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
+ * Reference a prepared dictionary, to be used to decompress next frames.
+ * The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ * will store the DDict references in a table, and the DDict used for decompression
+ * will be determined at decompression time, as per the dict ID in the frame.
+ * The memory for the table is allocated on the first call to refDDict, and can be
+ * freed with ZSTD_freeDCtx().
+ *
+ * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary
+ * will be managed, and referencing a dictionary effectively "discards" any previous one.
+ *
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: referencing a NULL DDict means "return to no-dictionary mode".
+ * Note : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
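+/* A minimal decompression-with-dictionary sketch (illustrative only; assumes
+ * the same `dict`/`dictSize` used at compression time, an allocated dctx and
+ * valid buffers, with error handling elided) :
+ *     ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+ *     ZSTD_DCtx_loadDictionary(dctx, dict, dictSize);
+ *     { size_t const dsize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+ *       if (ZSTD_isError(dsize)) return; }
+ */
+
+/*! ZSTD_DCtx_refPrefix() :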
+ * Reference a prefix (single-usage dictionary) to decompress next frame.
+ * This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ * and must use the same prefix as the one used during compression.
+ * Prefix is **only used once**. Reference is discarded at end of frame.
+ * End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ * Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ * Prefix buffer must remain unmodified up to the end of frame,
+ * reached when ZSTD_decompressStream() returns 0.
+ * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ * Note 4 : Referencing a raw content prefix has almost no CPU or memory cost.
+ * A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);
+
+/* === Memory management === */
+
+/*! ZSTD_sizeof_*() : Requires v1.4.0+
+ * These functions give the _current_ memory usage of selected object.
+ * Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ * ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+ #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+ /* This can be overridden externally to hide static symbols. */
+ #ifndef ZSTDLIB_STATIC_API
+ #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT == 1)
+ #define ZSTDLIB_STATIC_API __declspec(dllexport) ZSTDLIB_VISIBLE
+ #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT == 1)
+ #define ZSTDLIB_STATIC_API __declspec(dllimport) ZSTDLIB_VISIBLE
+ #else
+ #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE
+ #endif
+ #endif
+
+/****************************************************************************************
+ * experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+ #define ZSTD_FRAMEHEADERSIZE_PREFIX(format) \
+ ((format) == ZSTD_f_zstd1 \
+ ? 5 \
+ : 1) /* minimum input size required to query frame header size */
+ #define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
+ #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
+ #define ZSTD_SKIPPABLEHEADERSIZE 8
+
+ /* compression parameter bounds */
+ #define ZSTD_WINDOWLOG_MAX_32 30
+ #define ZSTD_WINDOWLOG_MAX_64 31
+ #define ZSTD_WINDOWLOG_MAX \
+ ((int) (sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+ #define ZSTD_WINDOWLOG_MIN 10
+ #define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+ #define ZSTD_HASHLOG_MIN 6
+ #define ZSTD_CHAINLOG_MAX_32 29
+ #define ZSTD_CHAINLOG_MAX_64 30
+ #define ZSTD_CHAINLOG_MAX \
+ ((int) (sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+ #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
+ #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX - 1)
+ #define ZSTD_SEARCHLOG_MIN 1
+ #define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
+ #define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+ #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
+ #define ZSTD_TARGETLENGTH_MIN \
+ 0 /* note : comparing this constant to an unsigned results in a tautological test */
+ #define ZSTD_STRATEGY_MIN ZSTD_fast
+ #define ZSTD_STRATEGY_MAX ZSTD_btultra2
+ #define ZSTD_BLOCKSIZE_MAX_MIN \
+ (1 \
+ << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */
+
+
+ #define ZSTD_OVERLAPLOG_MIN 0
+ #define ZSTD_OVERLAPLOG_MAX 9
+
+ #define ZSTD_WINDOWLOG_LIMIT_DEFAULT \
+ 27 /* by default, the streaming decoder will refuse any frame
+ * requiring a window size larger than (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT),
+ * to preserve host's memory from unreasonable requirements.
+ * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+ * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+ /* LDM parameter bounds */
+ #define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
+ #define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
+ #define ZSTD_LDM_MINMATCH_MIN 4
+ #define ZSTD_LDM_MINMATCH_MAX 4096
+ #define ZSTD_LDM_BUCKETSIZELOG_MIN 1
+ #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
+ #define ZSTD_LDM_HASHRATELOG_MIN 0
+ #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+ /* Advanced parameter bounds */
+ #define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
+ #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+ #define ZSTD_SRCSIZEHINT_MIN 0
+ #define ZSTD_SRCSIZEHINT_MAX INT_MAX
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+ unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
+ * If offset == 0 and matchLength == 0, this sequence represents the last
+ * literals in the block of litLength size.
+ */
+
+ unsigned int litLength; /* Literal length of the sequence. */
+ unsigned int matchLength; /* Match length of the sequence. */
+
+ unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
+ * Ranges from [0, 3].
+ *
+ * Repeat offsets are essentially previous offsets from previous sequences sorted in
+ * recency order. For more detail, see doc/zstd_compression_format.md
+ *
+ * If rep == 0, then 'offset' does not contain a repeat offset.
+ * If rep > 0:
+ * If litLength != 0:
+ * rep == 1 --> offset == repeat_offset_1
+ * rep == 2 --> offset == repeat_offset_2
+ * rep == 3 --> offset == repeat_offset_3
+ * If litLength == 0:
+ * rep == 1 --> offset == repeat_offset_2
+ * rep == 2 --> offset == repeat_offset_3
+ * rep == 3 --> offset == repeat_offset_1 - 1
+ *
+ * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+ * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+ * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+ * use this 'rep' field at all (as of now).
+ */ +} ZSTD_Sequence; + +typedef struct { + unsigned + windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned + chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned + minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned + targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int + checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int + noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = + 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = + 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = + 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = + 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = + 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. 
In this model, compression is
+ * slower per input byte, because the compressor has to search two sets of
+ * tables. However, this model incurs no start-up cost (as long as the
+ * working context's tables can be reused). For small inputs, this can be
+ * faster than copying the CDict's tables.
+ *
+ * - The CDict's tables are not used at all, and instead we use the working
+ * context alone to reload the dictionary and use params based on the source
+ * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+ * This method is effective when the dictionary sizes are very small relative
+ * to the input size, and the input size is fairly large to begin with.
+ *
+ * Zstd has a simple internal heuristic that selects which strategy to use
+ * at the beginning of a compression. However, if experimentation shows that
+ * Zstd is making poor choices, it is possible to override that choice with
+ * this enum.
+ */
+ ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+ ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
+ ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
+ ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
+ ZSTD_lcm_auto =
+ 0, /**< Automatically determine the compression mode based on the compression level.
+ * Negative compression levels will be uncompressed, and positive compression
+ * levels will be compressed. */
+ ZSTD_lcm_huffman =
+ 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
+ * emitted if Huffman compression is not profitable. */
+ ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
+} ZSTD_literalCompressionMode_e;
+
+typedef enum {
+ /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final
+ * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable
+ * or ZSTD_ps_disable allows force-enabling or force-disabling the feature.
+ */
+ ZSTD_ps_auto =
+ 0, /* Let the library automatically determine whether the feature shall be enabled */
+ ZSTD_ps_enable = 1, /* Force-enable the feature */
+ ZSTD_ps_disable = 2 /* Do not use the feature */
+} ZSTD_paramSwitch_e;
+
+/***************************************
+* Frame header and size functions
+***************************************/
+
+/*! ZSTD_findDecompressedSize() :
+ * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ * `srcSize` must be the _exact_ size of this series
+ * (i.e. there should be a frame boundary at `src + srcSize`)
+ * @return : - decompressed size of all data in all successive frames
+ * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ * - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ * In which case, it's necessary to use streaming mode to decompress data.
+ * note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ * note 3 : decompressed size can be very large (64-bits value),
+ * potentially larger than what local system can handle as a single memory segment.
+ * In which case, it's necessary to use streaming mode to decompress data.
+ * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ * Always ensure result fits within application's authorized limits.
+ * Each application can set its own limits.
+ * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ * read each contained frame header. This is fast as most of the data is skipped,
+ * however it does mean that all frame data must be present and valid. */
+ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_decompressBound() :
+ * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ * `srcSize` must be the _exact_ size of this series
+ * (i.e. there should be a frame boundary at `src + srcSize`)
+ * @return : - upper-bound for the decompressed size of all data in all successive frames
+ * - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+ * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ * upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+ * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ * or an error code (if srcSize is too small) */
+ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+ ZSTD_frame,
+ ZSTD_skippableFrame
+} ZSTD_frameType_e;
+typedef struct {
+ unsigned long long
+ frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+ unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
+ unsigned blockSizeMax;
+ ZSTD_frameType_e
+ frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+ unsigned headerSize;
+ unsigned dictID;
+ unsigned checksumFlag;
+ unsigned _reserved1;
+ unsigned _reserved2;
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ * decode Frame Header, or requires a larger `srcSize`.
+ * @return : 0, `zfhPtr` is correctly filled,
+ * >0, `srcSize` is too small, value is the wanted `srcSize` amount,
+ * or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr,
+ const void* src,
+ size_t srcSize); /**< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ * same as ZSTD_getFrameHeader(),
+ * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
+ const void* src,
+ size_t srcSize,
+ ZSTD_format_e format);
+
+/*! ZSTD_decompressionMargin() :
+ * Zstd supports in-place decompression, where the input and output buffers overlap.
+ * In this case, the output buffer must be at least (Margin + Output_Size) bytes large,
+ * and the input buffer must be at the end of the output buffer.
+ *
+ *  _______________________ Output Buffer ________________________
+ * |                                                              |
+ * |                                        ____ Input Buffer ____|
+ * |                                       |                      |
+ * v                                       v                      v
+ * |---------------------------------------|-----------|----------|
+ * ^                                                   ^          ^
+ * |___________________ Output_Size ___________________|_ Margin _|
+ *
+ * NOTE: See also ZSTD_DECOMPRESSION_MARGIN().
+ * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or
+ * ZSTD_decompressDCtx().
+ * NOTE: This function supports multi-frame input.
+ *
+ * @param src The compressed frame(s)
+ * @param srcSize The size of the compressed frame(s)
+ * @returns The decompression margin or an error that can be checked with ZSTD_isError().
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize);
+
+ /*! ZSTD_DECOMPRESSION_MARGIN() :
+ * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from
+ * the compressed frame, compute it from the original size and the block size.
+ * See ZSTD_decompressionMargin() for details.
+ *
+ * WARNING: This macro does not support multi-frame input; the input must be a single
+ * zstd frame. If you need that support, use the function, or implement it yourself.
+ *
+ * @param originalSize The original uncompressed size of the data.
+ * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX).
+ * Unless you explicitly set the windowLog smaller than
+ * ZSTD_BLOCKSIZELOG_MAX, you can just use ZSTD_BLOCKSIZE_MAX.
+ */
+ #define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) \
+ ((size_t) (ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + 4 /* checksum */ \
+ + ((originalSize) == 0 ? 0 \
+ : 3 \
+ * (((originalSize) + (blockSize) - 1) \
+ / (blockSize))) /* 3 bytes per block */ \
+ + (blockSize) /* One block of margin */ \
+ ))
+
+typedef enum {
+ ZSTD_sf_noBlockDelimiters =
+ 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+ ZSTD_sf_explicitBlockDelimiters =
+ 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_sequenceBound() :
+ * `srcSize` : size of the input buffer
+ * @return : upper-bound for the number of sequences that can be generated
+ * from a buffer of srcSize bytes
+ *
+ * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
+
+/*! ZSTD_generateSequences() :
+ * WARNING: This function is meant for debugging and informational purposes ONLY!
+ * Its implementation is flawed, and it will be deleted in a future version.
+ * It is not guaranteed to succeed, as there are several cases where it will give
+ * up and fail. You should NOT use this function in production code.
+ *
+ * This function is deprecated, and will be removed in a future version.
+ *
+ * Generate sequences using ZSTD_compress2(), given a source buffer.
+ *
+ * @param zc The compression context to be used for ZSTD_compress2(). Set any
+ * compression parameters you need on this context.
+ * @param outSeqs The output sequences buffer of size @p outSeqsSize
+ * @param outSeqsSize The size of the output sequences buffer.
+ * ZSTD_sequenceBound(srcSize) is an upper bound on the number
+ * of sequences that can be generated.
+ * @param src The source buffer to generate sequences from of size @p srcSize.
+ * @param srcSize The size of the source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * @returns The number of sequences generated, necessarily less than + * ZSTD_sequenceBound(srcSize), or an error code that can be checked + * with ZSTD_isError(). + */ +ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") +ZSTDLIB_STATIC_API size_t ZSTD_generateSequences( + ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. + * @src contains the entire input (not just the literals). + * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. 
+ * Note 2: Once we integrate the ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ * and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size, or a ZSTD error code.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
+ void* dst,
+ size_t dstSize,
+ const ZSTD_Sequence* inSeqs,
+ size_t inSeqsSize,
+ const void* src,
+ size_t srcSize);
+
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(
+ void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned magicVariant);
+
+/*! ZSTD_readSkippableFrame() :
+ * Retrieves the content of a zstd skippable frame starting at src, and writes it to dst buffer.
+ *
+ * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
+ * in the magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_readSkippableFrame(
+ void* dst, size_t dstCapacity, unsigned* magicVariant, const void* src, size_t srcSize);
+
+/*! ZSTD_isSkippableFrame() :
+ * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
+ */
+ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
+
+
+/***************************************
+* Memory management
+***************************************/
+
+/*! ZSTD_estimate*() :
+ * These functions make it possible to estimate memory usage
+ * of a future {D,C}Ctx, before its creation.
+ * This is useful in combination with ZSTD_initStatic(),
+ * which makes it possible to employ a static buffer for ZSTD_CCtx* state.
+ *
+ * ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
+ * associated with any compression level up to max specified one.
+ * The estimate will assume the input may be arbitrarily large,
+ * which is the worst case.
+ *
+ * Note that the size estimation is specific for one-shot compression,
+ * it is not valid for streaming (see ZSTD_estimateCStreamSize*())
+ * nor other potential ways of using a ZSTD_CCtx* state.
+ *
+ * When srcSize can be bound by a known and rather "small" value,
+ * this knowledge can be used to provide a tighter budget estimation
+ * because the ZSTD_CCtx* state will need less memory for small inputs.
+ * This tighter estimation can be provided by employing more advanced functions
+ * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ * Note : only single-threaded compression is supported.
+ * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimateCStreamSize() :
+ * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
+ * using any compression level up to the max specified one.
+ * It will also consider src size to be arbitrarily "large", which is a worst case scenario.
+ * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter().
+ * Note : CStream size estimation is only correct for single-threaded compression.
+ * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
+ * Size estimates assume that no external sequence producer is registered.
+ *
+ * ZSTD_DStream memory budget depends on frame's window size.
+ * This information can be passed manually, using ZSTD_estimateDStreamSize,
+ * or deduced from a valid frame header, using ZSTD_estimateDStreamSize_fromFrame();
+ * Any frame requesting a window size larger than max specified one will be rejected.
+ * Note : if streaming is initialized with function ZSTD_init?Stream_usingDict(),
+ * an internal ?Dict will be created, whose additional size is not estimated here.
+ * In this case, get total size by adding ZSTD_estimate?DictSize
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
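+ *
+ * Illustrative sketch (not upstream text) : budgeting a DDict before creating it,
+ * where dictSize is the caller's dictionary size in bytes :
+ * size_t const byCopyBudget = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
+ * size_t const byRefBudget = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byRef);
+ * byRefBudget is the smaller of the two, since the dictionary content is not copied.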
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize,
+ ZSTD_compressionParameters cParams,
+ ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ * Initialize an object using a pre-allocated fixed-size buffer.
+ * workspace: The memory area to emplace the object into.
+ * Provided pointer *must be 8-byte aligned*.
+ * Buffer must outlive object.
+ * workspaceSize: Use ZSTD_estimate*Size() to determine
+ * how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ * or NULL if error (size too small, incorrect alignment, etc.)
+ * Note : zstd will never resize nor malloc() when using a static buffer.
+ * If the object requires more memory than available,
+ * zstd will just error out (typically ZSTD_error_memory_allocation).
+ * Note 2 : there is no corresponding "free" function.
+ * Since workspace is allocated externally, it must be freed externally too.
+ * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ * into its associated cParams.
+ * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ * Limitation 2 : static cctx currently not compatible with multi-threading.
+ * Limitation 3 : static dctx is incompatible with legacy support.
+ */
+ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_STATIC_API ZSTD_CStream*
+ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_STATIC_API ZSTD_DStream*
+ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_STATIC_API const ZSTD_CDict* ZSTD_initStaticCDict(void* workspace,
+ size_t workspaceSize,
+ const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_compressionParameters cParams);
+
+ZSTDLIB_STATIC_API const ZSTD_DDict* ZSTD_initStaticDDict(void* workspace,
+ size_t workspaceSize,
+ const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType);
+
+
+/*! Custom memory allocation :
+ * These prototypes make it possible to pass your own allocation/free functions.
+ * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ * All allocation/free operations will be completed using these custom variants instead of regular ones.
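+ *
+ * Minimal sketch (helper names are illustrative, not part of the API; assumes <stdlib.h>) :
+ * static void* passthroughAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
+ * static void passthroughFree(void* opaque, void* address) { (void)opaque; free(address); }
+ * static ZSTD_customMem const passthroughMem = { passthroughAlloc, passthroughFree, NULL };
+ * ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(passthroughMem);
+ * The resulting context is released as usual, with ZSTD_freeDCtx(dctx).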
+ */
+typedef void* (*ZSTD_allocFunction)(void* opaque, size_t size);
+typedef void (*ZSTD_freeFunction)(void* opaque, void* address);
+typedef struct {
+ ZSTD_allocFunction customAlloc;
+ ZSTD_freeFunction customFree;
+ void* opaque;
+} ZSTD_customMem;
+static
+ #ifdef __GNUC__
+ __attribute__((__unused__))
+ #endif
+
+ #if defined(__clang__) && __clang_major__ >= 5
+ #pragma clang diagnostic push
+ #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+ #endif
+ ZSTD_customMem const ZSTD_defaultCMem = {NULL, NULL,
+ NULL}; /**< this constant defers to stdlib's functions */
+ #if defined(__clang__) && __clang_major__ >= 5
+ #pragma clang diagnostic pop
+ #endif
+
+ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_compressionParameters cParams,
+ ZSTD_customMem customMem);
+
+/*! Thread pool :
+ * These prototypes make it possible to share a thread pool among multiple compression contexts.
+ * This can limit resources for applications with multiple threads where each one uses
+ * a threaded compression mode (via ZSTD_c_nbWorkers parameter).
+ * ZSTD_createThreadPool creates a new thread pool with a given number of threads.
+ * Note that such a pool must remain alive for as long as it is being used.
+ * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
+ * to use an internal thread pool).
+ * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
+ */
+typedef struct POOL_ctx_s ZSTD_threadPool;
+ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_STATIC_API void ZSTD_freeThreadPool(ZSTD_threadPool* pool); /* accept NULL pointer */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
+
+
+/*
+ * This API is temporary and is expected to change or disappear in the future!
+ */
+ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced2(const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_customMem customMem);
+
+ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_customMem customMem);
+
+
+/***************************************
+* Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ * Create a digested dictionary for compression
+ * Dictionary content is just referenced, not duplicated.
+ * As a consequence, `dictBuffer` **must** outlive CDict,
+ * and its content must remain unmodified throughout the lifetime of CDict.
+ * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
+ZSTDLIB_STATIC_API ZSTD_CDict*
+ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ * `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
+ unsigned long long estimatedSrcSize,
+ size_t dictSize);
+
+/*! ZSTD_getParams() :
+ * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
+ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel,
+ unsigned long long estimatedSrcSize,
+ size_t dictSize);
+
+/*! ZSTD_checkCParams() :
+ * Ensure param values remain within authorized range.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustCParams() :
+ * optimize params for a given `srcSize` and `dictSize`.
+ * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
+ * `dictSize` must be `0` when there is no dictionary.
+ * cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
+ * This function never fails (wide contract) */
+ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+ unsigned long long srcSize,
+ size_t dictSize);
+
+/*! ZSTD_CCtx_setCParams() :
+ * Set all parameters provided within @p cparams into the working @p cctx.
+ * Note : if modifying parameters during compression (MT mode only),
+ * changes to the .windowLog parameter will be ignored.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
+ * On failure, no parameters are updated.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
+
+/*! ZSTD_CCtx_setFParams() :
+ * Set all parameters provided within @p fparams into the working @p cctx.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams);
+
+/*! ZSTD_CCtx_setParams() :
+ * Set all parameters provided within @p params into the working @p cctx.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params);
+
+/*! ZSTD_compress_advanced() :
+ * Note : this function is now DEPRECATED.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ * This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2")
+ZSTDLIB_STATIC_API
+size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+ void* dst,
+ size_t dstCapacity,
+ const void* src,
+ size_t srcSize,
+ const void* dict,
+ size_t dictSize,
+ ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ * Note : this function is now DEPRECATED.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ * This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
+ZSTDLIB_STATIC_API
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ void* dst,
+ size_t dstCapacity,
+ const void* src,
+ size_t srcSize,
+ const ZSTD_CDict* cdict,
+ ZSTD_frameParameters fParams);
+
+
+/*!
ZSTD_CCtx_loadDictionary_byReference() :
+ * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx,
+ const void* dict,
+ size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ * how to load the dictionary (by copy ? by reference ?)
+ * and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx,
+ const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ * Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx,
+ const void* prefix,
+ size_t prefixSize,
+ ZSTD_dictContentType_e dictContentType);
+
+/* === experimental parameters === */
+/* these parameters can be used with ZSTD_CCtx_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+ * which makes compressed files more rsync friendly
+ * by adding periodic synchronization points to the compressed data.
+ * The target average block size is ZSTD_c_jobSize / 2.
+ * It's possible to modify the job size to increase or decrease
+ * the granularity of the synchronization point.
+ * Once the jobSize is smaller than the window size,
+ * it will result in compression ratio degradation.
+ * NOTE 1: rsyncable mode only works when multithreading is enabled.
+ * NOTE 2: rsyncable performs poorly in combination with long range mode,
+ * since it will decrease the effectiveness of synchronization points,
+ * though mileage may vary.
+ * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+ * If the selected compression level is already running significantly slower,
+ * the overall speed won't be significantly impacted.
+ */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+ /* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+ #define ZSTD_c_format ZSTD_c_experimentalParam2
+
+ /* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+ #define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+ /* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+ #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+ /* Controlled with ZSTD_paramSwitch_e enum.
+ * Default is ZSTD_ps_auto.
+ * Set to ZSTD_ps_disable to never compress literals.
+ * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals
+ * may still be emitted if Huffman compression is not beneficial to use.)
+ *
+ * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
+ * literals compression based on the compression parameters - specifically,
+ * negative compression levels do not use literal compression.
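+ *
+ * Illustrative sketch : force-disable literal compression on a given cctx :
+ * ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_ps_disable);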
+ */
+ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+ /* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if the guess considerably underestimates it */
+ #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+ /* Controls whether the new and experimental "dedicated dictionary search
+ * structure" can be used. This feature is still rough around the edges, be
+ * prepared for surprising behavior!
+ *
+ * How to use it:
+ *
+ * When using a CDict, whether to use this feature or not is controlled at
+ * CDict creation, and it must be set in a CCtxParams set passed into that
+ * construction (via ZSTD_createCDict_advanced2()). A compression will then
+ * use the feature or not based on how the CDict was constructed; the value of
+ * this param, set in the CCtx, will have no effect.
+ *
+ * However, when a dictionary buffer is passed into a CCtx, such as via
+ * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
+ * whether the CDict that is created internally can use the feature or not.
+ *
+ * What it does:
+ *
+ * Normally, the internal data structures of the CDict are analogous to what
+ * would be stored in a CCtx after compressing the contents of a dictionary.
+ * To an approximation, a compression using a dictionary can then use those
+ * data structures to simply continue what is effectively a streaming
+ * compression where the simulated compression of the dictionary left off.
+ * Which is to say, the search structures in the CDict are normally the same
+ * format as in the CCtx.
+ *
+ * It is possible to do better, since the CDict is not like a CCtx: the search
+ * structures are written once during CDict creation, and then are only read
+ * after that, while the search structures in the CCtx are both read and
+ * written as the compression goes along. This means we can choose a search
+ * structure for the dictionary that is read-optimized.
+ *
+ * This feature enables the use of that different structure.
+ *
+ * Note that some of the members of the ZSTD_compressionParameters struct have
+ * different semantics and constraints in the dedicated search structure. It is
+ * highly recommended that you simply set a compression level in the CCtxParams
+ * you pass into the CDict creation call, and avoid messing with the cParams
+ * directly.
+ *
+ * Effects:
+ *
+ * This will only have any effect when the selected ZSTD_strategy
+ * implementation supports this feature. Currently, that's limited to
+ * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
+ *
+ * Note that this means that the CDict tables can no longer be copied into the
+ * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
+ * usable. The dictionary can only be attached or reloaded.
+ *
+ * In general, you should expect compression to be faster--sometimes very much
+ * so--and CDict creation to be slightly slower. Eventually, we will probably
+ * make this mode the default.
+ */
+ #define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
+
+ /* ZSTD_c_stableInBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that input data presented with ZSTD_inBuffer
+ * will ALWAYS be the same between calls.
+ * Technically, the @src pointer must never be changed,
+ * and the @pos field can only be updated by zstd.
+ * However, it's possible to increase the @size field,
+ * allowing scenarios where more data can be appended after compression starts.
+ * These conditions are checked by the compressor,
+ * and compression will fail if they are not respected.
+ * Also, data in the ZSTD_inBuffer within the range [src, src + pos)
+ * MUST not be modified during compression or it will result in data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if conditions are not respected.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or it will result in data corruption.
+ * This is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to rely on user provided buffer instead.
+ */
+ #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+ /* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+ #define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+ /* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+ #define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+ /* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ *
+ */
+ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
+ /* ZSTD_c_useBlockSplitter
+ * Controlled with ZSTD_paramSwitch_e enum.
+ * Default is ZSTD_ps_auto.
+ * Set to ZSTD_ps_disable to never use block splitter.
+ * Set to ZSTD_ps_enable to always use block splitter.
+ *
+ * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
+ * block splitting based on the compression parameters.
+ */
+ #define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13
+
+ /* ZSTD_c_useRowMatchFinder
+ * Controlled with ZSTD_paramSwitch_e enum.
+ * Default is ZSTD_ps_auto.
+ * Set to ZSTD_ps_disable to never use row-based matchfinder.
+ * Set to ZSTD_ps_enable to force usage of row-based matchfinder.
+ *
+ * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
+ * the row-based matchfinder based on support for SIMD instructions and the window log.
+ * Note that this only pertains to compression strategies: greedy, lazy, and lazy2
+ */
+ #define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
+
+ /* ZSTD_c_deterministicRefPrefix
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Zstd produces different results for prefix compression when the prefix is
+ * directly adjacent to the data about to be compressed vs. when it isn't.
+ * This is because zstd detects that the two buffers are contiguous and it can
+ * use a more efficient match finding algorithm. However, this produces different
+ * results than when the two buffers are non-contiguous. This flag forces zstd
+ * to always load the prefix in non-contiguous mode, even if it happens to be
+ * adjacent to the data, to guarantee determinism.
+ *
+ * If you really care about determinism when using a dictionary or prefix,
+ * like when doing delta compression, you should select this option. It comes
+ * at a speed penalty of about 2.5% if the dictionary and data happened to be
+ * contiguous, and is free if they weren't contiguous. We don't expect that
+ * intentionally making the dictionary and data contiguous will be worth the
+ * cost to memcpy() the data.
+ */
+ #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
+
+ /* ZSTD_c_prefetchCDictTables
+ * Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto.
+ *
+ * In some situations, zstd uses CDict tables in-place rather than copying them
+ * into the working context. (See docs on ZSTD_dictAttachPref_e above for details).
+ * In such situations, compression speed is seriously impacted when CDict tables are
+ * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables
+ * when they are used in-place.
+ *
+ * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit.
+ * For sufficiently large inputs, zstd will by default memcpy() CDict tables
+ * into the working context, so there is no need to prefetch. This parameter is
+ * targeted at a middle range of input sizes, where a prefetch is cheap enough to be
+ * useful but memcpy() is too expensive. The exact range of input sizes where this
+ * makes sense is best determined by careful experimentation.
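+ *
+ * Illustrative sketch : opting in explicitly for such a mid-sized workload :
+ * ZSTD_CCtx_setParameter(cctx, ZSTD_c_prefetchCDictTables, ZSTD_ps_enable);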
+ *
+ * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable,
+ * but in the future zstd may conditionally enable this feature via an auto-detection
+ * heuristic for cold CDicts.
+ * Use ZSTD_ps_disable to opt out of prefetching under any circumstances.
+ */
+ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
+
+ /* ZSTD_c_enableSeqProducerFallback
+ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
+ *
+ * Controls whether zstd will fall back to an internal sequence producer if an
+ * external sequence producer is registered and returns an error code. This fallback
+ * is block-by-block: the internal sequence producer will only be called for blocks
+ * where the external sequence producer returns an error code. Fallback parsing will
+ * follow any other cParam settings, such as compression level, the same as in a
+ * normal (fully-internal) compression operation.
+ *
+ * The user is strongly encouraged to read the full Block-Level Sequence Producer API
+ * documentation (below) before setting this parameter. */
+ #define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17
+
+ /* ZSTD_c_maxBlockSize
+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
+ * The default is ZSTD_BLOCKSIZE_MAX, and setting it to 0 selects the default.
+ *
+ * This parameter can be used to set an upper bound on the blocksize
+ * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper
+ * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (which would make
+ * compressBound() inaccurate). Currently only meant to be used for testing.
+ *
+ */
+ #define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
+
+ /* ZSTD_c_searchForExternalRepcodes
+ * This parameter affects how zstd parses external sequences, such as sequences
+ * provided through the compressSequences() API or from an external block-level
+ * sequence producer.
+ *
+ * If set to ZSTD_ps_enable, the library will check for repeated offsets in
+ * external sequences, even if those repcodes are not explicitly indicated in
+ * the "rep" field. Note that this is the only way to exploit repcode matches
+ * while using compressSequences() or an external sequence producer, since zstd
+ * currently ignores the "rep" field of external sequences.
+ *
+ * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in
+ * external sequences, regardless of whether the "rep" field has been set. This
+ * reduces sequence compression overhead by about 25% while sacrificing some
+ * compression ratio.
+ *
+ * The default value is ZSTD_ps_auto, for which the library will enable/disable
+ * based on compression level.
+ *
+ * Note: for now, this param only has an effect if ZSTD_c_blockDelimiters is
+ * set to ZSTD_sf_explicitBlockDelimiters. That may change in the future.
+ */
+ #define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19
+
+/*! ZSTD_CCtx_getParameter() :
+ * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ * and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx,
+ ZSTD_cParameter param,
+ int* value);
+
+
+/*! ZSTD_CCtx_params :
+ * Quick howto :
+ * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ * an existing ZSTD_CCtx_params structure.
+ * This is similar to
+ * ZSTD_CCtx_setParameter().
+ * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ * an existing CCtx.
+ * These parameters will be applied to
+ * all subsequent frames.
+ * - ZSTD_compressStream2() : Do compression using the CCtx.
+ * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
+ *
+ * This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
+ * for static allocation of a CCtx for single-threaded compression.
+ */
+ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */
+
+/*! ZSTD_CCtxParams_reset() :
+ * Reset params to default values.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_init() :
+ * Initializes the compression parameters of cctxParams according to
+ * compression level. All other parameters are reset to their default values.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+
+/*! ZSTD_CCtxParams_init_advanced() :
+ * Initializes the compression and frame parameters of cctxParams according to
+ * params. All other parameters are reset to their default values.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams,
+ ZSTD_parameters params);
+
+/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
+ * Similar to ZSTD_CCtx_setParameter.
+ * Set one compression parameter, selected by enum ZSTD_cParameter.
+ * Parameters must be applied to a ZSTD_CCtx using
+ * ZSTD_CCtx_setParametersUsingCCtxParams().
+ * @result : a code representing success or failure (which can be tested with
+ * ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params,
+ ZSTD_cParameter param,
+ int value);
+
+/*! ZSTD_CCtxParams_getParameter() :
+ * Similar to ZSTD_CCtx_getParameter.
+ * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params,
+ ZSTD_cParameter param,
+ int* value);
+
+/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
+ * Apply a set of ZSTD_CCtx_params to the compression context.
+ * This can be done even after compression is started;
+ * if nbWorkers==0, this will have no impact until a new compression is started.
+ * if nbWorkers>=1, new parameters will be picked up at next job,
+ * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(ZSTD_CCtx* cctx,
+ const ZSTD_CCtx_params* params);
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ * Same as ZSTD_compressStream2(),
+ * but using only integral types as arguments.
+ * This variant might be helpful for binders from dynamic languages
+ * which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs(ZSTD_CCtx* cctx,
+ void* dst,
+ size_t dstCapacity,
+ size_t* dstPos,
+ const void* src,
+ size_t srcSize,
+ size_t* srcPos,
+ ZSTD_EndDirective endOp);
+
+
+/***************************************
+* Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ * Tells if the content of `buffer` starts with a valid Frame Identifier.
+ * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ * Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ * Create a digested dictionary, ready to start decompression operation without startup delay.
+ * Dictionary content is referenced, and therefore stays in dictBuffer.
+ * It is important that dictBuffer outlives DDict,
+ * it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer,
+ size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ * Same as ZSTD_DCtx_loadDictionary(),
+ * but references `dict` content instead of copying it into `dctx`.
+ * This saves memory if `dict` remains around.
+ * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx,
+ const void* dict,
+ size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ * Same as ZSTD_DCtx_loadDictionary(),
+ * but gives direct control over
+ * how to load the dictionary (by copy ? by reference ?)
+ * and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+ const void* dict,
+ size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ * Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
+ const void* prefix,
+ size_t prefixSize,
+ ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ * Refuses to allocate internal buffers for frames requiring a window size larger than the provided limit.
+ * This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+
+/*! ZSTD_DCtx_getParameter() :
+ * Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
+ * and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx,
+ ZSTD_dParameter param,
+ int* value);
+
+ /* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+ #define ZSTD_d_format ZSTD_d_experimentalParam1
+ /* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos).
This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+ #define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+ /* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
+ */
+ #define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+ /* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers.
+ *
+ * Param has values of type ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict() will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
+ *
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
+ */
+ #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
+
+ /* ZSTD_d_disableHuffmanAssembly
+ * Set to 1 to disable the Huffman assembly implementation.
+ * The default value is 0, which allows zstd to use the Huffman assembly
+ * implementation if available.
+ *
+ * This parameter can be used to disable Huffman assembly at runtime.
+ * If you want to disable it at compile time you can define the macro
+ * ZSTD_DISABLE_ASM.
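+ *
+ * Illustrative sketch : opt out of the assembly path on a given dctx :
+ * ZSTD_DCtx_setParameter(dctx, ZSTD_d_disableHuffmanAssembly, 1);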
+ */
+ #define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
+
+ /* ZSTD_d_maxBlockSize
+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
+ *
+ * Forces the decompressor to reject blocks whose content size is
+ * larger than the configured maxBlockSize. When maxBlockSize is
+ * larger than the windowSize, the windowSize is used instead.
+ * This saves memory on the decoder when you know all blocks are small.
+ *
+ * This option is typically used in conjunction with ZSTD_c_maxBlockSize.
+ *
+ * WARNING: This causes the decoder to reject otherwise valid frames
+ * that have block sizes larger than the configured maxBlockSize.
+ */
+ #define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
+
+
+/*! ZSTD_DCtx_setFormat() :
+ * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
+ * Instruct the decoder context about what kind of data to decode next.
+ * This instruction is mandatory to decode data without a fully-formed header,
+ * such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
+ZSTDLIB_STATIC_API
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+
+/*! ZSTD_decompressStream_simpleArgs() :
+ * Same as ZSTD_decompressStream(),
+ * but using only integral types as arguments.
+ * This can be helpful for binders from dynamic languages
+ * which have troubles handling structures containing memory pointers.
+ */
+ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs(ZSTD_DCtx* dctx,
+                                                           void* dst,
+                                                           size_t dstCapacity,
+                                                           size_t* dstPos,
+                                                           const void* src,
+                                                           size_t srcSize,
+                                                           size_t* srcPos);
+
+
+/********************************************************************
+* Advanced streaming functions
+* Warning : most of these functions are now redundant with the Advanced API.
+* Once Advanced API reaches "stable" status,
+* redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*===== Advanced Streaming compression functions =====*/
+
+/*! ZSTD_initCStream_srcSize() :
+ * This function is DEPRECATED, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * This prototype will generate compilation warnings.
+ */
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+ZSTDLIB_STATIC_API
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                                int compressionLevel,
+                                unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingDict() :
+ * This function is DEPRECATED, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * This creates an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, + size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParams(zcs, params); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, + size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setFParams(zcs, fParams); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will reuse it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. 
+ * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long + flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_STATIC_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. + * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + */ +ZSTD_DEPRECATED( + "use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, + const void* dict, + size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * reuse decompression parameters from previous init; saves dictionary loading + */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* + * + * *** OVERVIEW *** + * The Block-Level Sequence Producer API allows users to provide their own custom + * sequence producer which libzstd invokes to process each block. The produced list + * of sequences (literals and matches) is then post-processed by libzstd to produce + * valid compressed blocks. + * + * This block-level offload API is a more granular complement of the existing + * frame-level offload API compressSequences() (introduced in v1.5.1). It offers + * an easier migration story for applications already integrated with libzstd: the + * user application continues to invoke the same compression functions + * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits + * from the specific advantages of the external sequence producer. For example, + * the sequence producer could be tuned to take advantage of known characteristics + * of the input, to offer better speed / ratio, or could leverage hardware + * acceleration not available within libzstd itself. + * + * See contrib/externalSequenceProducer for an example program employing the + * Block-Level Sequence Producer API. + * + * *** USAGE *** + * The user is responsible for implementing a function of type + * ZSTD_sequenceProducer_F. For each block, zstd will pass the following + * arguments to the user-provided function: + * + * - sequenceProducerState: a pointer to a user-managed state for the sequence + * producer. + * + * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. + * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory + * backing outSeqs is managed by the CCtx. + * + * - src, srcSize: an input buffer for the sequence producer to parse. + * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. + * + * - dict, dictSize: a history buffer, which may be empty, which the sequence + * producer may reference as it parses the src buffer. Currently, zstd will + * always pass dictSize == 0 into external sequence producers, but this will + * change in the future. + * + * - compressionLevel: a signed integer representing the zstd compression level + * set by the user for the current operation. The sequence producer may choose + * to use this information to change its compression strategy and speed/ratio + * tradeoff. Note: the compression level does not reflect zstd parameters set + * through the advanced API. + * + * - windowSize: a size_t representing the maximum allowed offset for external + * sequences. Note that sequence offsets are sometimes allowed to exceed the + * windowSize if a dictionary is present, see doc/zstd_compression_format.md + * for details. + * + * The user-provided function shall return a size_t representing the number of + * sequences written to outSeqs. This return value will be treated as an error + * code if it is greater than outSeqsCapacity. The return value must be non-zero + * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided + * for convenience, but any value greater than outSeqsCapacity will be treated as + * an error code. 
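+ *
+ * [Editor's note] A minimal conforming producer, shown as an illustrative
+ * sketch only (not upstream text); it emits one literals-only sequence per
+ * block, which satisfies the validity rules described just below:
+ *
+ *     size_t literalsOnlyProducer(void* state, ZSTD_Sequence* outSeqs,
+ *                                 size_t outSeqsCapacity, const void* src,
+ *                                 size_t srcSize, const void* dict, size_t dictSize,
+ *                                 int compressionLevel, size_t windowSize)
+ *     {
+ *         if (srcSize == 0) return 0;                  // empty block: no sequences
+ *         if (outSeqsCapacity < 1) return ZSTD_SEQUENCE_PRODUCER_ERROR;
+ *         outSeqs[0].offset      = 0;                  // final-sequence marker, paired
+ *         outSeqs[0].matchLength = 0;                  //   with matchLength == 0
+ *         outSeqs[0].litLength   = (unsigned)srcSize;  // whole block as literals
+ *         outSeqs[0].rep         = 0;                  //   (srcSize <= ZSTD_BLOCKSIZE_MAX)
+ *         return 1;                                    // one sequence written
+ *     }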
+ * + * If the user-provided function does not return an error code, the sequences + * written to outSeqs must be a valid parse of the src buffer. Data corruption may + * occur if the parse is not valid. A parse is defined to be valid if the + * following conditions hold: + * - The sum of matchLengths and literalLengths must equal srcSize. + * - All sequences in the parse, except for the final sequence, must have + * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have + * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. + * - All offsets must respect the windowSize parameter as specified in + * doc/zstd_compression_format.md. + * - If the final sequence has matchLength == 0, it must also have offset == 0. + * + * zstd will only validate these conditions (and fail compression if they do not + * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence + * validation has a performance cost. + * + * If the user-provided function returns an error, zstd will either fall back + * to an internal sequence producer or fail the compression operation. The user can + * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback + * cParam. Fallback compression will follow any other cParam settings, such as + * compression level, the same as in a normal compression operation. + * + * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F + * function by calling + * ZSTD_registerSequenceProducer(cctx, + * sequenceProducerState, + * sequenceProducer) + * This setting will persist until the next parameter reset of the CCtx. + * + * The sequenceProducerState must be initialized by the user before calling + * ZSTD_registerSequenceProducer(). The user is responsible for destroying the + * sequenceProducerState. + * + * *** LIMITATIONS *** + * This API is compatible with all zstd compression APIs which respect advanced parameters. + * However, there are three limitations: + * + * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. + * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level + * external sequence producer. + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some + * cases (see its documentation for details). Users must explicitly set + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external + * sequence producer is registered. + * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default + * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence + * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). + * + * Second, history buffers are not currently supported. Concretely, zstd will always pass + * dictSize == 0 to the external sequence producer (for now). This has two implications: + * - Dictionaries are not currently supported. Compression will *not* fail if the user + * references a dictionary, but the dictionary won't have any effect. + * - Stream history is not currently supported. All advanced compression APIs, including + * streaming APIs, work with external sequence producers, but each block is treated as + * an independent chunk without history from previous blocks. + * + * Third, multi-threading within a single compression is not currently supported. 
In other words, + * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. + * Multi-threading across compressions is fine: simply create one CCtx per thread. + * + * Long-term, we plan to overcome all three limitations. There is no technical blocker to + * overcoming them. It is purely a question of engineering effort. + */ + + #define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t) (-1)) + +typedef size_t (*ZSTD_sequenceProducer_F)(void* sequenceProducerState, + ZSTD_Sequence* outSeqs, + size_t outSeqsCapacity, + const void* src, + size_t srcSize, + const void* dict, + size_t dictSize, + int compressionLevel, + size_t windowSize); + +/*! ZSTD_registerSequenceProducer() : + * Instruct zstd to use a block-level external sequence producer function. + * + * The sequenceProducerState must be initialized by the caller, and the caller is + * responsible for managing its lifetime. This parameter is sticky across + * compressions. It will remain set until the user explicitly resets compression + * parameters. + * + * Sequence producer registration is considered to be an "advanced parameter", + * part of the "advanced API". This means it will only have an effect on compression + * APIs which respect advanced parameters, such as compress2() and compressStream2(). + * Older compression APIs such as compressCCtx(), which predate the introduction of + * "advanced parameters", will ignore any external sequence producer setting. + * + * The sequence producer can be "cleared" by registering a NULL function pointer. This + * removes all limitations described above in the "LIMITATIONS" section of the API docs. + * + * The user is strongly encouraged to read the full API documentation (above) before + * calling this function. */ +ZSTDLIB_STATIC_API void ZSTD_registerSequenceProducer(ZSTD_CCtx* cctx, + void* sequenceProducerState, + ZSTD_sequenceProducer_F sequenceProducer); + +/*! ZSTD_CCtxParams_registerSequenceProducer() : + * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. + * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), + * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). + * + * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() + * is required, then this function is for you. Otherwise, you probably don't need it. + * + * See tests/zstreamtest.c for example usage. */ +ZSTDLIB_STATIC_API void ZSTD_CCtxParams_registerSequenceProducer( + ZSTD_CCtx_params* params, void* sequenceProducerState, ZSTD_sequenceProducer_F sequenceProducer); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions (DEPRECATED) +* +* This API is deprecated, and will be removed in a future version. +* It allows streaming (de)compression with user allocated buffers. +* However, it is hard to use, and not as well tested as the rest of +* our API. +* +* Please use the normal streaming API instead: ZSTD_compressStream2, +* and ZSTD_decompressStream. +* If there is functionality that you need, but it doesn't provide, +* please open an issue on our GitHub. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. 
+ ZSTD_CCtx object can be reused multiple times within successive compression operations.
+
+ Start by initializing a context.
+ Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
+
+ Then, consume your input using ZSTD_compressContinue().
+ There are some important considerations to keep in mind when using this advanced function :
+ - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+ - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+ - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+   Worst case evaluation is provided by ZSTD_compressBound().
+   ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+ - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+   It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+ - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+   In which case, it will "discard" the relevant memory section from its history.
+
+ Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+ It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+ Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+ `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
+*/
+
+/*===== Buffer-less streaming compression functions =====*/
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx,
+                                                       const void* dict,
+                                                       size_t dictSize,
+                                                       int compressionLevel);
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(
+    ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
+
+ZSTD_DEPRECATED(
+    "This function will likely be removed in a future release. It is misleading and has very limited utility.")
+ZSTDLIB_STATIC_API
+size_t ZSTD_copyCCtx(
+    ZSTD_CCtx* cctx,
+    const ZSTD_CCtx* preparedCCtx,
+    unsigned long long
+        pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.")
+ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(
+    ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API.
See docs.") +ZSTDLIB_STATIC_API size_t +ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +ZSTDLIB_STATIC_API +size_t ZSTD_compressBegin_advanced( + ZSTD_CCtx* cctx, + const void* dict, + size_t dictSize, + ZSTD_parameters params, + unsigned long long + pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +ZSTDLIB_STATIC_API +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, + const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, + unsigned long long const + pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be reused multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. 
+ At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ + +ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min( + unsigned long long windowSize, + unsigned long long + frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, + const void* dict, + size_t dictSize); +ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTD_DEPRECATED( + "This function will likely be removed in the next minor release. 
It is misleading and has very limited utility.") +ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { + ZSTDnit_frameHeader, + ZSTDnit_blockHeader, + ZSTDnit_block, + ZSTDnit_lastBlock, + ZSTDnit_checksum, + ZSTDnit_skippableFrame +} ZSTD_nextInputType_e; +ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + +/* ========================================= */ +/** Block level API (DEPRECATED) */ +/* ========================================= */ + +/*! + + This API is deprecated in favor of the regular compression API. + You can get the frame header down to 2 bytes by setting: + - ZSTD_c_format = ZSTD_f_zstd1_magicless + - ZSTD_c_contentSizeFlag = 0 + - ZSTD_c_checksumFlag = 0 + - ZSTD_c_dictIDFlag = 0 + + This API is not as well tested as our normal API, so we recommend not using it. + We will be removing it in a future version. If the normal API doesn't provide + the functionality you need, please open a GitHub issue. + + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") +ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx); +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") +ZSTDLIB_STATIC_API size_t +ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") +ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock( + ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. 
See docs.") +ZSTDLIB_STATIC_API size_t ZSTD_insertBlock( + ZSTD_DCtx* dctx, + const void* blockStart, + size_t + blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined(__cplusplus) +} +#endif diff --git a/src/external/zstd_errors.h b/src/external/zstd_errors.h new file mode 100644 index 00000000..f9ed52c1 --- /dev/null +++ b/src/external/zstd_errors.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined(__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBLE + /* Backwards compatibility with old macro name */ + #ifdef ZSTDERRORLIB_VISIBILITY + #define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY + #elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) + #define ZSTDERRORLIB_VISIBLE __attribute__((visibility("default"))) + #else + #define ZSTDERRORLIB_VISIBLE + #endif +#endif + +#ifndef ZSTDERRORLIB_HIDDEN + #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) + #define ZSTDERRORLIB_HIDDEN __attribute__((visibility("hidden"))) + #else + #define ZSTDERRORLIB_HIDDEN + #endif +#endif + +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT == 1) + #define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT == 1) + #define ZSTDERRORLIB_API \ + __declspec(dllimport) \ + ZSTDERRORLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else + #define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/
+typedef enum {
+    ZSTD_error_no_error = 0,
+    ZSTD_error_GENERIC = 1,
+    ZSTD_error_prefix_unknown = 10,
+    ZSTD_error_version_unsupported = 12,
+    ZSTD_error_frameParameter_unsupported = 14,
+    ZSTD_error_frameParameter_windowTooLarge = 16,
+    ZSTD_error_corruption_detected = 20,
+    ZSTD_error_checksum_wrong = 22,
+    ZSTD_error_literals_headerWrong = 24,
+    ZSTD_error_dictionary_corrupted = 30,
+    ZSTD_error_dictionary_wrong = 32,
+    ZSTD_error_dictionaryCreation_failed = 34,
+    ZSTD_error_parameter_unsupported = 40,
+    ZSTD_error_parameter_combination_unsupported = 41,
+    ZSTD_error_parameter_outOfBound = 42,
+    ZSTD_error_tableLog_tooLarge = 44,
+    ZSTD_error_maxSymbolValue_tooLarge = 46,
+    ZSTD_error_maxSymbolValue_tooSmall = 48,
+    ZSTD_error_stabilityCondition_notRespected = 50,
+    ZSTD_error_stage_wrong = 60,
+    ZSTD_error_init_missing = 62,
+    ZSTD_error_memory_allocation = 64,
+    ZSTD_error_workSpace_tooSmall = 66,
+    ZSTD_error_dstSize_tooSmall = 70,
+    ZSTD_error_srcSize_wrong = 72,
+    ZSTD_error_dstBuffer_null = 74,
+    ZSTD_error_noForwardProgress_destFull = 80,
+    ZSTD_error_noForwardProgress_inputEmpty = 82,
+    /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+    ZSTD_error_frameIndex_tooLarge = 100,
+    ZSTD_error_seekableIO = 102,
+    ZSTD_error_dstBuffer_wrong = 104,
+    ZSTD_error_srcBuffer_wrong = 105,
+    ZSTD_error_sequenceProducer_failed = 106,
+    ZSTD_error_externalSequences_invalid = 107,
+    ZSTD_error_maxCode =
+        120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(
+    ZSTD_ErrorCode
+        code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/src/misc.cpp b/src/misc.cpp
index ded2e620..14c154fa 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -32,7 +32,7 @@
 #include
 
 #include "types.h"
-#include "external/zip.h"
+#include "external/zstd.h"
 
 namespace Stockfish {
@@ -502,23 +502,39 @@ std::string CommandLine::get_working_directory() { return workingDirectory; }
 
-std::stringstream read_zipped_nnue(const std::string& fpath) {
-    void*  buf     = NULL;
-    size_t bufsize = 0;
+std::stringstream read_compressed_nnue(const std::string& fpath) {
+    std::stringstream ss;
+
+    std::ifstream fin(fpath, std::ios::binary);
+    if (!fin)
+        return ss;
+    std::vector<char> buffIn(ZSTD_DStreamInSize()), buffOut(ZSTD_DStreamOutSize());
+    ZSTD_DCtx* const  dctx = ZSTD_createDCtx();
+    if (!dctx)
+        return ss;
 
-    struct zip_t* zip = zip_open(fpath.c_str(), 0, 'r');
-    if (zip_entries_total(zip) == 1)
+    // Stream the file through the zstd decompressor one chunk at a time
+    while (fin.read(buffIn.data(), buffIn.size()) || fin.gcount() > 0)
     {
-        zip_entry_openbyindex(zip, 0);
-        { zip_entry_read(zip, &buf, &bufsize); }
-        zip_entry_close(zip);
+        size_t        read  = static_cast<size_t>(fin.gcount());
+        ZSTD_inBuffer input = {buffIn.data(), read, 0};
+
+        // A single input chunk can yield several flushes of decompressed output
+        while (input.pos < input.size)
+        {
+            ZSTD_outBuffer output = {buffOut.data(), buffOut.size(), 0};
+            size_t const   ret    = ZSTD_decompressStream(dctx, &output, &input);
+            if (ZSTD_isError(ret))
+            {
+                ZSTD_freeDCtx(dctx);
+                return ss;
+            }
+
+            ss.write(buffOut.data(), output.pos);
+        }
     }
-    zip_close(zip);
 
-    std::stringstream ss;
-    if (buf)
-        ss.write((const char*) buf, bufsize);
-    free(buf);
+    ZSTD_freeDCtx(dctx);
 
     return ss;
 }
diff --git a/src/misc.h b/src/misc.h
index 22e04586..ec23bcd8 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -47,7 +47,7 @@ void start_logger(const std::string& fname);
 
 size_t str_to_size_t(const std::string& s);
 
-std::stringstream read_zipped_nnue(const std::string& fpath);
+std::stringstream read_compressed_nnue(const std::string& fpath);
 
 #if defined(__linux__)
diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp
index 5be4ac94..fb055380 100644
--- a/src/nnue/network.cpp
+++ b/src/nnue/network.cpp
@@ -233,15 +233,9 @@ NnueEvalTrace Network::trace_evaluate(const Position& pos, AccumulatorCaches::Ca
 
 void Network::load_user_net(const std::string& dir, const std::string& evalfilePath) {
-    std::stringstream sstream = read_zipped_nnue(dir + evalfilePath);
+    std::stringstream sstream = read_compressed_nnue(dir + evalfilePath);
     auto description = load(sstream);
 
-    if (!description.has_value())
-    {
-        std::ifstream stream(dir + evalfilePath, std::ios::binary);
-        description = load(stream);
-    }
-
     if (description.has_value())
     {
         evalFile.current = evalfilePath;
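
[Editor's note] With the uncompressed-file fallback removed above, load_user_net()
now requires the network file to be a zstd frame. A hedged caller-side sketch
(file name assumed, not part of the patch): read_compressed_nnue() typically
returns an empty stream when the file is missing or is not valid zstd, so a
quick sanity check is possible before handing the stream to load():

    std::stringstream ss = read_compressed_nnue("pikafish.nnue");
    if (ss.str().empty())
        std::cerr << "pikafish.nnue is missing or not zstd-compressed" << std::endl;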