diff --git a/lz4/LICENSE b/lz4/LICENSE
new file mode 100644
index 0000000..b566df3
--- /dev/null
+++ b/lz4/LICENSE
@@ -0,0 +1,24 @@
+LZ4 Library
+Copyright (c) 2011-2014, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/lz4/Makefile b/lz4/Makefile
index bc9cec5..faffb39 100644
--- a/lz4/Makefile
+++ b/lz4/Makefile
@@ -1,14 +1,117 @@
-UNAME := $(shell uname)
+# ################################################################
+# LZ4 library - Makefile
+# Copyright (C) Yann Collet 2011-2015
+# All rights reserved.
+# 
+# BSD license
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice, this
+#   list of conditions and the following disclaimer in the documentation and/or
+#   other materials provided with the distribution.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# 
+# You can contact the author at :
+#  - LZ4 source repository : http://code.google.com/p/lz4/
+#  - LZ4 source mirror : https://github.com/Cyan4973/lz4
+#  - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
+# ################################################################
 
-LIBTYPE=so
+# Version numbers
+VERSION ?= 126
+LIBVER_MAJOR=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER_MINOR=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER_PATCH=`sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz4.h`
+LIBVER=$(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
 
-all: clean build
+DESTDIR?=
+PREFIX ?= /usr
+CFLAGS ?= -O3
+CFLAGS += -I. -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -pedantic
+
+LIBDIR?= $(PREFIX)/lib
+INCLUDEDIR=$(PREFIX)/include
+
+
+# OS X linker doesn't support -soname, and use different extension
+# see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html
+ifeq ($(shell uname), Darwin)
+	SHARED_EXT = dylib
+	SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT)
+	SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT)
+	SONAME_FLAGS = -install_name $(PREFIX)/lib/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER)
+else
+	SONAME_FLAGS = -Wl,-soname=liblz4.$(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT = so
+	SHARED_EXT_MAJOR = $(SHARED_EXT).$(LIBVER_MAJOR)
+	SHARED_EXT_VER = $(SHARED_EXT).$(LIBVER)
+endif
+
+default: liblz4
+
+all: liblz4
+
+liblz4: lz4.c lz4hc.c lz4frame.c xxhash.c
+	@echo compiling static library
+	@$(CC) $(CPPFLAGS) $(CFLAGS) -c $^
+	@$(AR) rcs liblz4.a lz4.o lz4hc.o lz4frame.o xxhash.o
+	@echo compiling dynamic library $(LIBVER)
+	@$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared $^ -fPIC $(SONAME_FLAGS) -o $@.$(SHARED_EXT_VER)
+	@echo creating versioned links
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT_MAJOR)
+	@ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT)
 
 clean:
-	rm -f *.so *.dylib *.gch
+	@rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* liblz4.pc
+	@echo Cleaning library completed
+
+
+#------------------------------------------------------------------------
+#make install is validated only for Linux, OSX, kFreeBSD and Hurd targets
+ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU))
+
+liblz4.pc: liblz4.pc.in Makefile
+	@echo creating pkgconfig
+	@sed -e 's|@PREFIX@|$(PREFIX)|' \
+            -e 's|@LIBDIR@|$(LIBDIR)|' \
+            -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \
+            -e 's|@VERSION@|$(VERSION)|' \
+             $< >$@
+
+install: liblz4 liblz4.pc
+	@install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/
+	@install -m 755 liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER)
+	@cp -a liblz4.$(SHARED_EXT_MAJOR) $(DESTDIR)$(LIBDIR)
+	@cp -a liblz4.$(SHARED_EXT) $(DESTDIR)$(LIBDIR)
+	@cp -a liblz4.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
+	@install -m 644 liblz4.a $(DESTDIR)$(LIBDIR)/liblz4.a
+	@install -m 644 lz4.h $(DESTDIR)$(INCLUDEDIR)/lz4.h
+	@install -m 644 lz4hc.h $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
+	@install -m 644 lz4frame.h $(DESTDIR)$(INCLUDEDIR)/lz4frame.h
+	@echo lz4 static and shared library installed
 
-build:
-	gcc -O3 -shared -fPIC -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration -o liblz4.$(LIBTYPE) lz4.h lz4hc.h lz4.c lz4hc.c lz4_offset_hack.c
+uninstall:
+	@rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT)
+	@rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR)
+	@rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/liblz4.pc
+	@[ -x $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER)
+	@[ -f $(DESTDIR)$(LIBDIR)/liblz4.a ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.a
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/lz4hc.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4hc.h
+	@echo lz4 libraries successfully uninstalled
 
-debug:
-	gcc -O3 -shared -fPIC -g -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration -o liblz4.$(LIBTYPE) lz4.h lz4hc.h lz4.c lz4hc.c lz4_offset_hack.c
+endif
diff --git a/lz4/__init__.py b/lz4/__init__.py
index 9b66108..80fc3c8 100644
--- a/lz4/__init__.py
+++ b/lz4/__init__.py
@@ -1,87 +1,95 @@
-import cffi, platform, os
+from cffi import FFI
+from os.path import dirname
+from os.path import join as pthjoin
+from os.path import relpath
 
-libtype = "so"
+ffi = FFI()
 
-ffi = cffi.FFI()
+ffi.cdef("int LZ4_compress   (const char*, char*, int);")
+ffi.cdef("int LZ4_decompress_safe (const char*, char*, int, int);")
+ffi.cdef("int LZ4_compressHC (const char*, char*, int);")
+ffi.cdef("int LZ4_compressBound (int);")
 
-ffi.cdef("int LZ4_compress   (const char* source, char* dest, int isize);")
-ffi.cdef("int LZ4_uncompress (const char* source, char* dest, int osize);")
-ffi.cdef("int LZ4_compressHC (const char* source, char* dest, int isize);")
-ffi.cdef("int LZ4_uncompress_offset (const char* source, char* dest, int osize, int offset);")
+path = dirname(__file__)
+lz4_files = ['lz4.c', 'lz4hc.c', 'xxhash.c']
+sources = [relpath(pthjoin(path, p)) for p in lz4_files]
+_lz4 = ffi.verify(sources=sources)
 
-candidate_names = [os.path.join(os.path.dirname(os.path.abspath(__file__)), "%s.%s" % (name, libtype)) for name in
-    ['liblz4', 'liblz4.pypy-20']
-]
-existing_names = [name for name in candidate_names if os.path.exists(name)]
-assert len(existing_names) >= 1, "Can't find compiled library."
 
-liblz4 = ffi.dlopen(existing_names[0])
+def _store_le32(c, x):
+    c[0] = chr(x & 0xff)
+    c[1] = chr((x >> 8) & 0xff)
+    c[2] = chr((x >> 16) & 0xff)
+    c[3] = chr((x >> 24) & 0xff)
 
-def LZ4_compressBound(isize):
-    return isize + (isize / 255) + 16
 
-def store_le32(c, x):
-    c[0] = chr(x & 0xff);
-    c[1] = chr((x >> 8) & 0xff);
-    c[2] = chr((x >> 16) & 0xff);
-    c[3] = chr((x >> 24) & 0xff);
-
-def load_le32(c):
-    return ord(c[0]) | (ord(c[1]) << 8) | (ord(c[2]) << 16) | (ord(c[3]) << 24)
+def _load_le32(c):
+    return ord(c[0])      | \
+        (ord(c[1]) << 8)  | \
+        (ord(c[2]) << 16) | \
+        (ord(c[3]) << 24)
 
 HDR_SIZE = 4
 INT_MAX = 2 ** 31
 
+
 def _compress_with(compressor, source):
     source_size = len(source)
-    
-    dest_size = HDR_SIZE + LZ4_compressBound(source_size)
+
+    dest_size = HDR_SIZE + _lz4.LZ4_compressBound(source_size)
     dest = ffi.new("char[]", dest_size)
     actual_size = 0
 
-    store_le32(dest, source_size)
+    _store_le32(dest, source_size)
     if source_size > 0:
         osize = compressor(source, dest + HDR_SIZE, source_size)
         actual_size = HDR_SIZE + osize
-        # ideally we would resize the buffer now, but I don't know how to do that with cffi
-        
+
     return (dest, actual_size)
 
+
 def _compress(source):
-    return _compress_with(liblz4.LZ4_compress, source)
+    return _compress_with(_lz4.LZ4_compress, source)
 
-def _compressHC(source):
-    return _compress_with(liblz4.LZ4_compressHC, source)
 
-def compress(source):
-    data, size = _compress(source)
-    return ffi.buffer(data, size)[:]
+def _compressHC(source):
+    return _compress_with(_lz4.LZ4_compressHC, source)
 
-def compressHC(source):
-    data, size = _compressHC(source)
-    return ffi.buffer(data, size)[:]
 
 def _uncompress(source):
     source_size = len(source)
 
     if source_size < HDR_SIZE:
         raise ValueError("input too short")
-    
-    dest_size = load_le32(source)
-    if dest_size > INT_MAX:
-        raise ValueError("invalid size in header: 0x%x" % dest_size)
-
-    if dest_size > 0:
-        dest = ffi.new("char[]", dest_size)
-        osize = liblz4.LZ4_uncompress_offset(source, dest, dest_size, HDR_SIZE)
 
-        if osize < 0:
-            raise ValueError("corrupt input at byte %d" % -osize)
-        elif osize < source_size - HDR_SIZE:
-            raise ValueError("decompression incomplete")
+    dest_size = _load_le32(source)
+    if dest_size > INT_MAX or dest_size < 0:
+        raise ValueError("invalid size in header: %d - 0x%x"
+                         % (dest_size, dest_size))
 
+    dest = ffi.new("char[]", dest_size)
+    osize = _lz4.LZ4_decompress_safe(source[4:], dest,
+                                     source_size - HDR_SIZE, dest_size)
+    if osize < 0:
+        raise ValueError("corrupt input at byte %d" % -osize)
     return (dest, dest_size)
 
+
+def compress(source):
+    data, size = _compress(source)
+    return ffi.buffer(data, size)[:]
+
+
+def compressHC(source):
+    data, size = _compressHC(source)
+    return ffi.buffer(data, size)[:]
+
+
 def uncompress(source):
     data, size = _uncompress(source)
-    return ffi.buffer(data, size)[:]
\ No newline at end of file
+    return ffi.buffer(data, size)[:]
+
+loads = uncompress
+dumps = compress
+
+__all__ = ['compress', 'compressHC', 'uncompress', 'loads', 'dumps']
diff --git a/lz4/lz4.c b/lz4/lz4.c
index 1f2eafd..ed928ce 100644
--- a/lz4/lz4.c
+++ b/lz4/lz4.c
@@ -1,906 +1,1367 @@
-/*
-   LZ4 - Fast LZ compression algorithm
-   Copyright (C) 2011-2012, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
-*/
-
-//**************************************
-// Tuning parameters
-//**************************************
-// MEMORY_USAGE :
-// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-// Increasing memory usage improves compression ratio
-// Reduced memory usage can improve speed, due to cache effect
-// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
-#define MEMORY_USAGE 14
-
-// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
-// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
-// You can set this option to 1 in situations where data will remain within closed environment
-// This option is useless on Little_Endian CPU (such as x86)
-//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
-
-
-
-//**************************************
-// CPU Feature Detection
-//**************************************
-// 32 or 64 bits ?
-#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) )   // Detects 64 bits mode
-#  define LZ4_ARCH64 1
-#else
-#  define LZ4_ARCH64 0
-#endif
-
-// Little Endian or Big Endian ?
-// Overwrite the #define below if you know your architecture endianess
-#if defined (__GLIBC__)
-#  include <endian.h>
-#  if (__BYTE_ORDER == __BIG_ENDIAN)
-#     define LZ4_BIG_ENDIAN 1
-#  endif
-#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
-#  define LZ4_BIG_ENDIAN 1
-#elif defined(__sparc) || defined(__sparc__) \
-   || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
-   || defined(__hpux)  || defined(__hppa) \
-   || defined(_MIPSEB) || defined(__s390__)
-#  define LZ4_BIG_ENDIAN 1
-#else
-// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
-#endif
-
-// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
-// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
-#if defined(__ARM_FEATURE_UNALIGNED)
-#  define LZ4_FORCE_UNALIGNED_ACCESS 1
-#endif
-
-// Define this parameter if your target system or compiler does not support hardware bit count
-#if defined(_MSC_VER) && defined(_WIN32_WCE)            // Visual Studio for Windows CE does not support Hardware bit count
-#  define LZ4_FORCE_SW_BITCOUNT
-#endif
-
-
-//**************************************
-// Compiler Options
-//**************************************
-#if __STDC_VERSION__ >= 199901L   // C99
-/* "restrict" is a known keyword */
-#else
-#  define restrict // Disable restrict
-#endif
-
-#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-#ifdef _MSC_VER  // Visual Studio
-#  include <intrin.h>   // For Visual 2005
-#  if LZ4_ARCH64	// 64-bit
-#    pragma intrinsic(_BitScanForward64) // For Visual 2005
-#    pragma intrinsic(_BitScanReverse64) // For Visual 2005
-#  else
-#    pragma intrinsic(_BitScanForward)   // For Visual 2005
-#    pragma intrinsic(_BitScanReverse)   // For Visual 2005
-#  endif
-#endif
-
-#ifdef _MSC_VER
-#  define lz4_bswap16(x) _byteswap_ushort(x)
-#else
-#  define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
-#endif
-
-#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
-#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
-#else
-#  define expect(expr,value)    (expr)
-#endif
-
-#define likely(expr)     expect((expr) != 0, 1)
-#define unlikely(expr)   expect((expr) != 0, 0)
-
-
-//**************************************
-// Includes
-//**************************************
-#include <stdlib.h>   // for malloc
-#include <string.h>   // for memset
-#include "lz4.h"
-
-
-//**************************************
-// Basic Types
-//**************************************
-#if defined(_MSC_VER)    // Visual Studio does not support 'stdint' natively
-#  define BYTE	unsigned __int8
-#  define U16		unsigned __int16
-#  define U32		unsigned __int32
-#  define S32		__int32
-#  define U64		unsigned __int64
-#else
-#  include <stdint.h>
-#  define BYTE	uint8_t
-#  define U16		uint16_t
-#  define U32		uint32_t
-#  define S32		int32_t
-#  define U64		uint64_t
-#endif
-
-#ifndef LZ4_FORCE_UNALIGNED_ACCESS
-#  pragma pack(push, 1)
-#endif
-
-typedef struct _U16_S { U16 v; } U16_S;
-typedef struct _U32_S { U32 v; } U32_S;
-typedef struct _U64_S { U64 v; } U64_S;
-
-#ifndef LZ4_FORCE_UNALIGNED_ACCESS
-#  pragma pack(pop)
-#endif
-
-#define A64(x) (((U64_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A16(x) (((U16_S *)(x))->v)
-
-
-//**************************************
-// Constants
-//**************************************
-#define MINMATCH 4
-
-#define HASH_LOG (MEMORY_USAGE-2)
-#define HASHTABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASHTABLESIZE - 1)
-
-// NOTCOMPRESSIBLE_DETECTIONLEVEL :
-// Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
-// This may decrease compression ratio dramatically, but will be faster on incompressible data
-// Increasing this value will make the algorithm search more before declaring a segment "incompressible"
-// This could improve compression a bit, but will be slower on incompressible data
-// The default value (6) is recommended
-#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
-#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
-#define STACKLIMIT 13
-#define HEAPMODE (HASH_LOG>STACKLIMIT)  // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
-#define COPYLENGTH 8
-#define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH+MINMATCH)
-#define MINLENGTH (MFLIMIT+1)
-
-#define MAXD_LOG 16
-#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
-
-#define ML_BITS  4
-#define ML_MASK  ((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
-
-
-//**************************************
-// Architecture-specific macros
-//**************************************
-#if LZ4_ARCH64	// 64-bit
-#  define STEPSIZE 8
-#  define UARCH U64
-#  define AARCH A64
-#  define LZ4_COPYSTEP(s,d)       A64(d) = A64(s); d+=8; s+=8;
-#  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d)
-#  define LZ4_SECURECOPY(s,d,e)   if (d<e) LZ4_WILDCOPY(s,d,e)
-#  define HTYPE                   U32
-#  define INITBASE(base)          const BYTE* const base = ip
-#else		// 32-bit
-#  define STEPSIZE 4
-#  define UARCH U32
-#  define AARCH A32
-#  define LZ4_COPYSTEP(s,d)       A32(d) = A32(s); d+=4; s+=4;
-#  define LZ4_COPYPACKET(s,d)     LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
-#  define LZ4_SECURECOPY          LZ4_WILDCOPY
-#  define HTYPE                   const BYTE*
-#  define INITBASE(base)          const int base = 0
-#endif
-
-#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
-#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
-#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
-#else		// Little Endian
-#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
-#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
-#endif
-
-
-//**************************************
-// Local structures
-//**************************************
-struct refTables
-{
-    HTYPE hashTable[HASHTABLESIZE];
-};
-
-
-//**************************************
-// Macros
-//**************************************
-#define LZ4_HASH_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
-#define LZ4_HASH_VALUE(p)		LZ4_HASH_FUNCTION(A32(p))
-#define LZ4_WILDCOPY(s,d,e)		do { LZ4_COPYPACKET(s,d) } while (d<e);
-#define LZ4_BLINDCOPY(s,d,l)	{ BYTE* e=(d)+l; LZ4_WILDCOPY(s,d,e); d=e; }
-
-
-//****************************
-// Private functions
-//****************************
-#if LZ4_ARCH64
-
-static inline int LZ4_NbCommonBytes (register U64 val)
-{
-#if defined(LZ4_BIG_ENDIAN)
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse64( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clzll(val) >> 3);
-    #else
-    int r;
-    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
-    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-    r += (!val);
-    return r;
-    #endif
-#else
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanForward64( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctzll(val) >> 3);
-    #else
-    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
-    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
-    #endif
-#endif
-}
-
-#else
-
-static inline int LZ4_NbCommonBytes (register U32 val)
-{
-#if defined(LZ4_BIG_ENDIAN)
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clz(val) >> 3);
-    #else
-    int r;
-    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-    r += (!val);
-    return r;
-    #endif
-#else
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r;
-    _BitScanForward( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctz(val) >> 3);
-    #else
-    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
-    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-    #endif
-#endif
-}
-
-#endif
-
-
-
-//******************************
-// Compression functions
-//******************************
-
-// LZ4_compressCtx :
-// -----------------
-// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-// If it cannot achieve it, compression will stop, and result of the function will be zero.
-// return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-
-static inline int LZ4_compressCtx(void** ctx,
-                 const char* source,
-                 char* dest,
-                 int isize,
-                 int maxOutputSize)
-{
-#if HEAPMODE
-    struct refTables *srt = (struct refTables *) (*ctx);
-    HTYPE* HashTable;
-#else
-    HTYPE HashTable[HASHTABLESIZE] = {0};
-#endif
-
-    const BYTE* ip = (BYTE*) source;
-    INITBASE(base);
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + isize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-#define matchlimit (iend - LASTLITERALS)
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-
-    int length;
-    const int skipStrength = SKIPSTRENGTH;
-    U32 forwardH;
-
-
-    // Init
-    if (isize<MINLENGTH) goto _last_literals;
-#if HEAPMODE
-    if (*ctx == NULL)
-    {
-        srt = (struct refTables *) malloc ( sizeof(struct refTables) );
-        *ctx = (void*) srt;
-    }
-    HashTable = (HTYPE*)(srt->hashTable);
-    memset((void*)HashTable, 0, sizeof(srt->hashTable));
-#else
-    (void) ctx;
-#endif
-
-
-    // First Byte
-    HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
-    ip++; forwardH = LZ4_HASH_VALUE(ip);
-
-    // Main Loop
-    for ( ; ; )
-    {
-        int findMatchAttempts = (1U << skipStrength) + 3;
-        const BYTE* forwardIp = ip;
-        const BYTE* ref;
-        BYTE* token;
-
-        // Find a match
-        do {
-            U32 h = forwardH;
-            int step = findMatchAttempts++ >> skipStrength;
-            ip = forwardIp;
-            forwardIp = ip + step;
-
-            if unlikely(forwardIp > mflimit) { goto _last_literals; }
-
-            forwardH = LZ4_HASH_VALUE(forwardIp);
-            ref = base + HashTable[h];
-            HashTable[h] = ip - base;
-
-        } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
-
-        // Catch up
-        while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
-
-        // Encode Literal length
-        length = (int)(ip - anchor);
-        token = op++;
-        if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
-#ifdef _MSC_VER
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len = length-RUN_MASK; 
-            *token=(RUN_MASK<<ML_BITS); 
-            if (len>254)
-            {
-                do { *op++ = 255; len -= 255; } while (len>254);
-                *op++ = (BYTE)len; 
-                memcpy(op, anchor, length);
-                op += length;
-                goto _next_match;
-            }
-            else
-            *op++ = (BYTE)len; 
-        }
-        else *token = (length<<ML_BITS);
-#else
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len;
-            *token=(RUN_MASK<<ML_BITS); 
-            len = length-RUN_MASK; 
-            for(; len > 254 ; len-=255) *op++ = 255; 
-            *op++ = (BYTE)len; 
-        }
-        else *token = (length<<ML_BITS);
-#endif
-
-        // Copy Literals
-        LZ4_BLINDCOPY(anchor, op, length);
-
-_next_match:
-        // Encode Offset
-        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
-
-        // Start Counting
-        ip+=MINMATCH; ref+=MINMATCH;    // MinMatch already verified
-        anchor = ip;
-        while likely(ip<matchlimit-(STEPSIZE-1))
-        {
-            UARCH diff = AARCH(ref) ^ AARCH(ip);
-            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
-            ip += LZ4_NbCommonBytes(diff);
-            goto _endCount;
-        }
-        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
-        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
-        if ((ip<matchlimit) && (*ref == *ip)) ip++;
-_endCount:
-
-        // Encode MatchLength
-        length = (int)(ip - anchor);
-        if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
-        if (length>=(int)ML_MASK) 
-        { 
-            *token += ML_MASK; 
-            length -= ML_MASK; 
-            for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } 
-            if (length > 254) { length-=255; *op++ = 255; } 
-            *op++ = (BYTE)length; 
-        }
-        else *token += length;
-
-        // Test end of chunk
-        if (ip > mflimit) { anchor = ip;  break; }
-
-        // Fill table
-        HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
-
-        // Test next position
-        ref = base + HashTable[LZ4_HASH_VALUE(ip)];
-        HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
-        if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; }
-
-        // Prepare next loop
-        anchor = ip++;
-        forwardH = LZ4_HASH_VALUE(ip);
-    }
-
-_last_literals:
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0;
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    }
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-
-// Note : this function is valid only if isize < LZ4_64KLIMIT
-#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
-#define HASHLOG64K (HASH_LOG+1)
-#define HASH64KTABLESIZE (1U<<HASHLOG64K)
-#define LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
-#define LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
-static inline int LZ4_compress64kCtx(void** ctx,
-                 const char* source,
-                 char* dest,
-                 int isize,
-                 int maxOutputSize)
-{
-#if HEAPMODE
-    struct refTables *srt = (struct refTables *) (*ctx);
-    U16* HashTable;
-#else
-    U16 HashTable[HASH64KTABLESIZE] = {0};
-#endif
-
-    const BYTE* ip = (BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const base = ip;
-    const BYTE* const iend = ip + isize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-#define matchlimit (iend - LASTLITERALS)
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-
-    int len, length;
-    const int skipStrength = SKIPSTRENGTH;
-    U32 forwardH;
-
-
-    // Init
-    if (isize<MINLENGTH) goto _last_literals;
-#if HEAPMODE
-    if (*ctx == NULL)
-    {
-        srt = (struct refTables *) malloc ( sizeof(struct refTables) );
-        *ctx = (void*) srt;
-    }
-    HashTable = (U16*)(srt->hashTable);
-    memset((void*)HashTable, 0, sizeof(srt->hashTable));
-#else
-    (void) ctx;
-#endif
-
-
-    // First Byte
-    ip++; forwardH = LZ4_HASH64K_VALUE(ip);
-
-    // Main Loop
-    for ( ; ; )
-    {
-        int findMatchAttempts = (1U << skipStrength) + 3;
-        const BYTE* forwardIp = ip;
-        const BYTE* ref;
-        BYTE* token;
-
-        // Find a match
-        do {
-            U32 h = forwardH;
-            int step = findMatchAttempts++ >> skipStrength;
-            ip = forwardIp;
-            forwardIp = ip + step;
-
-            if (forwardIp > mflimit) { goto _last_literals; }
-
-            forwardH = LZ4_HASH64K_VALUE(forwardIp);
-            ref = base + HashTable[h];
-            HashTable[h] = (U16)(ip - base);
-
-        } while (A32(ref) != A32(ip));
-
-        // Catch up
-        while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
-
-        // Encode Literal length
-        length = (int)(ip - anchor);
-        token = op++;
-        if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; 		// Check output limit
-#ifdef _MSC_VER
-        if (length>=(int)RUN_MASK) 
-        { 
-            int len = length-RUN_MASK; 
-            *token=(RUN_MASK<<ML_BITS); 
-            if (len>254)
-            {
-                do { *op++ = 255; len -= 255; } while (len>254);
-                *op++ = (BYTE)len; 
-                memcpy(op, anchor, length);
-                op += length;
-                goto _next_match;
-            }
-            else
-            *op++ = (BYTE)len; 
-        }
-        else *token = (length<<ML_BITS);
-#else
-        if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
-        else *token = (length<<ML_BITS);
-#endif
-
-        // Copy Literals
-        LZ4_BLINDCOPY(anchor, op, length);
-
-_next_match:
-        // Encode Offset
-        LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
-
-        // Start Counting
-        ip+=MINMATCH; ref+=MINMATCH;   // MinMatch verified
-        anchor = ip;
-        while (ip<matchlimit-(STEPSIZE-1))
-        {
-            UARCH diff = AARCH(ref) ^ AARCH(ip);
-            if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; }
-            ip += LZ4_NbCommonBytes(diff);
-            goto _endCount;
-        }
-        if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; }
-        if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; }
-        if ((ip<matchlimit) && (*ref == *ip)) ip++;
-_endCount:
-
-        // Encode MatchLength
-        len = (int)(ip - anchor);
-        if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; 		// Check output limit
-        if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
-        else *token += len;
-
-        // Test end of chunk
-        if (ip > mflimit) { anchor = ip;  break; }
-
-        // Fill table
-        HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
-
-        // Test next position
-        ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
-        HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
-        if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
-
-        // Prepare next loop
-        anchor = ip++;
-        forwardH = LZ4_HASH64K_VALUE(ip);
-    }
-
-_last_literals:
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if (op + lastRun + 1 + (lastRun-RUN_MASK+255)/255 > oend) return 0;
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
-        else *op++ = (lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    }
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-int LZ4_compress_limitedOutput(const char* source, 
-                               char* dest, 
-                               int isize, 
-                               int maxOutputSize)
-{
-#if HEAPMODE
-    void* ctx = malloc(sizeof(struct refTables));
-    int result;
-    if (isize < LZ4_64KLIMIT)
-        result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
-    else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
-    free(ctx);
-    return result;
-#else
-    if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
-    return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
-#endif
-}
-
-
-int LZ4_compress(const char* source,
-                 char* dest,
-                 int isize)
-{
-    return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
-}
-
-
-
-
-//****************************
-// Decompression functions
-//****************************
-
-// Note : The decoding functions LZ4_uncompress() and LZ4_uncompress_unknownOutputSize()
-//		are safe against "buffer overflow" attack type.
-//		They will never write nor read outside of the provided output buffers.
-//      LZ4_uncompress_unknownOutputSize() also insures that it will never read outside of the input buffer.
-//		A corrupted input will produce an error result, a negative int, indicating the position of the error within input stream.
-
-int LZ4_uncompress(const char* source,
-                 char* dest,
-                 int osize)
-{
-    // Local Variables
-    const BYTE* restrict ip = (const BYTE*) source;
-    const BYTE* ref;
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + osize;
-    BYTE* cpy;
-
-    unsigned token;
-
-    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-    size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-
-    // Main Loop
-    while (1)
-    {
-        size_t length;
-
-        // get runlength
-        token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK)  { size_t len; for (;(len=*ip++)==255;length+=255){} length += len; }
-
-        // copy literals
-        cpy = op+length;
-        if (cpy>oend-COPYLENGTH)
-        {
-            if (cpy != oend) goto _output_error;         // Error : not enough place for another match (min 4) + 5 literals
-            memcpy(op, ip, length);
-            ip += length;
-            break;                                       // EOF
-        }
-        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
-
-        // get offset
-        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
-        if unlikely(ref < (BYTE* const)dest) goto _output_error;   // Error : offset outside destination buffer
-
-        // get matchlength
-        if ((length=(token&ML_MASK)) == ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; }
-
-        // copy repeated sequence
-        if unlikely((op-ref)<STEPSIZE)
-        {
-#if LZ4_ARCH64
-            size_t dec64 = dec64table[op-ref];
-#else
-            const int dec64 = 0;
-#endif
-            op[0] = ref[0];
-            op[1] = ref[1];
-            op[2] = ref[2];
-            op[3] = ref[3];
-            op += 4, ref += 4; ref -= dec32table[op-ref];
-            A32(op) = A32(ref); 
-            op += STEPSIZE-4; ref -= dec64;
-        } else { LZ4_COPYSTEP(ref,op); }
-        cpy = op + length - (STEPSIZE-4);
-
-        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
-        {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
-            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
-            while(op<cpy) *op++=*ref++;
-            op=cpy;
-            continue;
-        }
-        
-        LZ4_WILDCOPY(ref, op, cpy);
-        op=cpy;		// correction
-    }
-
-    // end of decoding
-    return (int) (((char*)ip)-source);
-
-    // write overflow error detected
-_output_error:
-    return (int) (-(((char*)ip)-source));
-}
-
-
-int LZ4_uncompress_unknownOutputSize(
-                const char* source,
-                char* dest,
-                int isize,
-                int maxOutputSize)
-{
-    // Local Variables
-    const BYTE* restrict ip = (const BYTE*) source;
-    const BYTE* const iend = ip + isize;
-    const BYTE* ref;
-
-    BYTE* op = (BYTE*) dest;
-    BYTE* const oend = op + maxOutputSize;
-    BYTE* cpy;
-
-    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-    size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-
-    // Special case
-    if unlikely(ip==iend) goto _output_error;    // A correctly formed null-compressed LZ4 must have at least one byte (token=0)
-
-    // Main Loop
-    while (1) 
-    {
-        unsigned token;
-        size_t length;
-
-        // get runlength
-        token = *ip++;
-        if ((length=(token>>ML_BITS)) == RUN_MASK) 
-        { 
-            int s=255; 
-            while (likely(ip<iend) && (s==255)) { s=*ip++; length += s; } 
-        }
-
-        // copy literals
-        cpy = op+length;
-        if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS)))
-        {
-            if (cpy > oend) goto _output_error;          // Error : writes beyond output buffer
-            if (ip+length != iend) goto _output_error;   // Error : LZ4 format requires to consume all input at this stage (no match within the last 11 bytes, and at least 8 remaining input bytes for another match+literals)
-            memcpy(op, ip, length);
-            op += length;
-            break;                                       // Necessarily EOF, due to parsing restrictions
-        }
-        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
-
-        // get offset
-        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
-        if unlikely(ref < (BYTE* const)dest) goto _output_error;   // Error : offset outside of destination buffer
-
-        // get matchlength
-        if ((length=(token&ML_MASK)) == ML_MASK)    
-        { 
-            while likely(ip<iend-(LASTLITERALS+1))    // Error : a minimum input bytes must remain for LASTLITERALS + token
-            { 
-                int s = *ip++; 
-                length +=s; 
-                if (s==255) continue; 
-                break; 
-            } 
-        }
-
-        // copy repeated sequence
-        if unlikely(op-ref<STEPSIZE)
-        {
-#if LZ4_ARCH64
-            size_t dec64 = dec64table[op-ref];
-#else
-            const int dec64 = 0;
-#endif
-            op[0] = ref[0];
-            op[1] = ref[1];
-            op[2] = ref[2];
-            op[3] = ref[3];
-            op += 4, ref += 4; ref -= dec32table[op-ref];
-            A32(op) = A32(ref); 
-            op += STEPSIZE-4; ref -= dec64;
-        } else { LZ4_COPYSTEP(ref,op); }
-        cpy = op + length - (STEPSIZE-4);
-
-        if unlikely(cpy>oend-(COPYLENGTH+(STEPSIZE-4)))
-        {
-            if (cpy > oend-LASTLITERALS) goto _output_error;    // Error : last 5 bytes must be literals
-            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
-            while(op<cpy) *op++=*ref++;
-            op=cpy;
-            continue;
-        }
-
-        LZ4_WILDCOPY(ref, op, cpy);
-        op=cpy;		// correction
-    }
-
-    // end of decoding
-    return (int) (((char*)op)-dest);
-
-    // write overflow error detected
-_output_error:
-    return (int) (-(((char*)ip)-source));
-}
-
+/*
+   LZ4 - Fast LZ compression algorithm
+   Copyright (C) 2011-2015, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : http://code.google.com/p/lz4
+   - LZ4 source mirror : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+/**************************************
+   Tuning parameters
+**************************************/
+/*
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#define HEAPMODE 0
+
+/*
+ * CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS :
+ * By default, the source code expects the compiler to correctly optimize
+ * 4-bytes and 8-bytes read on architectures able to handle it efficiently.
+ * This is not always the case. In some circumstances (ARM notably),
+ * the compiler will issue cautious code even when target is able to correctly handle unaligned memory accesses.
+ *
+ * You can force the compiler to use unaligned memory access by uncommenting the line below.
+ * One of the below scenarios will happen :
+ * 1 - Your target CPU correctly handle unaligned access, and was not well optimized by compiler (good case).
+ *     You will witness large performance improvements (+50% and up).
+ *     Keep the line uncommented and send a word to upstream (https://groups.google.com/forum/#!forum/lz4c)
+ *     The goal is to automatically detect such situations by adding your target CPU within an exception list.
+ * 2 - Your target CPU correctly handle unaligned access, and was already already optimized by compiler
+ *     No change will be experienced.
+ * 3 - Your target CPU inefficiently handle unaligned access.
+ *     You will experience a performance loss. Comment back the line.
+ * 4 - Your target CPU does not handle unaligned access.
+ *     Program will crash.
+ * If uncommenting results in better performance (case 1)
+ * please report your configuration to upstream (https://groups.google.com/forum/#!forum/lz4c)
+ * An automatic detection macro will be added to match your case within future versions of the library.
+ */
+/* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated thanks to feedbacks
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define LZ4_UNALIGNED_ACCESS 1
+#else
+#  define LZ4_UNALIGNED_ACCESS 0
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
+#  define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+/**************************************
+   Compiler Options
+**************************************/
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+/* "restrict" is a known keyword */
+#else
+#  define restrict /* Disable restrict */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
+#else
+#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif   /* __STDC_VERSION__ */
+#endif  /* _MSC_VER */
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
+#else
+#  define expect(expr,value)    (expr)
+#endif
+
+#define likely(expr)     expect((expr) != 0, 1)
+#define unlikely(expr)   expect((expr) != 0, 0)
+
+
+/**************************************
+   Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
+#define ALLOCATOR(n,s) calloc(n,s)
+#define FREEMEM        free
+#include <string.h>   /* memset, memcpy */
+#define MEM_INIT       memset
+
+
+/**************************************
+   Includes
+**************************************/
+#include "lz4.h"
+
+
+/**************************************
+   Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+#else
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+   Reading and writing into memory
+**************************************/
+#define STEPSIZE sizeof(size_t)
+
+static unsigned LZ4_64bits(void) { return sizeof(void*)==8; }
+
+static unsigned LZ4_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{
+    if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
+        return *(U16*)memPtr;
+    else
+    {
+        const BYTE* p = memPtr;
+        return (U16)((U16)p[0] + (p[1]<<8));
+    }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+    if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
+    {
+        *(U16*)memPtr = value;
+        return;
+    }
+    else
+    {
+        BYTE* p = memPtr;
+        p[0] = (BYTE) value;
+        p[1] = (BYTE)(value>>8);
+    }
+}
+
+
+static U16 LZ4_read16(const void* memPtr)
+{
+    if (LZ4_UNALIGNED_ACCESS)
+        return *(U16*)memPtr;
+    else
+    {
+        U16 val16;
+        memcpy(&val16, memPtr, 2);
+        return val16;
+    }
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+    if (LZ4_UNALIGNED_ACCESS)
+        return *(U32*)memPtr;
+    else
+    {
+        U32 val32;
+        memcpy(&val32, memPtr, 4);
+        return val32;
+    }
+}
+
+static U64 LZ4_read64(const void* memPtr)
+{
+    if (LZ4_UNALIGNED_ACCESS)
+        return *(U64*)memPtr;
+    else
+    {
+        U64 val64;
+        memcpy(&val64, memPtr, 8);
+        return val64;
+    }
+}
+
+static size_t LZ4_read_ARCH(const void* p)
+{
+    if (LZ4_64bits())
+        return (size_t)LZ4_read64(p);
+    else
+        return (size_t)LZ4_read32(p);
+}
+
+
+static void LZ4_copy4(void* dstPtr, const void* srcPtr)
+{
+    if (LZ4_UNALIGNED_ACCESS)
+    {
+        *(U32*)dstPtr = *(U32*)srcPtr;
+        return;
+    }
+    memcpy(dstPtr, srcPtr, 4);
+}
+
+static void LZ4_copy8(void* dstPtr, const void* srcPtr)
+{
+#if GCC_VERSION!=409  /* disabled on GCC 4.9, as it generates invalid opcode (crash) */
+    if (LZ4_UNALIGNED_ACCESS)
+    {
+        if (LZ4_64bits())
+            *(U64*)dstPtr = *(U64*)srcPtr;
+        else
+            ((U32*)dstPtr)[0] = ((U32*)srcPtr)[0],
+            ((U32*)dstPtr)[1] = ((U32*)srcPtr)[1];
+        return;
+    }
+#endif
+    memcpy(dstPtr, srcPtr, 8);
+}
+
+/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
+static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+    BYTE* d = dstPtr;
+    const BYTE* s = srcPtr;
+    BYTE* e = dstEnd;
+    do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
+}
+
+
+/**************************************
+   Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define COPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (COPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/**************************************
+   Common Utils
+**************************************/
+#define LZ4_STATIC_ASSERT(c)    { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/********************************
+   Common functions
+********************************/
+static unsigned LZ4_NbCommonBytes (register size_t val)
+{
+    if (LZ4_isLittleEndian())
+    {
+        if (LZ4_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r;
+            _BitScanForward( &r, (U32)val );
+            return (int)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    }
+    else   /* Big Endian CPU */
+    {
+        if (LZ4_64bits())
+        {
+#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+        else /* 32 bits */
+        {
+#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (__builtin_clz(val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        }
+    }
+}
+
+static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    while (likely(pIn<pInLimit-(STEPSIZE-1)))
+    {
+        size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/**************************************
+   Local Constants
+**************************************/
+#define LZ4_HASHLOG   (LZ4_MEMORY_USAGE-2)
+#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
+
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+
+/**************************************
+   Local Utils
+**************************************/
+int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
+int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
+
+
+/**************************************
+   Local Structures and types
+**************************************/
+typedef struct {
+    U32 hashTable[HASH_SIZE_U32];
+    U32 currentOffset;
+    U32 initCheck;
+    const BYTE* dictionary;
+    const BYTE* bufferStart;
+    U32 dictSize;
+} LZ4_stream_t_internal;
+
+typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
+
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+
+
+/********************************
+   Compression functions
+********************************/
+
+static U32 LZ4_hashSequence(U32 sequence, tableType_t tableType)
+{
+    if (tableType == byU16)
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+    else
+        return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static U32 LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(LZ4_read32(p), tableType); }
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    switch (tableType)
+    {
+    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+    }
+}
+
+static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
+    if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+    { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
+}
+
+static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+    U32 h = LZ4_hashPosition(p, tableType);
+    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+static int LZ4_compress_generic(
+                 void* ctx,
+                 const char* source,
+                 char* dest,
+                 int inputSize,
+                 int maxOutputSize,
+                 limitedOutput_directive outputLimited,
+                 tableType_t tableType,
+                 dict_directive dict,
+                 dictIssue_directive dictIssue)
+{
+    LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx;
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* base;
+    const BYTE* lowLimit;
+    const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
+    const BYTE* const dictionary = dictPtr->dictionary;
+    const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
+    const size_t dictDelta = dictEnd - (const BYTE*)source;
+    const BYTE* anchor = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = iend - LASTLITERALS;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const olimit = op + maxOutputSize;
+
+    U32 forwardH;
+    size_t refDelta=0;
+
+    /* Init conditions */
+    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size, too large (or negative) */
+    switch(dict)
+    {
+    case noDict:
+    default:
+        base = (const BYTE*)source;
+        lowLimit = (const BYTE*)source;
+        break;
+    case withPrefix64k:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source - dictPtr->dictSize;
+        break;
+    case usingExtDict:
+        base = (const BYTE*)source - dictPtr->currentOffset;
+        lowLimit = (const BYTE*)source;
+        break;
+    }
+    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;   /* Size too large (not within 64K limit) */
+    if (inputSize<LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
+
+    /* First Byte */
+    LZ4_putPosition(ip, ctx, tableType, base);
+    ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
+    for ( ; ; )
+    {
+        const BYTE* match;
+        BYTE* token;
+        {
+            const BYTE* forwardIp = ip;
+            unsigned step=1;
+            unsigned searchMatchNb = (1U << LZ4_skipTrigger);
+
+            /* Find a match */
+            do {
+                U32 h = forwardH;
+                ip = forwardIp;
+                forwardIp += step;
+                step = searchMatchNb++ >> LZ4_skipTrigger;
+
+                if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+                match = LZ4_getPositionOnHash(h, ctx, tableType, base);
+                if (dict==usingExtDict)
+                {
+                    if (match<(const BYTE*)source)
+                    {
+                        refDelta = dictDelta;
+                        lowLimit = dictionary;
+                    }
+                    else
+                    {
+                        refDelta = 0;
+                        lowLimit = (const BYTE*)source;
+                    }
+                }
+                forwardH = LZ4_hashPosition(forwardIp, tableType);
+                LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+            } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
+                || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
+                || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) );
+        }
+
+        /* Catch up */
+        while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
+
+        {
+            /* Encode Literal length */
+            unsigned litLength = (unsigned)(ip - anchor);
+            token = op++;
+            if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+                return 0;   /* Check output limit */
+            if (litLength>=RUN_MASK)
+            {
+                int len = (int)litLength-RUN_MASK;
+                *token=(RUN_MASK<<ML_BITS);
+                for(; len >= 255 ; len-=255) *op++ = 255;
+                *op++ = (BYTE)len;
+            }
+            else *token = (BYTE)(litLength<<ML_BITS);
+
+            /* Copy Literals */
+            LZ4_wildCopy(op, anchor, op+litLength);
+            op+=litLength;
+        }
+
+_next_match:
+        /* Encode Offset */
+        LZ4_writeLE16(op, (U16)(ip-match)); op+=2;
+
+        /* Encode MatchLength */
+        {
+            unsigned matchLength;
+
+            if ((dict==usingExtDict) && (lowLimit==dictionary))
+            {
+                const BYTE* limit;
+                match += refDelta;
+                limit = ip + (dictEnd-match);
+                if (limit > matchlimit) limit = matchlimit;
+                matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+                ip += MINMATCH + matchLength;
+                if (ip==limit)
+                {
+                    unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+                    matchLength += more;
+                    ip += more;
+                }
+            }
+            else
+            {
+                matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+                ip += MINMATCH + matchLength;
+            }
+
+            if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit)))
+                return 0;    /* Check output limit */
+            if (matchLength>=ML_MASK)
+            {
+                *token += ML_MASK;
+                matchLength -= ML_MASK;
+                for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
+                if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+                *op++ = (BYTE)matchLength;
+            }
+            else *token += (BYTE)(matchLength);
+        }
+
+        anchor = ip;
+
+        /* Test end of chunk */
+        if (ip > mflimit) break;
+
+        /* Fill table */
+        LZ4_putPosition(ip-2, ctx, tableType, base);
+
+        /* Test next position */
+        match = LZ4_getPosition(ip, ctx, tableType, base);
+        if (dict==usingExtDict)
+        {
+            if (match<(const BYTE*)source)
+            {
+                refDelta = dictDelta;
+                lowLimit = dictionary;
+            }
+            else
+            {
+                refDelta = 0;
+                lowLimit = (const BYTE*)source;
+            }
+        }
+        LZ4_putPosition(ip, ctx, tableType, base);
+        if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
+            && (match+MAX_DISTANCE>=ip)
+            && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) )
+        { token=op++; *token=0; goto _next_match; }
+
+        /* Prepare next loop */
+        forwardH = LZ4_hashPosition(++ip, tableType);
+    }
+
+_last_literals:
+    /* Encode Last Literals */
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((outputLimited) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize))
+            return 0;   /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compress(const char* source, char* dest, int inputSize)
+{
+#if (HEAPMODE)
+    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8);   /* Aligned on 8-bytes boundaries */
+#else
+    U64 ctx[LZ4_STREAMSIZE_U64] = {0};      /* Ensure data is aligned on 8-bytes boundaries */
+#endif
+    int result;
+
+    if (inputSize < LZ4_64Klimit)
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue);
+    else
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
+
+#if (HEAPMODE)
+    FREEMEM(ctx);
+#endif
+    return result;
+}
+
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+#if (HEAPMODE)
+    void* ctx = ALLOCATOR(LZ4_STREAMSIZE_U64, 8);   /* Aligned on 8-bytes boundaries */
+#else
+    U64 ctx[LZ4_STREAMSIZE_U64] = {0};      /* Ensure data is aligned on 8-bytes boundaries */
+#endif
+    int result;
+
+    if (inputSize < LZ4_64Klimit)
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue);
+    else
+        result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
+
+#if (HEAPMODE)
+    FREEMEM(ctx);
+#endif
+    return result;
+}
+
+
+/*****************************************
+   Experimental : Streaming functions
+*****************************************/
+
+/*
+ * LZ4_initStream
+ * Use this function once, to init a newly allocated LZ4_stream_t structure
+ * Return : 1 if OK, 0 if error
+ */
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+    LZ4_resetStream(lz4s);
+    return lz4s;
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+    FREEMEM(LZ4_stream);
+    return (0);
+}
+
+
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* p = (const BYTE*)dictionary;
+    const BYTE* const dictEnd = p + dictSize;
+    const BYTE* base;
+
+    if (dict->initCheck) LZ4_resetStream(LZ4_dict);                         /* Uninitialized structure detected */
+
+    if (dictSize < MINMATCH)
+    {
+        dict->dictionary = NULL;
+        dict->dictSize = 0;
+        return 0;
+    }
+
+    if (p <= dictEnd - 64 KB) p = dictEnd - 64 KB;
+    base = p - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->currentOffset += dict->dictSize;
+
+    while (p <= dictEnd-MINMATCH)
+    {
+        LZ4_putPosition(p, dict, byU32, base);
+        p+=3;
+    }
+
+    return dict->dictSize;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+    if ((LZ4_dict->currentOffset > 0x80000000) ||
+        ((size_t)LZ4_dict->currentOffset > (size_t)src))   /* address space overflow */
+    {
+        /* rescale hash table */
+        U32 delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        for (i=0; i<HASH_SIZE_U32; i++)
+        {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* source, char* dest, int inputSize,
+                                                int maxOutputSize, limitedOutput_directive limit)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = (const BYTE*) source;
+    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
+    if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+    LZ4_renormDictT(streamPtr, smallest);
+
+    /* Check overlapping input/dictionary space */
+    {
+        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd))
+        {
+            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+        }
+    }
+
+    /* prefix mode : source data follows dictionary */
+    if (dictEnd == (const BYTE*)source)
+    {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, dictSmall);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, noDictIssue);
+        streamPtr->dictSize += (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+
+    /* external dictionary mode */
+    {
+        int result;
+        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, dictSmall);
+        else
+            result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, noDictIssue);
+        streamPtr->dictionary = (const BYTE*)source;
+        streamPtr->dictSize = (U32)inputSize;
+        streamPtr->currentOffset += (U32)inputSize;
+        return result;
+    }
+}
+
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+    return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, 0, notLimited);
+}
+
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput);
+}
+
+
+/* Hidden debug function, to force separate dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+{
+    LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict;
+    int result;
+    const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+    const BYTE* smallest = dictEnd;
+    if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+    LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest);
+
+    result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue);
+
+    streamPtr->dictionary = (const BYTE*)source;
+    streamPtr->dictSize = (U32)inputSize;
+    streamPtr->currentOffset += (U32)inputSize;
+
+    return result;
+}
+
+
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+    LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+    const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
+
+    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+
+    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+    dict->dictionary = (const BYTE*)safeBuffer;
+    dict->dictSize = (U32)dictSize;
+
+    return dictSize;
+}
+
+
+
+/****************************
+   Decompression functions
+****************************/
+/*
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is essential this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+                 const char* const source,
+                 char* const dest,
+                 int inputSize,
+                 int outputSize,         /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
+                 int partialDecoding,    /* full, partial */
+                 int targetOutputSize,   /* only used if partialDecoding==partial */
+                 int dict,               /* noDict, withPrefix64k, usingExtDict */
+                 const BYTE* const lowPrefix,  /* == dest if dict == noDict */
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note : = 0 if noDict */
+                 )
+{
+    /* Local Variables */
+    const BYTE* restrict ip = (const BYTE*) source;
+    const BYTE* const iend = ip + inputSize;
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + outputSize;
+    BYTE* cpy;
+    BYTE* oexit = op + targetOutputSize;
+    const BYTE* const lowLimit = lowPrefix - dictSize;
+
+    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};
+    const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+
+    const int safeDecode = (endOnInput==endOnInputSize);
+    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+    /* Special cases */
+    if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;                         /* targetOutputSize too high => decode everything */
+    if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
+    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+
+    /* Main Loop */
+    while (1)
+    {
+        unsigned token;
+        size_t length;
+        const BYTE* match;
+
+        /* get literal length */
+        token = *ip++;
+        if ((length=(token>>ML_BITS)) == RUN_MASK)
+        {
+            unsigned s;
+            do
+            {
+                s = *ip++;
+                length += s;
+            }
+            while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255));
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error;   /* overflow detection */
+            if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error;   /* overflow detection */
+        }
+
+        /* copy literals */
+        cpy = op+length;
+        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+        {
+            if (partialDecoding)
+            {
+                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
+                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
+            }
+            else
+            {
+                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
+                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
+            }
+            memcpy(op, ip, length);
+            ip += length;
+            op += length;
+            break;     /* Necessarily EOF, due to parsing restrictions */
+        }
+        LZ4_wildCopy(op, ip, cpy);
+        ip += length; op = cpy;
+
+        /* get offset */
+        match = cpy - LZ4_readLE16(ip); ip+=2;
+        if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error;   /* Error : offset outside destination buffer */
+
+        /* get matchlength */
+        length = token & ML_MASK;
+        if (length == ML_MASK)
+        {
+            unsigned s;
+            do
+            {
+                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+                s = *ip++;
+                length += s;
+            } while (s==255);
+            if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error;   /* overflow detection */
+        }
+        length += MINMATCH;
+
+        /* check external dictionary */
+        if ((dict==usingExtDict) && (match < lowPrefix))
+        {
+            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */
+
+            if (length <= (size_t)(lowPrefix-match))
+            {
+                /* match can be copied as a single segment from external dictionary */
+                match = dictEnd - (lowPrefix-match);
+                memcpy(op, match, length);
+                op += length;
+            }
+            else
+            {
+                /* match encompass external dictionary and current segment */
+                size_t copySize = (size_t)(lowPrefix-match);
+                memcpy(op, dictEnd - copySize, copySize);
+                op += copySize;
+                copySize = length - copySize;
+                if (copySize > (size_t)(op-lowPrefix))   /* overlap within current segment */
+                {
+                    BYTE* const endOfMatch = op + copySize;
+                    const BYTE* copyFrom = lowPrefix;
+                    while (op < endOfMatch) *op++ = *copyFrom++;
+                }
+                else
+                {
+                    memcpy(op, lowPrefix, copySize);
+                    op += copySize;
+                }
+            }
+            continue;
+        }
+
+        /* copy repeated sequence */
+        cpy = op + length;
+        if (unlikely((op-match)<8))
+        {
+            const size_t dec64 = dec64table[op-match];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[op-match];
+            LZ4_copy4(op+4, match);
+            op += 8; match -= dec64;
+        } else { LZ4_copy8(op, match); op+=8; match+=8; }
+
+        if (unlikely(cpy>oend-12))
+        {
+            if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals */
+            if (op < oend-8)
+            {
+                LZ4_wildCopy(op, match, oend-8);
+                match += (oend-8) - op;
+                op = oend-8;
+            }
+            while (op<cpy) *op++ = *match++;
+        }
+        else
+            LZ4_wildCopy(op, match, cpy);
+        op=cpy;   /* correction */
+    }
+
+    /* end of decoding */
+    if (endOnInput)
+       return (int) (((char*)op)-dest);     /* Nb of output bytes decoded */
+    else
+       return (int) (((char*)ip)-source);   /* Nb of input bytes read */
+
+    /* Overflow error detected */
+_output_error:
+    return (int) (-(((char*)ip)-source))-1;
+}
+
+
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
+}
+
+int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
+}
+
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
+}
+
+
+/* streaming decompression functions */
+
+typedef struct
+{
+    BYTE* externalDict;
+    size_t extDictSize;
+    BYTE* prefixEnd;
+    size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStreamDecode()
+ * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{
+    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(sizeof(U64), LZ4_STREAMDECODESIZE_U64);
+    return lz4s;
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+    FREEMEM(LZ4_stream);
+    return 0;
+}
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    lz4sd->prefixSize = (size_t) dictSize;
+    lz4sd->prefixEnd = (BYTE*) dictionary + dictSize;
+    lz4sd->externalDict = NULL;
+    lz4sd->extDictSize  = 0;
+    return 1;
+}
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where it stands using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest)
+    {
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += result;
+        lz4sd->prefixEnd  += result;
+    }
+    else
+    {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                        endOnInputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = result;
+        lz4sd->prefixEnd  = (BYTE*)dest + result;
+    }
+
+    return result;
+}
+
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+    LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+    int result;
+
+    if (lz4sd->prefixEnd == (BYTE*)dest)
+    {
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixEnd  += originalSize;
+    }
+    else
+    {
+        lz4sd->extDictSize = lz4sd->prefixSize;
+        lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
+        result = LZ4_decompress_generic(source, dest, 0, originalSize,
+                                        endOnOutputSize, full, 0,
+                                        usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
+        if (result <= 0) return result;
+        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
+    }
+
+    return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    the dictionary must be explicitly provided within parameters
+*/
+
+FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
+{
+    if (dictSize==0)
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
+    if (dictStart+dictSize == dest)
+    {
+        if (dictSize >= (int)(64 KB - 1))
+            return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
+        return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
+    }
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (BYTE*)dictStart, dictSize);
+}
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
+}
+
+/* debug function */
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (BYTE*)dictStart, dictSize);
+}
+
+
+/***************************************************
+    Obsolete Functions
+***************************************************/
+/*
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+
+static void LZ4_init(LZ4_stream_t_internal* lz4ds, const BYTE* base)
+{
+    MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE);
+    lz4ds->bufferStart = base;
+}
+
+int LZ4_resetStreamState(void* state, const char* inputBuffer)
+{
+    if ((((size_t)state) & 3) != 0) return 1;   /* Error : pointer is not aligned on 4-bytes boundary */
+    LZ4_init((LZ4_stream_t_internal*)state, (const BYTE*)inputBuffer);
+    return 0;
+}
+
+void* LZ4_create (const char* inputBuffer)
+{
+    void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64);
+    LZ4_init ((LZ4_stream_t_internal*)lz4ds, (const BYTE*)inputBuffer);
+    return lz4ds;
+}
+
+char* LZ4_slideInputBuffer (void* LZ4_Data)
+{
+    LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data;
+    int dictSize = LZ4_saveDict((LZ4_stream_t*)ctx, (char*)ctx->bufferStart, 64 KB);
+    return (char*)(ctx->bufferStart + dictSize);
+}
+
+/*  Obsolete compresson functions using User-allocated state */
+
+int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+
+int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize)
+{
+    if (((size_t)(state)&3) != 0) return 0;   /* Error : state is not aligned on 4-bytes boundary */
+    MEM_INIT(state, 0, LZ4_STREAMSIZE);
+
+    if (inputSize < LZ4_64Klimit)
+        return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue);
+    else
+        return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
+}
+
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    if (((size_t)(state)&3) != 0) return 0;   /* Error : state is not aligned on 4-bytes boundary */
+    MEM_INIT(state, 0, LZ4_STREAMSIZE);
+
+    if (inputSize < LZ4_64Klimit)
+        return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue);
+    else
+        return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue);
+}
+
+/* Obsolete streaming decompression functions */
+
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+    return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
+}
+
+#endif   /* LZ4_COMMONDEFS_ONLY */
+
diff --git a/lz4/lz4.h b/lz4/lz4.h
index 4897eb2..7778caa 100644
--- a/lz4/lz4.h
+++ b/lz4/lz4.h
@@ -1,128 +1,315 @@
-/*
-   LZ4 - Fast LZ compression algorithm
-   Header File
-   Copyright (C) 2011-2012, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
-*/
-#pragma once
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-//**************************************
-// Compiler Options
-//**************************************
-#if defined(_MSC_VER) && !defined(__cplusplus)   // Visual Studio
-#  define inline __inline           // Visual is not C99, but supports some kind of inline
-#endif
-
-
-//****************************
-// Simple Functions
-//****************************
-
-int LZ4_compress   (const char* source, char* dest, int isize);
-int LZ4_uncompress (const char* source, char* dest, int osize);
-
-/*
-LZ4_compress() :
-    Compresses 'isize' bytes from 'source' into 'dest'.
-    Destination buffer must be already allocated,
-    and must be sized to handle worst cases situations (input data not compressible)
-    Worst case size evaluation is provided by function LZ4_compressBound()
-
-    isize  : is the input size. Max supported value is ~1.9GB
-    return : the number of bytes written in buffer dest
-
-
-LZ4_uncompress() :
-    osize  : is the output size, therefore the original size
-    return : the number of bytes read in the source buffer
-             If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
-             This function never writes outside of provided buffers, and never modifies input buffer.
-    note : destination buffer must be already allocated.
-           its size must be a minimum of 'osize' bytes.
-*/
-
-
-//****************************
-// Advanced Functions
-//****************************
-
-static inline int LZ4_compressBound(int isize)   { return ((isize) + ((isize)/255) + 16); }
-#define           LZ4_COMPRESSBOUND(    isize)            ((isize) + ((isize)/255) + 16)
-
-/*
-LZ4_compressBound() :
-    Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
-    primarily useful for memory allocation of output buffer.
-	inline function is recommended for the general case,
-	but macro is also provided when results need to be evaluated at compile time (such as table size allocation).
-
-    isize  : is the input size. Max supported value is ~1.9GB
-    return : maximum output size in a "worst case" scenario
-    note : this function is limited by "int" range (2^31-1)
-*/
-
-
-int LZ4_compress_limitedOutput   (const char* source, char* dest, int isize, int maxOutputSize);
-
-/*
-LZ4_compress_limitedOutput() :
-    Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-    If it cannot achieve it, compression will stop, and result of the function will be zero.
-    This function never writes outside of provided output buffer.
-
-    isize  : is the input size. Max supported value is ~1.9GB
-    maxOutputSize : is the size of the destination buffer (which must be already allocated)
-    return : the number of bytes written in buffer 'dest'
-             or 0 if the compression fails
-*/
-
-
-int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
-
-/*
-LZ4_uncompress_unknownOutputSize() :
-    isize  : is the input size, therefore the compressed size
-    maxOutputSize : is the size of the destination buffer (which must be already allocated)
-    return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
-             If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
-             This function never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets
-    note   : Destination buffer must be already allocated.
-             This version is slightly slower than LZ4_uncompress()
-*/
-
-
-#if defined (__cplusplus)
-}
-#endif
+/*
+   LZ4 - Fast LZ compression algorithm
+   Header File
+   Copyright (C) 2011-2014, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * lz4.h provides raw compression format functions, for optimal performance and integration into programs.
+ * If you need to generate data using an inter-operable format (respecting the framing specification),
+ * please use lz4frame.h instead.
+*/
+
+/**************************************
+   Version
+**************************************/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    5    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+int LZ4_versionNumber (void);
+
+/**************************************
+   Tuning parameter
+**************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+
+/**************************************
+   Simple Functions
+**************************************/
+
+int LZ4_compress        (const char* source, char* dest, int sourceSize);
+int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+
+/*
+LZ4_compress() :
+    Compresses 'sourceSize' bytes from 'source' into 'dest'.
+    Destination buffer must be already allocated,
+    and must be sized to handle worst cases situations (input data not compressible)
+    Worst case size evaluation is provided by function LZ4_compressBound()
+    inputSize : Max supported value is LZ4_MAX_INPUT_SIZE
+    return : the number of bytes written in buffer dest
+             or 0 if the compression fails
+
+LZ4_decompress_safe() :
+    compressedSize : is obviously the source size
+    maxDecompressedSize : is the size of the destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into the destination buffer (necessarily <= maxDecompressedSize)
+             If the destination buffer is not large enough, decoding will stop and output an error code (<0).
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function is protected against buffer overflow exploits,
+             and never writes outside of output buffer, nor reads outside of input buffer.
+             It is also protected against malicious data packets.
+*/
+
+
+/**************************************
+   Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*
+LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (output buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+
+    isize  : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE
+    return : maximum output size in a "worst case" scenario
+             or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*/
+int LZ4_compressBound(int isize);
+
+
+/*
+LZ4_compress_limitedOutput() :
+    Compress 'sourceSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    If it cannot achieve it, compression will stop, and result of the function will be zero.
+    This saves time and memory on detecting non-compressible (or barely compressible) data.
+    This function never writes outside of provided output buffer.
+
+    sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+    maxOutputSize : is the size of the destination buffer (which must be already allocated)
+    return : the number of bytes written in buffer 'dest'
+             or 0 if compression fails
+*/
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+
+
+/*
+LZ4_compress_withState() :
+    Same compression functions, but using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and then, provide it as 'void* state' to compression functions.
+*/
+int LZ4_sizeofState(void);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+
+/*
+LZ4_decompress_fast() :
+    originalSize : is the original and therefore uncompressed size
+    return : the number of bytes read from the source buffer (in other words, the compressed size)
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+    note : This function fully respect memory boundaries for properly formed compressed data.
+           It is a bit faster than LZ4_decompress_safe().
+           However, it does not provide any protection against intentionally modified data stream (malicious input).
+           Use this function in trusted environment only (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
+
+
+/*
+LZ4_decompress_safe_partial() :
+    This function decompress a compressed block of size 'compressedSize' at position 'source'
+    into destination buffer 'dest' of size 'maxDecompressedSize'.
+    The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+    reducing decompression time.
+    return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize)
+       Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller.
+             Always control how many bytes were decoded.
+             If the source stream is detected malformed, the function will stop decoding and return a negative result.
+             This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
+
+/***********************************************
+   Streaming Compression Functions
+***********************************************/
+
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+/*
+ * LZ4_stream_t
+ * information structure to track an LZ4 stream.
+ * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are statically linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
+ */
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+
+/*
+ * LZ4_resetStream
+ * Use this function to init an allocated LZ4_stream_t structure
+ */
+void LZ4_resetStream (LZ4_stream_t* LZ4_streamPtr);
+
+/*
+ * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
+ * LZ4_freeStream releases its memory.
+ * In the context of a DLL (liblz4), please use these methods rather than the static struct.
+ * They are more future proof, in case of a change of LZ4_stream_t size.
+ */
+LZ4_stream_t* LZ4_createStream(void);
+int           LZ4_freeStream (LZ4_stream_t* LZ4_streamPtr);
+
+/*
+ * LZ4_loadDict
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ * Loading a size of 0 is allowed.
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict (LZ4_stream_t* LZ4_streamPtr, const char* dictionary, int dictSize);
+
+/*
+ * LZ4_compress_continue
+ * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio
+ * Previous data blocks are assumed to still be present at their previous location.
+ */
+int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+
+/*
+ * LZ4_compress_limitedOutput_continue
+ * Same as before, but also specify a maximum target compressed size (maxOutputSize)
+ * If objective cannot be met, compression exits, and returns a zero.
+ */
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*
+ * LZ4_saveDict
+ * If previously compressed data block is not guaranteed to remain available at its memory location
+ * save it into a safer place (char* safeBuffer)
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ *        dictionary is immediately usable, you can therefore call again LZ4_compress_continue()
+ * Return : dictionary size in bytes, or 0 if error
+ * Note : any dictSize > 64 KB will be interpreted as 64KB.
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_streamPtr, char* safeBuffer, int dictSize);
+
+
+/************************************************
+   Streaming Decompression Functions
+************************************************/
+
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+/*
+ * LZ4_streamDecode_t
+ * information structure to track an LZ4 stream.
+ * init this structure content using LZ4_setStreamDecode or memset() before first use !
+ *
+ * In the context of a DLL (liblz4) please prefer usage of construction methods below.
+ * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
+ * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
+ * LZ4_freeStreamDecode releases its memory.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*
+ * LZ4_setStreamDecode
+ * Use this function to instruct where to find the dictionary.
+ * Setting a size of 0 is allowed (same effect as reset).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in "streaming" mode.
+    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
+    If this condition is not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate where is its new address using LZ4_setStreamDecode()
+*/
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as
+    a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue()
+    They are stand-alone and don't use nor update an LZ4_streamDecode_t structure.
+*/
+int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
+int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+
+
+
+/**************************************
+   Obsolete Functions
+**************************************/
+/*
+Obsolete decompression functions
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is the same as LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+These function prototypes are now disabled; uncomment them if you really need them.
+It is highly recommended to stop using these functions and migrate to newer ones */
+/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
+/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
+
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+void* LZ4_create (const char* inputBuffer);
+int   LZ4_sizeofStreamState(void);
+int   LZ4_resetStreamState(void* state, const char* inputBuffer);
+char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int compressedSize, int maxOutputSize);
+int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int originalSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lz4/lz4_offset_hack.c b/lz4/lz4_offset_hack.c
deleted file mode 100644
index c7bab96..0000000
--- a/lz4/lz4_offset_hack.c
+++ /dev/null
@@ -1,5 +0,0 @@
-#include "lz4.h"
-
-int LZ4_uncompress_offset (const char* source, char* dest, int osize, int offset) {
-    return LZ4_uncompress(source + offset, dest, osize);
-}
\ No newline at end of file
diff --git a/lz4/lz4frame.c b/lz4/lz4frame.c
new file mode 100644
index 0000000..5183f22
--- /dev/null
+++ b/lz4/lz4frame.c
@@ -0,0 +1,1330 @@
+/*
+LZ4 auto-framing library
+Copyright (C) 2011-2014, Yann Collet.
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- LZ4 source repository : http://code.google.com/p/lz4/
+- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed Frames
+* fully conformant to specification v1.4.1.
+* All related operations, including memory management, are handled by the library.
+* */
+
+
+/**************************************
+Compiler Options
+**************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#ifdef __GNUC__
+#  pragma GCC diagnostic ignored "-Wmissing-braces"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
+#  pragma GCC diagnostic ignored "-Wmissing-field-initializers"   /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */
+#endif
+
+
+/**************************************
+Memory routines
+**************************************/
+#include <stdlib.h>   /* malloc, calloc, free */
+#define ALLOCATOR(s)   calloc(1,s)
+#define FREEMEM        free
+#include <string.h>   /* memset, memcpy, memmove */
+#define MEM_INIT       memset
+
+
+/**************************************
+Includes
+**************************************/
+#include "lz4frame_static.h"
+#include "lz4.h"
+#include "lz4hc.h"
+#include "xxhash.h"
+
+
+/**************************************
+Basic Types
+**************************************/
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+#endif
+
+
+/**************************************
+Constants
+**************************************/
+#define KB *(1<<10)
+#define MB *(1<<20)
+#define GB *(1<<30)
+
+#define _1BIT  0x01
+#define _2BITS 0x03
+#define _3BITS 0x07
+#define _4BITS 0x0F
+#define _8BITS 0xFF
+
+#define LZ4F_MAGICNUMBER 0x184D2204U
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U
+#define LZ4F_MAXHEADERFRAME_SIZE 7
+#define LZ4F_BLOCKSIZEID_DEFAULT max64KB
+
+static const U32 minHClevel = 3;
+
+/**************************************
+Structures and local types
+**************************************/
+typedef struct
+{
+    LZ4F_preferences_t prefs;
+    U32 version;
+    U32 cStage;
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    BYTE*  tmpBuff;
+    BYTE*  tmpIn;
+    size_t tmpInSize;
+    XXH32_state_t xxh;
+    void* lz4CtxPtr;
+    U32 lz4CtxLevel;     /* 0: unallocated;  1: LZ4_stream_t;  3: LZ4_streamHC_t */
+} LZ4F_cctx_internal_t;
+
+typedef struct
+{
+    LZ4F_frameInfo_t frameInfo;
+    unsigned version;
+    unsigned dStage;
+    size_t maxBlockSize;
+    size_t maxBufferSize;
+    const BYTE* srcExpect;
+    BYTE*  tmpIn;
+    size_t tmpInSize;
+    size_t tmpInTarget;
+    BYTE*  tmpOutBuffer;
+    BYTE*  dict;
+    size_t dictSize;
+    BYTE*  tmpOut;
+    size_t tmpOutSize;
+    size_t tmpOutStart;
+    XXH32_state_t xxh;
+    BYTE   header[8];
+} LZ4F_dctx_internal_t;
+
+
+/**************************************
+Macros
+**************************************/
+
+
+/**************************************
+Error management
+**************************************/
+#define LZ4F_GENERATE_STRING(STRING) #STRING,
+static const char* LZ4F_errorStrings[] = { LZ4F_LIST_ERRORS(LZ4F_GENERATE_STRING) };
+
+
+U32 LZ4F_isError(LZ4F_errorCode_t code)
+{
+    return (code > (LZ4F_errorCode_t)(-ERROR_maxCode));
+}
+
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code)
+{
+    static const char* codeError = "Unspecified error code";
+    if (LZ4F_isError(code)) return LZ4F_errorStrings[-(int)(code)];
+    return codeError;
+}
+
+
+/**************************************
+Private functions
+**************************************/
+static size_t LZ4F_getBlockSize(unsigned blockSizeID)
+{
+    static const size_t blockSizes[4] = { 64 KB, 256 KB, 1 MB, 4 MB };
+
+    if (blockSizeID == 0) blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    blockSizeID -= 4;
+    if (blockSizeID > 3) return (size_t)-ERROR_maxBlockSize_invalid;
+    return blockSizes[blockSizeID];
+}
+
+
+/* unoptimized version; solves endianess & alignment issues */
+static void LZ4F_writeLE32 (BYTE* dstPtr, U32 value32)
+{
+    dstPtr[0] = (BYTE)value32;
+    dstPtr[1] = (BYTE)(value32 >> 8);
+    dstPtr[2] = (BYTE)(value32 >> 16);
+    dstPtr[3] = (BYTE)(value32 >> 24);
+}
+
+static U32 LZ4F_readLE32 (const BYTE* srcPtr)
+{
+    U32 value32 = srcPtr[0];
+    value32 += (srcPtr[1]<<8);
+    value32 += (srcPtr[2]<<16);
+    value32 += (srcPtr[3]<<24);
+    return value32;
+}
+
+
+static BYTE LZ4F_headerChecksum (const BYTE* header, size_t length)
+{
+    U32 xxh = XXH32(header, (U32)length, 0);
+    return (BYTE)(xxh >> 8);
+}
+
+
+/**************************************
+Simple compression functions
+**************************************/
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefs = { 0 };
+    size_t headerSize;
+    size_t streamSize;
+
+    if (preferencesPtr!=NULL) prefs = *preferencesPtr;
+    {
+        blockSizeID_t proposedBSID = max64KB;
+        size_t maxBlockSize = 64 KB;
+        while (prefs.frameInfo.blockSizeID > proposedBSID)
+        {
+            if (srcSize <= maxBlockSize)
+            {
+                prefs.frameInfo.blockSizeID = proposedBSID;
+                break;
+            }
+            proposedBSID++;
+            maxBlockSize <<= 2;
+        }
+    }
+    prefs.autoFlush = 1;
+
+    headerSize = 7;      /* basic header size (no option) including magic number */
+    streamSize = LZ4F_compressBound(srcSize, &prefs);
+
+    return headerSize + streamSize;
+}
+
+
+/* LZ4F_compressFrame()
+* Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.4.1, in a single step.
+* The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+* You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound()
+* If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode)
+* The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+* The result of the function is the number of bytes written into dstBuffer.
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+*/
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_cctx_internal_t cctxI = { 0 };   /* works because no allocation */
+    LZ4F_preferences_t prefs = { 0 };
+    LZ4F_compressOptions_t options = { 0 };
+    LZ4F_errorCode_t errorCode;
+    BYTE* const dstStart = (BYTE*) dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* const dstEnd = dstStart + dstMaxSize;
+
+
+    cctxI.version = LZ4F_VERSION;
+    cctxI.maxBufferSize = 5 MB;   /* mess with real buffer size to prevent allocation; works because autoflush==1 & stableSrc==1 */
+
+    if (preferencesPtr!=NULL) prefs = *preferencesPtr;
+    {
+        blockSizeID_t proposedBSID = max64KB;
+        size_t maxBlockSize = 64 KB;
+        while (prefs.frameInfo.blockSizeID > proposedBSID)
+        {
+            if (srcSize <= maxBlockSize)
+            {
+                prefs.frameInfo.blockSizeID = proposedBSID;
+                break;
+            }
+            proposedBSID++;
+            maxBlockSize <<= 2;
+        }
+    }
+    prefs.autoFlush = 1;
+    if (srcSize <= LZ4F_getBlockSize(prefs.frameInfo.blockSizeID))
+        prefs.frameInfo.blockMode = blockIndependent;   /* no need for linked blocks */
+
+    options.stableSrc = 1;
+
+    if (dstMaxSize < LZ4F_compressFrameBound(srcSize, &prefs))
+        return (size_t)-ERROR_dstMaxSize_tooSmall;
+
+    errorCode = LZ4F_compressBegin(&cctxI, dstBuffer, dstMaxSize, &prefs);  /* write header */
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;   /* header size */
+
+    dstMaxSize -= errorCode;
+    errorCode = LZ4F_compressUpdate(&cctxI, dstPtr, dstMaxSize, srcBuffer, srcSize, &options);
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;
+
+    errorCode = LZ4F_compressEnd(&cctxI, dstPtr, dstEnd-dstPtr, &options);   /* flush last block, and generate suffix */
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;
+
+    FREEMEM(cctxI.lz4CtxPtr);
+
+    return (dstPtr - dstStart);
+}
+
+
+/***********************************
+* Advanced compression functions
+* *********************************/
+
+/* LZ4F_createCompressionContext() :
+* The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+* This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+* The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
+* The function will provide a pointer to an allocated LZ4F_compressionContext_t object.
+* If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
+* Object can release its memory using LZ4F_freeCompressionContext();
+*/
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version)
+{
+    LZ4F_cctx_internal_t* cctxPtr;
+
+    cctxPtr = (LZ4F_cctx_internal_t*)ALLOCATOR(sizeof(LZ4F_cctx_internal_t));
+    if (cctxPtr==NULL) return (LZ4F_errorCode_t)(-ERROR_allocation_failed);
+
+    cctxPtr->version = version;
+    cctxPtr->cStage = 0;   /* Next stage : write header */
+
+    *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr;
+
+    return OK_NoError;
+}
+
+
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext)
+{
+    LZ4F_cctx_internal_t* cctxPtr = (LZ4F_cctx_internal_t*)LZ4F_compressionContext;
+
+    FREEMEM(cctxPtr->lz4CtxPtr);
+    FREEMEM(cctxPtr->tmpBuff);
+    FREEMEM(LZ4F_compressionContext);
+
+    return OK_NoError;
+}
+
+
+/* LZ4F_compressBegin() :
+* will write the frame header into dstBuffer.
+* dstBuffer must be large enough to accommodate a header (dstMaxSize). Maximum header size is LZ4F_MAXHEADERFRAME_SIZE bytes.
+* The result of the function is the number of bytes written into dstBuffer for the header
+* or an error code (can be tested using LZ4F_isError())
+*/
+size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    LZ4F_preferences_t prefNull = { 0 };
+    LZ4F_cctx_internal_t* cctxPtr = (LZ4F_cctx_internal_t*)compressionContext;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    BYTE* headerStart;
+    size_t requiredBuffSize;
+
+    if (dstMaxSize < LZ4F_MAXHEADERFRAME_SIZE) return (size_t)-ERROR_dstMaxSize_tooSmall;
+    if (cctxPtr->cStage != 0) return (size_t)-ERROR_GENERIC;
+    if (preferencesPtr == NULL) preferencesPtr = &prefNull;
+    cctxPtr->prefs = *preferencesPtr;
+
+    /* ctx Management */
+    {
+        U32 targetCtxLevel = cctxPtr->prefs.compressionLevel<minHClevel ? 1 : 2;
+        if (cctxPtr->lz4CtxLevel < targetCtxLevel)
+        {
+            FREEMEM(cctxPtr->lz4CtxPtr);
+            if (cctxPtr->prefs.compressionLevel<minHClevel)
+                cctxPtr->lz4CtxPtr = (void*)LZ4_createStream();
+            else
+                cctxPtr->lz4CtxPtr = (void*)LZ4_createStreamHC();
+            cctxPtr->lz4CtxLevel = targetCtxLevel;
+        }
+    }
+
+    /* Buffer Management */
+    if (cctxPtr->prefs.frameInfo.blockSizeID == 0) cctxPtr->prefs.frameInfo.blockSizeID = LZ4F_BLOCKSIZEID_DEFAULT;
+    cctxPtr->maxBlockSize = LZ4F_getBlockSize(cctxPtr->prefs.frameInfo.blockSizeID);
+
+    requiredBuffSize = cctxPtr->maxBlockSize + ((cctxPtr->prefs.frameInfo.blockMode == blockLinked) * 128 KB);
+    if (preferencesPtr->autoFlush)
+        requiredBuffSize = (cctxPtr->prefs.frameInfo.blockMode == blockLinked) * 64 KB;   /* just needs dict */
+
+    if (cctxPtr->maxBufferSize < requiredBuffSize)
+    {
+        cctxPtr->maxBufferSize = requiredBuffSize;
+        FREEMEM(cctxPtr->tmpBuff);
+        cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize);
+        if (cctxPtr->tmpBuff == NULL) return (size_t)-ERROR_allocation_failed;
+    }
+    cctxPtr->tmpIn = cctxPtr->tmpBuff;
+    cctxPtr->tmpInSize = 0;
+    XXH32_reset(&(cctxPtr->xxh), 0);
+    if (cctxPtr->prefs.compressionLevel<minHClevel)
+        LZ4_resetStream((LZ4_stream_t*)(cctxPtr->lz4CtxPtr));
+    else
+        LZ4_resetStreamHC((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), cctxPtr->prefs.compressionLevel);
+
+    /* Magic Number */
+    LZ4F_writeLE32(dstPtr, LZ4F_MAGICNUMBER);
+    dstPtr += 4;
+    headerStart = dstPtr;
+
+    /* FLG Byte */
+    *dstPtr++ = ((1 & _2BITS) << 6)    /* Version('01') */
+        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)    /* Block mode */
+        + (char)((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2);   /* Stream checksum */
+    /* BD Byte */
+    *dstPtr++ = (char)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4);
+    /* CRC Byte */
+    *dstPtr++ = LZ4F_headerChecksum(headerStart, 2);
+
+    cctxPtr->cStage = 1;   /* header written, wait for data block */
+
+    return (dstPtr - dstStart);
+}
+
+
+/* LZ4F_compressBound() : gives the size of Dst buffer given a srcSize to handle worst case situations.
+*                        The LZ4F_frameInfo_t structure is optional :
+*                        you can provide NULL as argument, all preferences will then be set to default.
+* */
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr)
+{
+    const LZ4F_preferences_t prefsNull = { 0 };
+    const LZ4F_preferences_t* prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
+    blockSizeID_t bid = prefsPtr->frameInfo.blockSizeID;
+    size_t blockSize = LZ4F_getBlockSize(bid);
+    unsigned nbBlocks = (unsigned)(srcSize / blockSize) + 1;
+    size_t lastBlockSize = prefsPtr->autoFlush ? srcSize % blockSize : blockSize;
+    size_t blockInfo = 4;   /* default, without block CRC option */
+    size_t frameEnd = 4 + (prefsPtr->frameInfo.contentChecksumFlag*4);
+    size_t result = (blockInfo * nbBlocks) + (blockSize * (nbBlocks-1)) + lastBlockSize + frameEnd;
+
+    return result;
+}
+
+
+typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level);
+
+static size_t LZ4F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz4ctx, int level)
+{
+    /* compress one block */
+    BYTE* cSizePtr = (BYTE*)dst;
+    U32 cSize;
+    cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1), level);
+    LZ4F_writeLE32(cSizePtr, cSize);
+    if (cSize == 0)   /* compression failed */
+    {
+        cSize = (U32)srcSize;
+        LZ4F_writeLE32(cSizePtr, cSize + LZ4F_BLOCKUNCOMPRESSED_FLAG);
+        memcpy(cSizePtr+4, src, srcSize);
+    }
+    return cSize + 4;
+}
+
+
+static int LZ4F_localLZ4_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+{
+    (void) level;
+    return LZ4_compress_limitedOutput_withState(ctx, src, dst, srcSize, dstSize);
+}
+
+static int LZ4F_localLZ4_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+{
+    (void) level;
+    return LZ4_compress_limitedOutput_continue((LZ4_stream_t*)ctx, src, dst, srcSize, dstSize);
+}
+
+static int LZ4F_localLZ4_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level)
+{
+    (void) level;
+    return LZ4_compressHC_limitedOutput_continue((LZ4_streamHC_t*)ctx, src, dst, srcSize, dstSize);
+}
+
+static compressFunc_t LZ4F_selectCompression(blockMode_t blockMode, U32 level)
+{
+    if (level < minHClevel)
+    {
+        if (blockMode == blockIndependent) return LZ4F_localLZ4_compress_limitedOutput_withState;
+        return LZ4F_localLZ4_compress_limitedOutput_continue;
+    }
+    if (blockMode == blockIndependent) return LZ4_compressHC2_limitedOutput_withStateHC;
+    return LZ4F_localLZ4_compressHC_limitedOutput_continue;
+}
+
+static int LZ4F_localSaveDict(LZ4F_cctx_internal_t* cctxPtr)
+{
+    if (cctxPtr->prefs.compressionLevel < minHClevel)
+        return LZ4_saveDict ((LZ4_stream_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+    return LZ4_saveDictHC ((LZ4_streamHC_t*)(cctxPtr->lz4CtxPtr), (char*)(cctxPtr->tmpBuff), 64 KB);
+}
+
+typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus;
+
+/* LZ4F_compressUpdate()
+* LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+* The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+* If this condition is not respected, LZ4F_compress() will fail (result is an errorCode)
+* You can get the minimum value of dstMaxSize by using LZ4F_compressBound()
+* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+* The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered.
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+*/
+size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_compressOptions_t cOptionsNull = { 0 };
+    LZ4F_cctx_internal_t* cctxPtr = (LZ4F_cctx_internal_t*)compressionContext;
+    size_t blockSize = cctxPtr->maxBlockSize;
+    const BYTE* srcPtr = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcPtr + srcSize;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    LZ4F_lastBlockStatus lastBlockCompressed = notDone;
+    compressFunc_t compress;
+
+
+    if (cctxPtr->cStage != 1) return (size_t)-ERROR_GENERIC;
+    if (dstMaxSize < LZ4F_compressBound(srcSize, &(cctxPtr->prefs))) return (size_t)-ERROR_dstMaxSize_tooSmall;
+    if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
+
+    /* select compression function */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    /* complete tmp buffer */
+    if (cctxPtr->tmpInSize > 0)   /* some data already within tmp buffer */
+    {
+        size_t sizeToCopy = blockSize - cctxPtr->tmpInSize;
+        if (sizeToCopy > srcSize)
+        {
+            /* add src to tmpIn buffer */
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize);
+            srcPtr = srcEnd;
+            cctxPtr->tmpInSize += srcSize;
+            /* still needs some CRC */
+        }
+        else
+        {
+            /* complete tmpIn block and then compress it */
+            lastBlockCompressed = fromTmpBuffer;
+            memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy);
+            srcPtr += sizeToCopy;
+
+            dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+
+            if (cctxPtr->prefs.frameInfo.blockMode==blockLinked) cctxPtr->tmpIn += blockSize;
+            cctxPtr->tmpInSize = 0;
+        }
+    }
+
+    while ((size_t)(srcEnd - srcPtr) >= blockSize)
+    {
+        /* compress full block */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+        srcPtr += blockSize;
+    }
+
+    if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd))
+    {
+        /* compress remaining input < blockSize */
+        lastBlockCompressed = fromSrcBuffer;
+        dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+        srcPtr  = srcEnd;
+    }
+
+    /* preserve dictionary if necessary */
+    if ((cctxPtr->prefs.frameInfo.blockMode==blockLinked) && (lastBlockCompressed==fromSrcBuffer))
+    {
+        if (compressOptionsPtr->stableSrc)
+        {
+            cctxPtr->tmpIn = cctxPtr->tmpBuff;
+        }
+        else
+        {
+            int realDictSize = LZ4F_localSaveDict(cctxPtr);
+            if (realDictSize==0) return (size_t)-ERROR_GENERIC;
+            cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize;
+        }
+    }
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + blockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)   /* necessarily blockLinked && lastBlockCompressed==fromTmpBuffer */
+        && !(cctxPtr->prefs.autoFlush))
+    {
+        LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + 64 KB;
+    }
+
+    /* some input data left, necessarily < blockSize */
+    if (srcPtr < srcEnd)
+    {
+        /* fill tmp buffer */
+        size_t sizeToCopy = srcEnd - srcPtr;
+        memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy);
+        cctxPtr->tmpInSize = sizeToCopy;
+    }
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == contentChecksumEnabled)
+        XXH32_update(&(cctxPtr->xxh), srcBuffer, (unsigned)srcSize);
+
+    return dstPtr - dstStart;
+}
+
+
+/* LZ4F_flush()
+* Should you need to create compressed data immediately, without waiting for a block to be filled,
+* you can call LZ4_flush(), which will immediately compress any remaining data stored within compressionContext.
+* The result of the function is the number of bytes written into dstBuffer
+* (it can be zero, this means there was no data left within compressionContext)
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+*/
+size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_compressOptions_t cOptionsNull = { 0 };
+    LZ4F_cctx_internal_t* cctxPtr = (LZ4F_cctx_internal_t*)compressionContext;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    compressFunc_t compress;
+
+
+    if (cctxPtr->tmpInSize == 0) return 0;   /* nothing to flush */
+    if (cctxPtr->cStage != 1) return (size_t)-ERROR_GENERIC;
+    if (dstMaxSize < (cctxPtr->tmpInSize + 16)) return (size_t)-ERROR_dstMaxSize_tooSmall;
+    if (compressOptionsPtr == NULL) compressOptionsPtr = &cOptionsNull;
+    (void)compressOptionsPtr;   /* not yet useful */
+
+    /* select compression function */
+    compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel);
+
+    /* compress tmp buffer */
+    dstPtr += LZ4F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel);
+    if (cctxPtr->prefs.frameInfo.blockMode==blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize;
+    cctxPtr->tmpInSize = 0;
+
+    /* keep tmpIn within limits */
+    if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize))   /* necessarily blockLinked */
+    {
+        LZ4F_localSaveDict(cctxPtr);
+        cctxPtr->tmpIn = cctxPtr->tmpBuff + 64 KB;
+    }
+
+    return dstPtr - dstStart;
+}
+
+
+/* LZ4F_compressEnd()
+* When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+* It will flush whatever data remained within compressionContext (like LZ4_flush())
+* but also properly finalize the frame, with an endMark and a checksum.
+* The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+* The function outputs an error code if it fails (can be tested using LZ4F_isError())
+* The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+* compressionContext can then be used again, starting with LZ4F_compressBegin(). The preferences will remain the same.
+*/
+size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr)
+{
+    LZ4F_cctx_internal_t* cctxPtr = (LZ4F_cctx_internal_t*)compressionContext;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* dstPtr = dstStart;
+    size_t errorCode;
+
+    errorCode = LZ4F_flush(compressionContext, dstBuffer, dstMaxSize, compressOptionsPtr);
+    if (LZ4F_isError(errorCode)) return errorCode;
+    dstPtr += errorCode;
+
+    LZ4F_writeLE32(dstPtr, 0);
+    dstPtr+=4;   /* endMark */
+
+    if (cctxPtr->prefs.frameInfo.contentChecksumFlag == contentChecksumEnabled)
+    {
+        U32 xxh = XXH32_digest(&(cctxPtr->xxh));
+        LZ4F_writeLE32(dstPtr, xxh);
+        dstPtr+=4;   /* content Checksum */
+    }
+
+    cctxPtr->cStage = 0;   /* state is now re-usable (with identical preferences) */
+
+    return dstPtr - dstStart;
+}
+
+
+/***********************************
+* Decompression functions
+* *********************************/
+
+/* Resource management */
+
+/* LZ4F_createDecompressionContext() :
+* The first thing to do is to create a decompressionContext object, which will be used in all decompression operations.
+* This is achieved using LZ4F_createDecompressionContext().
+* The function will provide a pointer to a fully allocated and initialized LZ4F_decompressionContext object.
+* If the result LZ4F_errorCode_t is not zero, there was an error during context creation.
+* Object can release its memory using LZ4F_freeDecompressionContext();
+*/
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_compressionContext_t* LZ4F_decompressionContextPtr, unsigned versionNumber)
+{
+    LZ4F_dctx_internal_t* dctxPtr;
+
+    dctxPtr = ALLOCATOR(sizeof(LZ4F_dctx_internal_t));
+    if (dctxPtr==NULL) return (LZ4F_errorCode_t)-ERROR_GENERIC;
+
+    dctxPtr->version = versionNumber;
+    *LZ4F_decompressionContextPtr = (LZ4F_compressionContext_t)dctxPtr;
+    return OK_NoError;
+}
+
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_compressionContext_t LZ4F_decompressionContext)
+{
+    LZ4F_dctx_internal_t* dctxPtr = (LZ4F_dctx_internal_t*)LZ4F_decompressionContext;
+    FREEMEM(dctxPtr->tmpIn);
+    FREEMEM(dctxPtr->tmpOutBuffer);
+    FREEMEM(dctxPtr);
+    return OK_NoError;
+}
+
+
+/* Decompression */
+
+static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const BYTE* srcPtr, size_t srcSize)
+{
+    BYTE FLG, BD, HC;
+    unsigned version, blockMode, blockChecksumFlag, contentSizeFlag, contentChecksumFlag, dictFlag, blockSizeID;
+    size_t bufferNeeded;
+
+    /* need to decode header to get frameInfo */
+    if (srcSize < 7) return (size_t)-ERROR_GENERIC;   /* minimal header size */
+
+    /* control magic number */
+    if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) return (size_t)-ERROR_GENERIC;
+    srcPtr += 4;
+
+    /* Flags */
+    FLG = srcPtr[0];
+    version = (FLG>>6)&_2BITS;
+    blockMode = (FLG>>5) & _1BIT;
+    blockChecksumFlag = (FLG>>4) & _1BIT;
+    contentSizeFlag = (FLG>>3) & _1BIT;
+    contentChecksumFlag = (FLG>>2) & _1BIT;
+    dictFlag = (FLG>>0) & _1BIT;
+    BD = srcPtr[1];
+    blockSizeID = (BD>>4) & _3BITS;
+
+    /* check */
+    HC = LZ4F_headerChecksum(srcPtr, 2);
+    if (HC != srcPtr[2]) return (size_t)-ERROR_GENERIC;   /* Bad header checksum error */
+
+    /* validate */
+    if (version != 1) return (size_t)-ERROR_GENERIC;   /* Version Number, only supported value */
+    if (blockChecksumFlag != 0) return (size_t)-ERROR_GENERIC;   /* Only supported value for the time being */
+    if (contentSizeFlag != 0) return (size_t)-ERROR_GENERIC;   /* Only supported value for the time being */
+    if (((FLG>>1)&_1BIT) != 0) return (size_t)-ERROR_GENERIC;   /* Reserved bit */
+    if (dictFlag != 0) return (size_t)-ERROR_GENERIC;   /* Only supported value for the time being */
+    if (((BD>>7)&_1BIT) != 0) return (size_t)-ERROR_GENERIC;   /* Reserved bit */
+    if (blockSizeID < 4) return (size_t)-ERROR_GENERIC;   /* Only supported values for the time being */
+    if (((BD>>0)&_4BITS) != 0) return (size_t)-ERROR_GENERIC;   /* Reserved bits */
+
+    /* save */
+    dctxPtr->frameInfo.blockMode = blockMode;
+    dctxPtr->frameInfo.contentChecksumFlag = contentChecksumFlag;
+    dctxPtr->frameInfo.blockSizeID = blockSizeID;
+    dctxPtr->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
+
+    /* init */
+    if (contentChecksumFlag) XXH32_reset(&(dctxPtr->xxh), 0);
+
+    /* alloc */
+    bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==blockLinked) * 128 KB);
+    if (bufferNeeded > dctxPtr->maxBufferSize)   /* tmp buffers too small */
+    {
+        FREEMEM(dctxPtr->tmpIn);
+        FREEMEM(dctxPtr->tmpOutBuffer);
+        dctxPtr->maxBufferSize = bufferNeeded;
+        dctxPtr->tmpIn = ALLOCATOR(dctxPtr->maxBlockSize);
+        if (dctxPtr->tmpIn == NULL) return (size_t)-ERROR_GENERIC;
+        dctxPtr->tmpOutBuffer= ALLOCATOR(dctxPtr->maxBufferSize);
+        if (dctxPtr->tmpOutBuffer== NULL) return (size_t)-ERROR_GENERIC;
+    }
+    dctxPtr->tmpInSize = 0;
+    dctxPtr->tmpInTarget = 0;
+    dctxPtr->dict = dctxPtr->tmpOutBuffer;
+    dctxPtr->dictSize = 0;
+    dctxPtr->tmpOut = dctxPtr->tmpOutBuffer;
+    dctxPtr->tmpOutStart = 0;
+    dctxPtr->tmpOutSize = 0;
+
+    return 7;
+}
+
+
+typedef enum { dstage_getHeader=0, dstage_storeHeader, dstage_decodeHeader,
+    dstage_getCBlockSize, dstage_storeCBlockSize, dstage_decodeCBlockSize,
+    dstage_copyDirect,
+    dstage_getCBlock, dstage_storeCBlock, dstage_decodeCBlock,
+    dstage_decodeCBlock_intoDst, dstage_decodeCBlock_intoTmp, dstage_flushOut,
+    dstage_getSuffix, dstage_storeSuffix, dstage_checkSuffix
+} dStage_t;
+
+
+/* LZ4F_getFrameInfo()
+* This function decodes frame header information, such as blockSize.
+* It is optional : you could start by calling directly LZ4F_decompress() instead.
+* The objective is to extract header information without starting decompression, typically for allocation purposes.
+* LZ4F_getFrameInfo() can also be used *after* starting decompression, on a valid LZ4F_decompressionContext_t.
+* The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+* You are expected to resume decompression from where it stopped (srcBuffer + *srcSizePtr)
+* The function result is an hint of the better srcSize to use for next call to LZ4F_decompress,
+* or an error code which can be tested using LZ4F_isError().
+*/
+LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t decompressionContext, LZ4F_frameInfo_t* frameInfoPtr, const void* srcBuffer, size_t* srcSizePtr)
+{
+    LZ4F_dctx_internal_t* dctxPtr = (LZ4F_dctx_internal_t*)decompressionContext;
+
+    if (dctxPtr->dStage == dstage_getHeader)
+    {
+        LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, srcBuffer, *srcSizePtr);
+        if (LZ4F_isError(errorCode)) return errorCode;
+        *srcSizePtr = errorCode;
+        *frameInfoPtr = dctxPtr->frameInfo;
+        dctxPtr->srcExpect = NULL;
+        dctxPtr->dStage = dstage_getCBlockSize;
+        return 4;
+    }
+
+    /* frameInfo already decoded */
+    *srcSizePtr = 0;
+    *frameInfoPtr = dctxPtr->frameInfo;
+    return 0;
+}
+
+
+static int LZ4F_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize)
+{
+    (void)dictStart;
+    (void)dictSize;
+    return LZ4_decompress_safe (source, dest, compressedSize, maxDecompressedSize);
+}
+
+
+
+static void LZ4F_updateDict(LZ4F_dctx_internal_t* dctxPtr, const BYTE* dstPtr, size_t dstSize, const BYTE* dstPtr0, unsigned withinTmp)
+{
+    if (dctxPtr->dictSize==0)
+        dctxPtr->dict = (BYTE*)dstPtr;   /* priority to dictionary continuity */
+
+    if (dctxPtr->dict + dctxPtr->dictSize == dstPtr)   /* dictionary continuity */
+    {
+        dctxPtr->dictSize += dstSize;
+        return;
+    }
+
+    if (dstPtr - dstPtr0 + dstSize >= 64 KB)   /* dstBuffer large enough to become dictionary */
+    {
+        dctxPtr->dict = (BYTE*)dstPtr0;
+        dctxPtr->dictSize = dstPtr - dstPtr0 + dstSize;
+        return;
+    }
+
+    if ((withinTmp) && (dctxPtr->dict == dctxPtr->tmpOutBuffer))
+    {
+        /* assumption : dctxPtr->dict + dctxPtr->dictSize == dctxPtr->tmpOut + dctxPtr->tmpOutStart */
+        dctxPtr->dictSize += dstSize;
+        return;
+    }
+
+    if (withinTmp) /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */
+    {
+#if 0
+        size_t savedDictSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
+        memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart- savedDictSize, savedDictSize);
+        dctxPtr->dict = dctxPtr->tmpOutBuffer;
+        dctxPtr->dictSize = savedDictSize + dctxPtr->tmpOutStart + dstSize;
+        return;
+
+#else
+
+        size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
+        size_t copySize = 64 KB - dctxPtr->tmpOutSize;
+        BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart;
+        if (dctxPtr->tmpOutSize > 64 KB) copySize = 0;
+        if (copySize > preserveSize) copySize = preserveSize;
+
+        memcpy(dctxPtr->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+        dctxPtr->dict = dctxPtr->tmpOutBuffer;
+        dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart + dstSize;
+        return;
+#endif
+    }
+
+    if (dctxPtr->dict == dctxPtr->tmpOutBuffer)     /* copy dst into tmp to complete dict */
+    {
+        if (dctxPtr->dictSize + dstSize > dctxPtr->maxBufferSize)   /* tmp buffer not large enough */
+        {
+            size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
+            memcpy(dctxPtr->dict, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize);
+            dctxPtr->dictSize = preserveSize;
+        }
+        memcpy(dctxPtr->dict + dctxPtr->dictSize, dstPtr, dstSize);
+        dctxPtr->dictSize += dstSize;
+        return;
+    }
+
+    /* join dict & dest into tmp */
+    {
+        size_t preserveSize = 64 KB - dstSize;   /* note : dstSize < 64 KB */
+        if (preserveSize > dctxPtr->dictSize) preserveSize = dctxPtr->dictSize;
+        memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize);
+        memcpy(dctxPtr->tmpOutBuffer + preserveSize, dstPtr, dstSize);
+        dctxPtr->dict = dctxPtr->tmpOutBuffer;
+        dctxPtr->dictSize = preserveSize + dstSize;
+    }
+}
+
+
+
+/* LZ4F_decompress()
+* Call this function repetitively to regenerate data compressed within srcBuffer.
+* The function will attempt to decode *srcSizePtr from srcBuffer, into dstBuffer of maximum size *dstSizePtr.
+*
+* The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+*
+* The number of bytes effectively read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+* If the number of bytes read is < number of bytes provided, then the decompression operation is not complete.
+* You will have to call it again, continuing from where it stopped.
+*
+* The function result is an hint of the better srcSize to use for next call to LZ4F_decompress.
+* Basically, it's the size of the current (or remaining) compressed block + header of next block.
+* Respecting the hint provides some boost to performance, since it allows less buffer shuffling.
+* Note that this is just a hint, you can always provide any srcSize you want.
+* When a frame is fully decoded, the function result will be 0.
+* If decompression failed, function result is an error code which can be tested using LZ4F_isError().
+*/
+size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* decompressOptionsPtr)
+{
+    LZ4F_dctx_internal_t* dctxPtr = (LZ4F_dctx_internal_t*)decompressionContext;
+    static const LZ4F_decompressOptions_t optionsNull = { 0 };
+    const BYTE* const srcStart = (const BYTE*)srcBuffer;
+    const BYTE* const srcEnd = srcStart + *srcSizePtr;
+    const BYTE* srcPtr = srcStart;
+    BYTE* const dstStart = (BYTE*)dstBuffer;
+    BYTE* const dstEnd = dstStart + *dstSizePtr;
+    BYTE* dstPtr = dstStart;
+    const BYTE* selectedIn=NULL;
+    unsigned doAnotherStage = 1;
+    size_t nextSrcSizeHint = 1;
+
+
+    if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull;
+    *srcSizePtr = 0;
+    *dstSizePtr = 0;
+
+    /* expect to continue decoding src buffer where it left previously */
+    if (dctxPtr->srcExpect != NULL)
+    {
+        if (srcStart != dctxPtr->srcExpect) return (size_t)-ERROR_GENERIC;
+    }
+
+    /* programmed as a state machine */
+
+    while (doAnotherStage)
+    {
+
+        switch(dctxPtr->dStage)
+        {
+
+        case dstage_getHeader:
+            {
+                if (srcEnd-srcPtr >= 7)
+                {
+                    selectedIn = srcPtr;
+                    srcPtr += 7;
+                    dctxPtr->dStage = dstage_decodeHeader;
+                    break;
+                }
+                dctxPtr->tmpInSize = 0;
+                dctxPtr->dStage = dstage_storeHeader;
+                break;
+            }
+
+        case dstage_storeHeader:
+            {
+                size_t sizeToCopy = 7 - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy =  srcEnd - srcPtr;
+                memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                dctxPtr->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctxPtr->tmpInSize < 7)
+                {
+                    nextSrcSizeHint = (7 - dctxPtr->tmpInSize) + 4;
+                    doAnotherStage = 0;   /* no enough src, wait to get some more */
+                    break;
+                }
+                selectedIn = dctxPtr->header;
+                dctxPtr->dStage = dstage_decodeHeader;
+                break;
+            }
+
+        case dstage_decodeHeader:
+            {
+                LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, selectedIn, 7);
+                if (LZ4F_isError(errorCode)) return errorCode;
+                dctxPtr->dStage = dstage_getCBlockSize;
+                break;
+            }
+
+        case dstage_getCBlockSize:
+            {
+                if ((srcEnd - srcPtr) >= 4)
+                {
+                    selectedIn = srcPtr;
+                    srcPtr += 4;
+                    dctxPtr->dStage = dstage_decodeCBlockSize;
+                    break;
+                }
+                /* not enough input to read cBlockSize field */
+                dctxPtr->tmpInSize = 0;
+                dctxPtr->dStage = dstage_storeCBlockSize;
+                break;
+            }
+
+        case dstage_storeCBlockSize:
+            {
+                size_t sizeToCopy = 4 - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctxPtr->tmpInSize += sizeToCopy;
+                if (dctxPtr->tmpInSize < 4) /* not enough input to get full cBlockSize; wait for more */
+                {
+                    nextSrcSizeHint = 4 - dctxPtr->tmpInSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctxPtr->tmpIn;
+                dctxPtr->dStage = dstage_decodeCBlockSize;
+                break;
+            }
+
+        case dstage_decodeCBlockSize:
+            {
+                size_t nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU;
+                if (nextCBlockSize==0)   /* frameEnd signal, no more CBlock */
+                {
+                    dctxPtr->dStage = dstage_getSuffix;
+                    break;
+                }
+                if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-ERROR_GENERIC;   /* invalid cBlockSize */
+                dctxPtr->tmpInTarget = nextCBlockSize;
+                if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG)
+                {
+                    dctxPtr->dStage = dstage_copyDirect;
+                    break;
+                }
+                dctxPtr->dStage = dstage_getCBlock;
+                if (dstPtr==dstEnd)
+                {
+                    nextSrcSizeHint = nextCBlockSize + 4;
+                    doAnotherStage = 0;
+                }
+                break;
+            }
+
+        case dstage_copyDirect:   /* uncompressed block */
+            {
+                size_t sizeToCopy = dctxPtr->tmpInTarget;
+                if ((size_t)(srcEnd-srcPtr) < sizeToCopy) sizeToCopy = srcEnd - srcPtr;  /* not enough input to read full block */
+                if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr;
+                memcpy(dstPtr, srcPtr, sizeToCopy);
+                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, (U32)sizeToCopy);
+
+                /* dictionary management */
+                if (dctxPtr->frameInfo.blockMode==blockLinked)
+                    LZ4F_updateDict(dctxPtr, dstPtr, sizeToCopy, dstStart, 0);
+
+                srcPtr += sizeToCopy;
+                dstPtr += sizeToCopy;
+                if (sizeToCopy == dctxPtr->tmpInTarget)   /* all copied */
+                {
+                    dctxPtr->dStage = dstage_getCBlockSize;
+                    break;
+                }
+                dctxPtr->tmpInTarget -= sizeToCopy;   /* still need to copy more */
+                nextSrcSizeHint = dctxPtr->tmpInTarget + 4;
+                doAnotherStage = 0;
+                break;
+            }
+
+        case dstage_getCBlock:   /* entry from dstage_decodeCBlockSize */
+            {
+                if ((size_t)(srcEnd-srcPtr) < dctxPtr->tmpInTarget)
+                {
+                    dctxPtr->tmpInSize = 0;
+                    dctxPtr->dStage = dstage_storeCBlock;
+                    break;
+                }
+                selectedIn = srcPtr;
+                srcPtr += dctxPtr->tmpInTarget;
+                dctxPtr->dStage = dstage_decodeCBlock;
+                break;
+            }
+
+        case dstage_storeCBlock:
+            {
+                size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd-srcPtr)) sizeToCopy = srcEnd-srcPtr;
+                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                dctxPtr->tmpInSize += sizeToCopy;
+                srcPtr += sizeToCopy;
+                if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget)  /* need more input */
+                {
+                    nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + 4;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctxPtr->tmpIn;
+                dctxPtr->dStage = dstage_decodeCBlock;
+                break;
+            }
+
+        case dstage_decodeCBlock:
+            {
+                if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize)   /* not enough place into dst : decode into tmpOut */
+                    dctxPtr->dStage = dstage_decodeCBlock_intoTmp;
+                else
+                    dctxPtr->dStage = dstage_decodeCBlock_intoDst;
+                break;
+            }
+
+        case dstage_decodeCBlock_intoDst:
+            {
+                int (*decoder)(const char*, char*, int, int, const char*, int);
+                int decodedSize;
+
+                if (dctxPtr->frameInfo.blockMode == blockLinked)
+                    decoder = LZ4_decompress_safe_usingDict;
+                else
+                    decoder = LZ4F_decompress_safe;
+
+                decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
+                if (decodedSize < 0) return (size_t)-ERROR_GENERIC;   /* decompression failed */
+                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize);
+
+                /* dictionary management */
+                if (dctxPtr->frameInfo.blockMode==blockLinked)
+                    LZ4F_updateDict(dctxPtr, dstPtr, decodedSize, dstStart, 0);
+
+                dstPtr += decodedSize;
+                dctxPtr->dStage = dstage_getCBlockSize;
+                break;
+            }
+
+        case dstage_decodeCBlock_intoTmp:
+            {
+                /* not enough place into dst : decode into tmpOut */
+                int (*decoder)(const char*, char*, int, int, const char*, int);
+                int decodedSize;
+
+                if (dctxPtr->frameInfo.blockMode == blockLinked)
+                    decoder = LZ4_decompress_safe_usingDict;
+                else
+                    decoder = LZ4F_decompress_safe;
+
+                /* ensure enough place for tmpOut */
+                if (dctxPtr->frameInfo.blockMode == blockLinked)
+                {
+                    if (dctxPtr->dict == dctxPtr->tmpOutBuffer)
+                    {
+                        if (dctxPtr->dictSize > 128 KB)
+                        {
+                            memcpy(dctxPtr->dict, dctxPtr->dict + dctxPtr->dictSize - 64 KB, 64 KB);
+                            dctxPtr->dictSize = 64 KB;
+                        }
+                        dctxPtr->tmpOut = dctxPtr->dict + dctxPtr->dictSize;
+                    }
+                    else   /* dict not within tmp */
+                    {
+                        size_t reservedDictSpace = dctxPtr->dictSize;
+                        if (reservedDictSpace > 64 KB) reservedDictSpace = 64 KB;
+                        dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + reservedDictSpace;
+                    }
+                }
+
+                /* Decode */
+                decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
+                if (decodedSize < 0) return (size_t)-ERROR_decompressionFailed;   /* decompression failed */
+                if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize);
+                dctxPtr->tmpOutSize = decodedSize;
+                dctxPtr->tmpOutStart = 0;
+                dctxPtr->dStage = dstage_flushOut;
+                break;
+            }
+
+        case dstage_flushOut:  /* flush decoded data from tmpOut to dstBuffer */
+            {
+                size_t sizeToCopy = dctxPtr->tmpOutSize - dctxPtr->tmpOutStart;
+                if (sizeToCopy > (size_t)(dstEnd-dstPtr)) sizeToCopy = dstEnd-dstPtr;
+                memcpy(dstPtr, dctxPtr->tmpOut + dctxPtr->tmpOutStart, sizeToCopy);
+
+                /* dictionary management */
+                if (dctxPtr->frameInfo.blockMode==blockLinked)
+                    LZ4F_updateDict(dctxPtr, dstPtr, sizeToCopy, dstStart, 1);
+
+                dctxPtr->tmpOutStart += sizeToCopy;
+                dstPtr += sizeToCopy;
+
+                /* end of flush ? */
+                if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize)
+                {
+                    dctxPtr->dStage = dstage_getCBlockSize;
+                    break;
+                }
+                nextSrcSizeHint = 4;
+                doAnotherStage = 0;   /* still some data to flush */
+                break;
+            }
+
+        case dstage_getSuffix:
+            {
+                size_t suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4;
+                if (suffixSize == 0)   /* frame completed */
+                {
+                    nextSrcSizeHint = 0;
+                    dctxPtr->dStage = dstage_getHeader;
+                    doAnotherStage = 0;
+                    break;
+                }
+                if ((srcEnd - srcPtr) >= 4)   /* CRC present */
+                {
+                    selectedIn = srcPtr;
+                    srcPtr += 4;
+                    dctxPtr->dStage = dstage_checkSuffix;
+                    break;
+                }
+                dctxPtr->tmpInSize = 0;
+                dctxPtr->dStage = dstage_storeSuffix;
+                break;
+            }
+
+        case dstage_storeSuffix:
+            {
+                size_t sizeToCopy = 4 - dctxPtr->tmpInSize;
+                if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr;
+                memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy);
+                srcPtr += sizeToCopy;
+                dctxPtr->tmpInSize += sizeToCopy;
+                if (dctxPtr->tmpInSize < 4)  /* not enough input to read complete suffix */
+                {
+                    nextSrcSizeHint = 4 - dctxPtr->tmpInSize;
+                    doAnotherStage=0;
+                    break;
+                }
+                selectedIn = dctxPtr->tmpIn;
+                dctxPtr->dStage = dstage_checkSuffix;
+                break;
+            }
+
+        case dstage_checkSuffix:
+            {
+                U32 readCRC = LZ4F_readLE32(selectedIn);
+                U32 resultCRC = XXH32_digest(&(dctxPtr->xxh));
+                if (readCRC != resultCRC) return (size_t)-ERROR_checksum_invalid;
+                nextSrcSizeHint = 0;
+                dctxPtr->dStage = dstage_getHeader;
+                doAnotherStage = 0;
+                break;
+            }
+        }
+    }
+
+    /* preserve dictionary within tmp if necessary */
+    if ( (dctxPtr->frameInfo.blockMode==blockLinked)
+        &&(dctxPtr->dict != dctxPtr->tmpOutBuffer)
+        &&(!decompressOptionsPtr->stableDst)
+        &&((unsigned)(dctxPtr->dStage-1) < (unsigned)(dstage_getSuffix-1))
+        )
+    {
+        if (dctxPtr->dStage == dstage_flushOut)
+        {
+            size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer;
+            size_t copySize = 64 KB - dctxPtr->tmpOutSize;
+            BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart;
+            if (dctxPtr->tmpOutSize > 64 KB) copySize = 0;
+            if (copySize > preserveSize) copySize = preserveSize;
+
+            memcpy(dctxPtr->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize);
+
+            dctxPtr->dict = dctxPtr->tmpOutBuffer;
+            dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart;
+        }
+        else
+        {
+            size_t newDictSize = dctxPtr->dictSize;
+            BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize;
+            if ((newDictSize) > 64 KB) newDictSize = 64 KB;
+
+            memcpy(dctxPtr->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize);
+
+            dctxPtr->dict = dctxPtr->tmpOutBuffer;
+            dctxPtr->dictSize = newDictSize;
+            dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + newDictSize;
+        }
+    }
+
+    if (srcPtr<srcEnd)   /* function must be called again with following source data */
+        dctxPtr->srcExpect = srcPtr;
+    else
+        dctxPtr->srcExpect = NULL;
+    *srcSizePtr = (srcPtr - srcStart);
+    *dstSizePtr = (dstPtr - dstStart);
+    return nextSrcSizeHint;
+}
diff --git a/lz4/lz4frame.h b/lz4/lz4frame.h
new file mode 100644
index 0000000..d73e3e2
--- /dev/null
+++ b/lz4/lz4frame.h
@@ -0,0 +1,252 @@
+/*
+   LZ4 auto-framing library
+   Header File
+   Copyright (C) 2011-2015, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 source mirror : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* LZ4F is a stand-alone API to create LZ4-compressed frames
+ * fully conformant to specification v1.4.1.
+ * All related operations, including memory management, are handled by the library.
+ * You don't need lz4.h when using lz4frame.h.
+ * */
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/**************************************
+   Includes
+**************************************/
+#include <stddef.h>   /* size_t */
+
+
+/**************************************
+ * Error management
+ * ************************************/
+typedef size_t LZ4F_errorCode_t;
+
+unsigned    LZ4F_isError(LZ4F_errorCode_t code);
+const char* LZ4F_getErrorName(LZ4F_errorCode_t code);   /* return error code string; useful for debugging */
+
+
+/**************************************
+ * Frame compression types
+ * ************************************/
+typedef enum { LZ4F_default=0, max64KB=4, max256KB=5, max1MB=6, max4MB=7 } blockSizeID_t;
+typedef enum { blockLinked=0, blockIndependent} blockMode_t;
+typedef enum { noContentChecksum=0, contentChecksumEnabled } contentChecksum_t;
+
+typedef struct {
+  blockSizeID_t     blockSizeID;           /* max64KB, max256KB, max1MB, max4MB ; 0 == default */
+  blockMode_t       blockMode;             /* blockLinked, blockIndependent ; 0 == default */
+  contentChecksum_t contentChecksumFlag;   /* noContentChecksum, contentChecksumEnabled ; 0 == default  */
+  unsigned          reserved[5];
+} LZ4F_frameInfo_t;
+
+typedef struct {
+  LZ4F_frameInfo_t frameInfo;
+  unsigned         compressionLevel;       /* 0 == default (fast mode); values above 16 count as 16 */
+  unsigned         autoFlush;              /* 1 == always flush : reduce need for tmp buffer */
+  unsigned         reserved[4];
+} LZ4F_preferences_t;
+
+
+/***********************************
+ * Simple compression function
+ * *********************************/
+size_t LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+
+size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+/* LZ4F_compressFrame()
+ * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.4.1.
+ * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+ * You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound()
+ * If this condition is not respected, LZ4F_compressFrame() will fail (result is an errorCode)
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument. All preferences will be set to default.
+ * The result of the function is the number of bytes written into dstBuffer.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+
+
+/**********************************
+ * Advanced compression functions
+ * ********************************/
+typedef void* LZ4F_compressionContext_t;
+
+typedef struct {
+  unsigned stableSrc;    /* 1 == src content will remain available on future calls to LZ4F_compress(); avoid saving src content within tmp buffer as future dictionary */
+  unsigned reserved[3];
+} LZ4F_compressOptions_t;
+
+/* Resource Management */
+
+#define LZ4F_VERSION 100
+LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext);
+/* LZ4F_createCompressionContext() :
+ * The first thing to do is to create a compressionContext object, which will be used in all compression operations.
+ * This is achieved using LZ4F_createCompressionContext(), which takes as argument a version and an LZ4F_preferences_t structure.
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
+ * The function will provide a pointer to a fully allocated LZ4F_compressionContext_t object.
+ * If the result LZ4F_errorCode_t is not zero, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeCompressionContext();
+ */
+
+
+/* Compression */
+
+size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_preferences_t* preferencesPtr);
+/* LZ4F_compressBegin() :
+ * will write the frame header into dstBuffer.
+ * dstBuffer must be large enough to accommodate a header (dstMaxSize). Maximum header size is 19 bytes.
+ * The LZ4F_preferences_t structure is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * The result of the function is the number of bytes written into dstBuffer for the header
+ * or an error code (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr);
+/* LZ4F_compressBound() :
+ * Provides the minimum size of Dst buffer given srcSize to handle worst case situations.
+ * preferencesPtr is optional : you can provide NULL as argument, all preferences will then be set to default.
+ * Note that different preferences will produce in different results.
+ */
+
+size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const void* srcBuffer, size_t srcSize, const LZ4F_compressOptions_t* compressOptionsPtr);
+/* LZ4F_compressUpdate()
+ * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary.
+ * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case.
+ * If this condition is not respected, LZ4F_compress() will fail (result is an errorCode)
+ * You can get the minimum value of dstMaxSize by using LZ4F_compressBound()
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The result of the function is the number of bytes written into dstBuffer : it can be zero, meaning input data was just buffered.
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr);
+/* LZ4F_flush()
+ * Should you need to create compressed data immediately, without waiting for a block to be filled,
+ * you can call LZ4_flush(), which will immediately compress any remaining data buffered within compressionContext.
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * The result of the function is the number of bytes written into dstBuffer
+ * (it can be zero, this means there was no data left within compressionContext)
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ */
+
+size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstBuffer, size_t dstMaxSize, const LZ4F_compressOptions_t* compressOptionsPtr);
+/* LZ4F_compressEnd()
+ * When you want to properly finish the compressed frame, just call LZ4F_compressEnd().
+ * It will flush whatever data remained within compressionContext (like LZ4_flush())
+ * but also properly finalize the frame, with an endMark and a checksum.
+ * The result of the function is the number of bytes written into dstBuffer (necessarily >= 4 (endMark size))
+ * The function outputs an error code if it fails (can be tested using LZ4F_isError())
+ * The LZ4F_compressOptions_t structure is optional : you can provide NULL as argument.
+ * compressionContext can then be used again, starting with LZ4F_compressBegin().
+ */
+
+
+/***********************************
+ * Decompression functions
+ * *********************************/
+
+typedef void* LZ4F_decompressionContext_t;
+
+typedef struct {
+  unsigned stableDst;       /* guarantee that decompressed data will still be there on next function calls (avoid storage into tmp buffers) */
+  unsigned reserved[3];
+} LZ4F_decompressOptions_t;
+
+
+/* Resource management */
+
+LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_decompressionContext_t* ctxPtr, unsigned version);
+LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_decompressionContext_t ctx);
+/* LZ4F_createDecompressionContext() :
+ * The first thing to do is to create a decompressionContext object, which will be used in all decompression operations.
+ * This is achieved using LZ4F_createDecompressionContext().
+ * The version provided MUST be LZ4F_VERSION. It is intended to track potential version differences between different binaries.
+ * The function will provide a pointer to a fully allocated and initialized LZ4F_decompressionContext_t object.
+ * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation.
+ * Object can release its memory using LZ4F_freeDecompressionContext();
+ */
+
+
+/* Decompression */
+
+size_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t ctx,
+                         LZ4F_frameInfo_t* frameInfoPtr,
+                         const void* srcBuffer, size_t* srcSizePtr);
+/* LZ4F_getFrameInfo()
+ * This function decodes frame header information, such as blockSize.
+ * It is optional : you could start by calling directly LZ4F_decompress() instead.
+ * The objective is to extract header information without starting decompression, typically for allocation purposes.
+ * LZ4F_getFrameInfo() can also be used *after* starting decompression, on a valid LZ4F_decompressionContext_t.
+ * The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ * You are expected to resume decompression from where it stopped (srcBuffer + *srcSizePtr)
+ * The function result is an hint of how many srcSize bytes LZ4F_decompress() expects for next call,
+ * or an error code which can be tested using LZ4F_isError().
+ */
+
+size_t LZ4F_decompress(LZ4F_decompressionContext_t ctx,
+                       void* dstBuffer, size_t* dstSizePtr,
+                       const void* srcBuffer, size_t* srcSizePtr,
+                       const LZ4F_decompressOptions_t* optionsPtr);
+/* LZ4F_decompress()
+ * Call this function repetitively to regenerate data compressed within srcBuffer.
+ * The function will attempt to decode *srcSizePtr bytes from srcBuffer, into dstBuffer of maximum size *dstSizePtr.
+ *
+ * The number of bytes regenerated into dstBuffer will be provided within *dstSizePtr (necessarily <= original value).
+ *
+ * The number of bytes read from srcBuffer will be provided within *srcSizePtr (necessarily <= original value).
+ * If number of bytes read is < number of bytes provided, then decompression operation is not completed.
+ * It typically happens when dstBuffer is not large enough to contain all decoded data.
+ * LZ4F_decompress() must be called again, starting from where it stopped (srcBuffer + *srcSizePtr)
+ * The function will check this condition, and refuse to continue if it is not respected.
+ *
+ * dstBuffer is supposed to be flushed between each call to the function, since its content will be overwritten.
+ * dst arguments can be changed at will with each consecutive call to the function.
+ *
+ * The function result is an hint of how many srcSize bytes LZ4F_decompress() expects for next call.
+ * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
+ * Respecting the hint provides some boost to performance, since it does skip intermediate buffers.
+ * This is just a hint, you can always provide any srcSize you want.
+ * When a frame is fully decoded, the function result will be 0. (no more data expected)
+ * If decompression failed, function result is an error code, which can be tested using LZ4F_isError().
+ */
+
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lz4/lz4frame_static.h b/lz4/lz4frame_static.h
new file mode 100644
index 0000000..cde8186
--- /dev/null
+++ b/lz4/lz4frame_static.h
@@ -0,0 +1,72 @@
+/*
+   LZ4 auto-framing library
+   Header File for static linking only
+   Copyright (C) 2011-2015, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 source mirror : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* lz4frame_static.h should be used solely in the context of static linking.
+ * */
+
+
+/**************************************
+ * Error management
+ * ************************************/
+#define LZ4F_LIST_ERRORS(ITEM) \
+        ITEM(OK_NoError) ITEM(ERROR_GENERIC) \
+        ITEM(ERROR_maxBlockSize_invalid) ITEM(ERROR_blockMode_invalid) ITEM(ERROR_contentChecksumFlag_invalid) \
+        ITEM(ERROR_compressionLevel_invalid) \
+        ITEM(ERROR_allocation_failed) \
+        ITEM(ERROR_srcSize_tooLarge) ITEM(ERROR_dstMaxSize_tooSmall) \
+        ITEM(ERROR_decompressionFailed) \
+        ITEM(ERROR_checksum_invalid) \
+        ITEM(ERROR_maxCode)
+
+#define LZ4F_GENERATE_ENUM(ENUM) ENUM,
+typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;  /* enum is exposed, to handle specific errors; compare function result to -enum value */
+
+
+/**************************************
+   Includes
+**************************************/
+#include "lz4frame.h"
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lz4/lz4hc.c b/lz4/lz4hc.c
index 7324492..5549969 100644
--- a/lz4/lz4hc.c
+++ b/lz4/lz4hc.c
@@ -1,730 +1,751 @@
-/*
-   LZ4 HC - High Compression Mode of LZ4
-   Copyright (C) 2011-2012, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
-*/
-
-
-//**************************************
-// CPU Feature Detection
-//**************************************
-// 32 or 64 bits ?
-#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) )   // Detects 64 bits mode
-#  define LZ4_ARCH64 1
-#else
-#  define LZ4_ARCH64 0
-#endif
-
-// Little Endian or Big Endian ?
-// Overwrite the #define below if you know your architecture endianess
-#if defined (__GLIBC__)
-#  include <endian.h>
-#  if (__BYTE_ORDER == __BIG_ENDIAN)
-#     define LZ4_BIG_ENDIAN 1
-#  endif
-#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
-#  define LZ4_BIG_ENDIAN 1
-#elif defined(__sparc) || defined(__sparc__) \
-   || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
-   || defined(__hpux)  || defined(__hppa) \
-   || defined(_MIPSEB) || defined(__s390__)
-#  define LZ4_BIG_ENDIAN 1
-#else
-// Little Endian assumed. PDP Endian and other very rare endian format are unsupported.
-#endif
-
-// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
-// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
-#if defined(__ARM_FEATURE_UNALIGNED)
-#  define LZ4_FORCE_UNALIGNED_ACCESS 1
-#endif
-
-// Define this parameter if your target system or compiler does not support hardware bit count
-#if defined(_MSC_VER) && defined(_WIN32_WCE)            // Visual Studio for Windows CE does not support Hardware bit count
-#  define LZ4_FORCE_SW_BITCOUNT
-#endif
-
-
-//**************************************
-// Compiler Options
-//**************************************
-#if __STDC_VERSION__ >= 199901L    // C99
-  /* "restrict" is a known keyword */
-#else
-#  define restrict  // Disable restrict
-#endif
-
-#ifdef _MSC_VER
-#  define inline __inline             // Visual is not C99, but supports some kind of inline
-#  define forceinline __forceinline   
-#  include <intrin.h>                 // For Visual 2005
-#  if LZ4_ARCH64	// 64-bit
-#    pragma intrinsic(_BitScanForward64) // For Visual 2005
-#    pragma intrinsic(_BitScanReverse64) // For Visual 2005
-#  else
-#    pragma intrinsic(_BitScanForward)   // For Visual 2005
-#    pragma intrinsic(_BitScanReverse)   // For Visual 2005
-#  endif
-#else 
-#  ifdef __GNUC__
-#    define forceinline inline __attribute__((always_inline))
-#  else
-#    define forceinline inline
-#  endif
-#endif
-
-#ifdef _MSC_VER  // Visual Studio
-#define lz4_bswap16(x) _byteswap_ushort(x)
-#else
-#define lz4_bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
-#endif
-
-
-//**************************************
-// Includes
-//**************************************
-#include <stdlib.h>   // calloc, free
-#include <string.h>   // memset, memcpy
-#include "lz4hc.h"
-
-#define ALLOCATOR(s) calloc(1,s)
-#define FREEMEM free
-#define MEM_INIT memset
-
-
-//**************************************
-// Basic Types
-//**************************************
-#if defined(_MSC_VER)    // Visual Studio does not support 'stdint' natively
-#define BYTE	unsigned __int8
-#define U16		unsigned __int16
-#define U32		unsigned __int32
-#define S32		__int32
-#define U64		unsigned __int64
-#else
-#include <stdint.h>
-#define BYTE	uint8_t
-#define U16		uint16_t
-#define U32		uint32_t
-#define S32		int32_t
-#define U64		uint64_t
-#endif
-
-#ifndef LZ4_FORCE_UNALIGNED_ACCESS
-#pragma pack(push, 1) 
-#endif
-
-typedef struct _U16_S { U16 v; } U16_S;
-typedef struct _U32_S { U32 v; } U32_S;
-typedef struct _U64_S { U64 v; } U64_S;
-
-#ifndef LZ4_FORCE_UNALIGNED_ACCESS
-#pragma pack(pop) 
-#endif
-
-#define A64(x) (((U64_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A16(x) (((U16_S *)(x))->v)
-
-
-//**************************************
-// Constants
-//**************************************
-#define MINMATCH 4
-
-#define DICTIONARY_LOGSIZE 16
-#define MAXD (1<<DICTIONARY_LOGSIZE)
-#define MAXD_MASK ((U32)(MAXD - 1))
-#define MAX_DISTANCE (MAXD - 1)
-
-#define HASH_LOG (DICTIONARY_LOGSIZE-1)
-#define HASHTABLESIZE (1 << HASH_LOG)
-#define HASH_MASK (HASHTABLESIZE - 1)
-
-#define MAX_NB_ATTEMPTS 256
-
-#define ML_BITS  4
-#define ML_MASK  (size_t)((1U<<ML_BITS)-1)
-#define RUN_BITS (8-ML_BITS)
-#define RUN_MASK ((1U<<RUN_BITS)-1)
-
-#define COPYLENGTH 8
-#define LASTLITERALS 5
-#define MFLIMIT (COPYLENGTH+MINMATCH)
-#define MINLENGTH (MFLIMIT+1)
-#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
-
-
-//**************************************
-// Architecture-specific macros
-//**************************************
-#if LZ4_ARCH64	// 64-bit
-#define STEPSIZE 8
-#define LZ4_COPYSTEP(s,d)		A64(d) = A64(s); d+=8; s+=8;
-#define LZ4_COPYPACKET(s,d)		LZ4_COPYSTEP(s,d)
-#define UARCH U64
-#define AARCH A64
-#define HTYPE					U32
-#define INITBASE(b,s)			const BYTE* const b = s
-#else		// 32-bit
-#define STEPSIZE 4
-#define LZ4_COPYSTEP(s,d)		A32(d) = A32(s); d+=4; s+=4;
-#define LZ4_COPYPACKET(s,d)		LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
-#define UARCH U32
-#define AARCH A32
-#define HTYPE					const BYTE*
-#define INITBASE(b,s)		    const int b = 0
-#endif
-
-#if defined(LZ4_BIG_ENDIAN)
-#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
-#define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
-#else		// Little Endian
-#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
-#define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
-#endif
-
-
-//************************************************************
-// Local Types
-//************************************************************
-typedef struct 
-{
-    const BYTE* base;
-    HTYPE hashTable[HASHTABLESIZE];
-    U16 chainTable[MAXD];
-    const BYTE* nextToUpdate;
-} LZ4HC_Data_Structure;
-
-
-//**************************************
-// Macros
-//**************************************
-#define LZ4_WILDCOPY(s,d,e)    do { LZ4_COPYPACKET(s,d) } while (d<e);
-#define LZ4_BLINDCOPY(s,d,l)   { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
-#define HASH_FUNCTION(i)	   (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
-#define HASH_VALUE(p)		   HASH_FUNCTION(A32(p))
-#define HASH_POINTER(p)		   (HashTable[HASH_VALUE(p)] + base)
-#define DELTANEXT(p)		   chainTable[(size_t)(p) & MAXD_MASK] 
-#define GETNEXT(p)			   ((p) - (size_t)DELTANEXT(p))
-
-
-//**************************************
-// Private functions
-//**************************************
-#if LZ4_ARCH64
-
-inline static int LZ4_NbCommonBytes (register U64 val)
-{
-#if defined(LZ4_BIG_ENDIAN)
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanReverse64( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clzll(val) >> 3); 
-    #else
-    int r;
-    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
-    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
-    r += (!val);
-    return r;
-    #endif
-#else
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r = 0;
-    _BitScanForward64( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctzll(val) >> 3); 
-    #else
-    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
-    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
-    #endif
-#endif
-}
-
-#else
-
-inline static int LZ4_NbCommonBytes (register U32 val)
-{
-#if defined(LZ4_BIG_ENDIAN)
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r;
-    _BitScanReverse( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_clz(val) >> 3); 
-    #else
-    int r;
-    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
-    r += (!val);
-    return r;
-    #endif
-#else
-    #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    unsigned long r;
-    _BitScanForward( &r, val );
-    return (int)(r>>3);
-    #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
-    return (__builtin_ctz(val) >> 3); 
-    #else
-    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
-    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
-    #endif
-#endif
-}
-
-#endif
-
-
-inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base)
-{
-    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
-    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
-    hc4->nextToUpdate = base + LZ4_ARCH64;
-    hc4->base = base;
-    return 1;
-}
-
-
-inline static void* LZ4HC_Create (const BYTE* base)
-{
-    void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
-
-    LZ4HC_Init ((LZ4HC_Data_Structure*)hc4, base);
-    return hc4;
-}
-
-
-inline static int LZ4HC_Free (void** LZ4HC_Data)
-{
-    FREEMEM(*LZ4HC_Data);
-    *LZ4HC_Data = NULL;
-    return (1);
-}
-
-
-// Update chains up to ip (excluded)
-forceinline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
-{
-    U16*   chainTable = hc4->chainTable;
-    HTYPE* HashTable  = hc4->hashTable;
-    INITBASE(base,hc4->base);
-
-    while(hc4->nextToUpdate < ip)
-    {
-        const BYTE* p = hc4->nextToUpdate;
-        size_t delta = (p) - HASH_POINTER(p); 
-        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; 
-        DELTANEXT(p) = (U16)delta; 
-        HashTable[HASH_VALUE(p)] = (p) - base;
-        hc4->nextToUpdate++;
-    }
-}
-
-
-forceinline static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
-{
-    const BYTE* p1t = p1;
-
-    while (p1t<matchlimit-(STEPSIZE-1))
-    {
-        UARCH diff = AARCH(p2) ^ AARCH(p1t);
-        if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
-        p1t += LZ4_NbCommonBytes(diff);
-        return (p1t - p1);
-    }
-    if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
-    if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
-    if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
-    return (p1t - p1);
-}
-
-
-forceinline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
-{
-    U16* const chainTable = hc4->chainTable;
-    HTYPE* const HashTable = hc4->hashTable;
-    const BYTE* ref;
-    INITBASE(base,hc4->base);
-    int nbAttempts=MAX_NB_ATTEMPTS;
-    size_t repl=0, ml=0;
-    U16 delta;
-
-    // HC4 match finder
-    LZ4HC_Insert(hc4, ip);
-    ref = HASH_POINTER(ip);
-
-#define REPEAT_OPTIMIZATION
-#ifdef REPEAT_OPTIMIZATION
-    // Detect repetitive sequences of length <= 4
-    if (ref >= ip-4)               // potential repetition
-    {
-        if (A32(ref) == A32(ip))   // confirmed
-        {
-            delta = (U16)(ip-ref);
-            repl = ml  = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
-            *matchpos = ref;
-        }
-        ref = GETNEXT(ref);
-    }
-#endif
-
-    while ((ref >= ip-MAX_DISTANCE) && (nbAttempts))
-    {
-        nbAttempts--;
-        if (*(ref+ml) == *(ip+ml))
-        if (A32(ref) == A32(ip))
-        {
-            size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
-            if (mlt > ml) { ml = mlt; *matchpos = ref; }
-        }
-        ref = GETNEXT(ref);
-    }
-
-#ifdef REPEAT_OPTIMIZATION
-    // Complete table
-    if (repl)
-    {
-        const BYTE* ptr = ip;
-        const BYTE* end;
-
-        end = ip + repl - (MINMATCH-1);
-        while(ptr < end-delta)
-        {
-            DELTANEXT(ptr) = delta;    // Pre-Load
-            ptr++;
-        }
-        do
-        {
-            DELTANEXT(ptr) = delta;    
-            HashTable[HASH_VALUE(ptr)] = (ptr) - base;     // Head of chain
-            ptr++;
-        } while(ptr < end);
-        hc4->nextToUpdate = end;
-    }
-#endif 
-
-    return (int)ml;
-}
-
-
-forceinline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
-{
-    U16* const  chainTable = hc4->chainTable;
-    HTYPE* const HashTable = hc4->hashTable;
-    INITBASE(base,hc4->base);
-    const BYTE*  ref;
-    int nbAttempts = MAX_NB_ATTEMPTS;
-    int delta = (int)(ip-startLimit);
-
-    // First Match
-    LZ4HC_Insert(hc4, ip);
-    ref = HASH_POINTER(ip);
-
-    while ((ref >= ip-MAX_DISTANCE) && (nbAttempts))
-    {
-        nbAttempts--;
-        if (*(startLimit + longest) == *(ref - delta + longest))
-        if (A32(ref) == A32(ip))
-        {
-#if 1
-            const BYTE* reft = ref+MINMATCH;
-            const BYTE* ipt = ip+MINMATCH;
-            const BYTE* startt = ip;
-
-            while (ipt<matchlimit-(STEPSIZE-1))
-            {
-                UARCH diff = AARCH(reft) ^ AARCH(ipt);
-                if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
-                ipt += LZ4_NbCommonBytes(diff);
-                goto _endCount;
-            }
-            if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
-            if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
-            if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
-_endCount:
-            reft = ref;
-#else
-            // Easier for code maintenance, but unfortunately slower too
-            const BYTE* startt = ip;
-            const BYTE* reft = ref;
-            const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
-#endif
-
-            while ((startt>startLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;}
-
-            if ((ipt-startt) > longest)
-            {
-                longest = (int)(ipt-startt);
-                *matchpos = reft;
-                *startpos = startt;
-            }
-        }
-        ref = GETNEXT(ref);
-    }
-
-    return longest;
-}
-
-
-forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref)
-{
-    int length, len; 
-    BYTE* token;
-
-    // Encode Literal length
-    length = (int)(*ip - *anchor);
-    token = (*op)++;
-    if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } 
-    else *token = (length<<ML_BITS);
-
-    // Copy Literals
-    LZ4_BLINDCOPY(*anchor, *op, length);
-
-    // Encode Offset
-    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
-
-    // Encode MatchLength
-    len = (int)(ml-MINMATCH);
-    if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } 
-    else *token += len;	
-
-    // Prepare next loop
-    *ip += ml;
-    *anchor = *ip; 
-
-    return 0;
-}
-
-
-//****************************
-// Compression CODE
-//****************************
-
-int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx,
-                 const char* source, 
-                 char* dest,
-                 int isize)
-{	
-    const BYTE* ip = (const BYTE*) source;
-    const BYTE* anchor = ip;
-    const BYTE* const iend = ip + isize;
-    const BYTE* const mflimit = iend - MFLIMIT;
-    const BYTE* const matchlimit = (iend - LASTLITERALS);
-
-    BYTE* op = (BYTE*) dest;
-
-    int	ml, ml2, ml3, ml0;
-    const BYTE* ref=NULL;
-    const BYTE* start2=NULL;
-    const BYTE* ref2=NULL;
-    const BYTE* start3=NULL;
-    const BYTE* ref3=NULL;
-    const BYTE* start0;
-    const BYTE* ref0;
-
-    ip++;
-
-    // Main Loop
-    while (ip < mflimit)
-    {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
-        if (!ml) { ip++; continue; }
-
-        // saved, in case we would skip too much
-        start0 = ip;
-        ref0 = ref;
-        ml0 = ml;
-
-_Search2:
-        if (ip+ml < mflimit)
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
-        else ml2 = ml;
-
-        if (ml2 == ml)  // No better match
-        {
-            LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
-            continue;
-        }
-
-        if (start0 < ip)
-        {
-            if (start2 < ip + ml0)   // empirical
-            {
-                ip = start0;
-                ref = ref0;
-                ml = ml0;
-            }
-        }
-
-        // Here, start0==ip
-        if ((start2 - ip) < 3)   // First Match too small : removed
-        {
-            ml = ml2;
-            ip = start2;
-            ref =ref2;
-            goto _Search2;
-        }
-
-_Search3:
-        // Currently we have :
-        // ml2 > ml1, and
-        // ip1+3 <= ip2 (usually < ip1+ml1)
-        if ((start2 - ip) < OPTIMAL_ML)
-        {
-            int correction;
-            int new_ml = ml;
-            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
-            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
-            correction = new_ml - (int)(start2 - ip);
-            if (correction > 0)
-            {
-                start2 += correction;
-                ref2 += correction;
-                ml2 -= correction;
-            }
-        }
-        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
-
-        if (start2 + ml2 < mflimit)
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
-        else ml3 = ml2;
-
-        if (ml3 == ml2) // No better match : 2 sequences to encode
-        {
-            // ip & ref are known; Now for ml
-            if (start2 < ip+ml)  ml = (int)(start2 - ip);
-            // Now, encode 2 sequences
-            LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
-            ip = start2;
-            LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2);
-            continue;
-        }
-
-        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
-        {
-            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
-            {
-                if (start2 < ip+ml)
-                {
-                    int correction = (int)(ip+ml - start2);
-                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                    if (ml2 < MINMATCH)
-                    {
-                        start2 = start3;
-                        ref2 = ref3;
-                        ml2 = ml3;
-                    }
-                }
-
-                LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
-                ip  = start3;
-                ref = ref3;
-                ml  = ml3;
-
-                start0 = start2;
-                ref0 = ref2;
-                ml0 = ml2;
-                goto _Search2;
-            }
-
-            start2 = start3;
-            ref2 = ref3;
-            ml2 = ml3;
-            goto _Search3;
-        }
-
-        // OK, now we have 3 ascending matches; let's write at least the first one
-        // ip & ref are known; Now for ml
-        if (start2 < ip+ml)
-        {
-            if ((start2 - ip) < (int)ML_MASK)
-            {
-                int correction;
-                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
-                correction = ml - (int)(start2 - ip);
-                if (correction > 0)
-                {
-                    start2 += correction;
-                    ref2 += correction;
-                    ml2 -= correction;
-                }
-            }
-            else
-            {
-                ml = (int)(start2 - ip);
-            }
-        }
-        LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
-
-        ip = start2;
-        ref = ref2;
-        ml = ml2;
-
-        start2 = start3;
-        ref2 = ref3;
-        ml2 = ml3;
-
-        goto _Search3;
-
-    }
-
-    // Encode Last Literals
-    {
-        int lastRun = (int)(iend - anchor);
-        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 
-        else *op++ = (lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
-    } 
-
-    // End
-    return (int) (((char*)op)-dest);
-}
-
-
-int LZ4_compressHC(const char* source, 
-                 char* dest,
-                 int isize)
-{
-    void* ctx = LZ4HC_Create((const BYTE*)source);
-    int result = LZ4_compressHCCtx(ctx, source, dest, isize);
-    LZ4HC_Free (&ctx);
-
-    return result;
-}
-
-
+/*
+LZ4 HC - High Compression Mode of LZ4
+Copyright (C) 2011-2014, Yann Collet.
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+- LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+
+
+/**************************************
+   Tuning Parameter
+**************************************/
+static const int LZ4HC_compressionLevel_default = 8;
+
+
+/**************************************
+   Includes
+**************************************/
+#include "lz4hc.h"
+
+
+/**************************************
+   Local Compiler Options
+**************************************/
+#if defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#if defined (__clang__)
+#  pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+
+/**************************************
+   Common LZ4 definition
+**************************************/
+#define LZ4_COMMONDEFS_ONLY
+#include "lz4.c"
+
+
+/**************************************
+  Local Constants
+**************************************/
+#define DICTIONARY_LOGSIZE 16
+#define MAXD (1<<DICTIONARY_LOGSIZE)
+#define MAXD_MASK ((U32)(MAXD - 1))
+
+#define HASH_LOG (DICTIONARY_LOGSIZE-1)
+#define HASHTABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASHTABLESIZE - 1)
+
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+
+static const int g_maxCompressionLevel = 16;
+
+
+/**************************************
+   Local Types
+**************************************/
+typedef struct
+{
+    U32 hashTable[HASHTABLESIZE];
+    U16   chainTable[MAXD];
+    const BYTE* end;        /* next block here to continue on current prefix */
+    const BYTE* base;       /* All index relative to this position */
+    const BYTE* dictBase;   /* alternate base for extDict */
+    const BYTE* inputBuffer;/* deprecated */
+    U32   dictLimit;        /* below that point, need extDict */
+    U32   lowLimit;         /* below that point, no more dict */
+    U32   nextToUpdate;
+    U32   compressionLevel;
+} LZ4HC_Data_Structure;
+
+
+/**************************************
+   Local Macros
+**************************************/
+#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
+#define DELTANEXT(p)           chainTable[(size_t)(p) & MAXD_MASK]
+#define GETNEXT(p)             ((p) - (size_t)DELTANEXT(p))
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+
+
+/**************************************
+   HC Compression
+**************************************/
+static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* start)
+{
+    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+    hc4->nextToUpdate = 64 KB;
+    hc4->base = start - 64 KB;
+    hc4->inputBuffer = start;
+    hc4->end = start;
+    hc4->dictBase = start - 64 KB;
+    hc4->dictLimit = 64 KB;
+    hc4->lowLimit = 64 KB;
+}
+
+
+/* Update chains up to ip (excluded) */
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+{
+    U16* chainTable = hc4->chainTable;
+    U32* HashTable  = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = hc4->nextToUpdate;
+
+    while(idx < target)
+    {
+        U32 h = LZ4HC_hashPtr(base+idx);
+        size_t delta = idx - HashTable[h];
+        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+        chainTable[idx & 0xFFFF] = (U16)delta;
+        HashTable[h] = idx;
+        idx++;
+    }
+
+    hc4->nextToUpdate = target;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4,   /* Index table will be updated */
+                                               const BYTE* ip, const BYTE* const iLimit,
+                                               const BYTE** matchpos,
+                                               const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const BYTE* const dictBase = hc4->dictBase;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    U32 matchIndex;
+    const BYTE* match;
+    int nbAttempts=maxNbAttempts;
+    size_t ml=0;
+
+    /* HC4 match finder */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (*(match+ml) == *(ip+ml)
+                && (LZ4_read32(match) == LZ4_read32(ip)))
+            {
+                size_t mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml) { ml = mlt; *matchpos = match; }
+            }
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip))
+            {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iLimit);
+                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
+            }
+        }
+        matchIndex -= chainTable[matchIndex & 0xFFFF];
+    }
+
+    return (int)ml;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
+    LZ4HC_Data_Structure* hc4,
+    const BYTE* ip,
+    const BYTE* iLowLimit,
+    const BYTE* iHighLimit,
+    int longest,
+    const BYTE** matchpos,
+    const BYTE** startpos,
+    const int maxNbAttempts)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const BYTE* const base = hc4->base;
+    const U32 dictLimit = hc4->dictLimit;
+    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
+    const BYTE* const dictBase = hc4->dictBase;
+    const BYTE* match;
+    U32   matchIndex;
+    int nbAttempts = maxNbAttempts;
+    int delta = (int)(ip-iLowLimit);
+
+
+    /* First Match */
+    LZ4HC_Insert(hc4, ip);
+    matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+    while ((matchIndex>=lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)
+        {
+            match = base + matchIndex;
+            if (*(iLowLimit + longest) == *(match - delta + longest))
+                if (LZ4_read32(match) == LZ4_read32(ip))
+                {
+                    const BYTE* startt = ip;
+                    const BYTE* tmpMatch = match;
+                    const BYTE* const matchEnd = ip + MINMATCH + LZ4_count(ip+MINMATCH, match+MINMATCH, iHighLimit);
+
+                    while ((startt>iLowLimit) && (tmpMatch > iLowLimit) && (startt[-1] == tmpMatch[-1])) {startt--; tmpMatch--;}
+
+                    if ((matchEnd-startt) > longest)
+                    {
+                        longest = (int)(matchEnd-startt);
+                        *matchpos = tmpMatch;
+                        *startpos = startt;
+                    }
+                }
+        }
+        else
+        {
+            match = dictBase + matchIndex;
+            if (LZ4_read32(match) == LZ4_read32(ip))
+            {
+                size_t mlt;
+                int back=0;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);
+                if (vLimit > iHighLimit) vLimit = iHighLimit;
+                mlt = LZ4_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
+                    mlt += LZ4_count(ip+mlt, base+dictLimit, iHighLimit);
+                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--;
+                mlt -= back;
+                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
+            }
+        }
+        matchIndex -= chainTable[matchIndex & 0xFFFF];
+    }
+
+    return longest;
+}
+
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+
+#define LZ4HC_DEBUG 0
+#if LZ4HC_DEBUG
+static unsigned debug = 0;
+#endif
+
+FORCE_INLINE int LZ4HC_encodeSequence (
+    const BYTE** ip,
+    BYTE** op,
+    const BYTE** anchor,
+    int matchLength,
+    const BYTE* const match,
+    limitedOutput_directive limitedOutputBuffer,
+    BYTE* oend)
+{
+    int length;
+    BYTE* token;
+
+#if LZ4HC_DEBUG
+    if (debug) printf("literal : %u  --  match : %u  --  offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match));
+#endif
+
+    /* Encode Literal length */
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
+    else *token = (BYTE)(length<<ML_BITS);
+
+    /* Copy Literals */
+    LZ4_wildCopy(*op, *anchor, (*op) + length);
+    *op += length;
+
+    /* Encode Offset */
+    LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
+
+    /* Encode MatchLength */
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
+    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
+    else *token += (BYTE)(length);
+
+    /* Prepare next loop */
+    *ip += matchLength;
+    *anchor = *ip;
+
+    return 0;
+}
+
+
+static int LZ4HC_compress_generic (
+    void* ctxvoid,
+    const char* source,
+    char* dest,
+    int inputSize,
+    int maxOutputSize,
+    int compressionLevel,
+    limitedOutput_directive limit
+    )
+{
+    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    unsigned maxNbAttempts;
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref=NULL;
+    const BYTE* start2=NULL;
+    const BYTE* ref2=NULL;
+    const BYTE* start3=NULL;
+    const BYTE* ref3=NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+
+    /* init */
+    if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel;
+    if (compressionLevel < 1) compressionLevel = LZ4HC_compressionLevel_default;
+    maxNbAttempts = 1 << (compressionLevel-1);
+    ctx->end += inputSize;
+
+    ip++;
+
+    /* Main Loop */
+    while (ip < mflimit)
+    {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+        if (!ml) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+        else ml2 = ml;
+
+        if (ml2 == ml)  /* No better match */
+        {
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip)
+        {
+            if (start2 < ip + ml0)   /* empirical */
+            {
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        /* Here, start0==ip */
+        if ((start2 - ip) < 3)   /* First Match too small : removed */
+        {
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        /*
+        * Currently we have :
+        * ml2 > ml1, and
+        * ip1+3 <= ip2 (usually < ip1+ml1)
+        */
+        if ((start2 - ip) < OPTIMAL_ML)
+        {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0)
+            {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) /* No better match : 2 sequences to encode */
+        {
+            /* ip & ref are known; Now for ml */
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            /* Now, encode 2 sequences */
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
+        {
+            if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+            {
+                if (start2 < ip+ml)
+                {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH)
+                    {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        /*
+        * OK, now we have 3 ascending matches; let's write at least the first one
+        * ip & ref are known; Now for ml
+        */
+        if (start2 < ip+ml)
+        {
+            if ((start2 - ip) < (int)ML_MASK)
+            {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0)
+                {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            }
+            else
+            {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+    }
+
+    /* Encode Last Literals */
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    }
+
+    /* End */
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compressHC2(const char* source, char* dest, int inputSize, int compressionLevel)
+{
+    LZ4HC_Data_Structure ctx;
+    LZ4HC_init(&ctx, (const BYTE*)source);
+    return LZ4HC_compress_generic (&ctx, source, dest, inputSize, 0, compressionLevel, noLimit);
+}
+
+int LZ4_compressHC(const char* source, char* dest, int inputSize) { return LZ4_compressHC2(source, dest, inputSize, 0); }
+
+int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+{
+    LZ4HC_Data_Structure ctx;
+    LZ4HC_init(&ctx, (const BYTE*)source);
+    return LZ4HC_compress_generic (&ctx, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+}
+
+int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compressHC2_limitedOutput(source, dest, inputSize, maxOutputSize, 0);
+}
+
+
+/*****************************
+ * Using external allocation
+ * ***************************/
+int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
+
+
+int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel)
+{
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
+    return LZ4HC_compress_generic (state, source, dest, inputSize, 0, compressionLevel, noLimit);
+}
+
+int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize)
+{ return LZ4_compressHC2_withStateHC (state, source, dest, inputSize, 0); }
+
+
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+{
+    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
+    LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
+    return LZ4HC_compress_generic (state, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+}
+
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
+{ return LZ4_compressHC2_limitedOutput_withStateHC (state, source, dest, inputSize, maxOutputSize, 0); }
+
+
+
+/**************************************
+ * Streaming Functions
+ * ************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; }
+
+
+/* initialization */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+    LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= LZ4_STREAMHCSIZE);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
+    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
+}
+
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
+{
+    LZ4HC_Data_Structure* ctxPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
+    if (dictSize > 64 KB)
+    {
+        dictionary += dictSize - 64 KB;
+        dictSize = 64 KB;
+    }
+    LZ4HC_init (ctxPtr, (const BYTE*)dictionary);
+    if (dictSize >= 4) LZ4HC_Insert (ctxPtr, (const BYTE*)dictionary +(dictSize-3));
+    ctxPtr->end = (const BYTE*)dictionary + dictSize;
+    return dictSize;
+}
+
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
+{
+    if (ctxPtr->end >= ctxPtr->base + 4)
+        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* Referencing remaining dictionary content */
+    /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+    ctxPtr->lowLimit  = ctxPtr->dictLimit;
+    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+    ctxPtr->dictBase  = ctxPtr->base;
+    ctxPtr->base = newBlock - ctxPtr->dictLimit;
+    ctxPtr->end  = newBlock;
+    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* match referencing will resume from there */
+}
+
+static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* ctxPtr,
+                                            const char* source, char* dest,
+                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
+{
+    /* auto-init if forgotten */
+    if (ctxPtr->base == NULL)
+        LZ4HC_init (ctxPtr, (const BYTE*) source);
+
+    /* Check overflow */
+    if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB)
+    {
+        size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+        if (dictSize > 64 KB) dictSize = 64 KB;
+
+        LZ4_loadDictHC((LZ4_streamHC_t*)ctxPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+    }
+
+    /* Check if blocks follow each other */
+    if ((const BYTE*)source != ctxPtr->end) LZ4HC_setExternalDict(ctxPtr, (const BYTE*)source);
+
+    /* Check overlapping input/dictionary space */
+    {
+        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+        const BYTE* dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+        const BYTE* dictEnd   = ctxPtr->dictBase + ctxPtr->dictLimit;
+        if ((sourceEnd > dictBegin) && ((BYTE*)source < dictEnd))
+        {
+            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+            ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+            if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
+        }
+    }
+
+    return LZ4HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize)
+{
+    return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, 0, noLimit);
+}
+
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
+}
+
+
+/* dictionary saving */
+
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+    LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
+    int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+    if (dictSize > 64 KB) dictSize = 64 KB;
+    if (dictSize < 4) dictSize = 0;
+    if (dictSize > prefixSize) dictSize = prefixSize;
+    memcpy(safeBuffer, streamPtr->end - dictSize, dictSize);
+    {
+        U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
+        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+        streamPtr->base = streamPtr->end - endIndex;
+        streamPtr->dictLimit = endIndex - dictSize;
+        streamPtr->lowLimit = endIndex - dictSize;
+        if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
+    }
+    return dictSize;
+}
+
+
+/***********************************
+ * Deprecated Functions
+ ***********************************/
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
+
+int LZ4_resetStreamStateHC(void* state, const char* inputBuffer)
+{
+    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+    LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
+    return 0;
+}
+
+void* LZ4_createHC (const char* inputBuffer)
+{
+    void* hc4 = ALLOCATOR(1, sizeof(LZ4HC_Data_Structure));
+    LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
+    return hc4;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+    FREEMEM(LZ4HC_Data);
+    return (0);
+}
+
+/*
+int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
+{
+return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, 0, noLimit);
+}
+int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, 0, limitedOutput);
+}
+*/
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{
+    LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
+    int dictSize = LZ4_saveDictHC((LZ4_streamHC_t*)LZ4HC_Data, (char*)(hc4->inputBuffer), 64 KB);
+    return (char*)(hc4->inputBuffer + dictSize);
+}
diff --git a/lz4/lz4hc.h b/lz4/lz4hc.h
index cb74689..ce813ab 100644
--- a/lz4/lz4hc.h
+++ b/lz4/lz4hc.h
@@ -1,60 +1,174 @@
-/*
-   LZ4 HC - High Compression Mode of LZ4
-   Header File
-   Copyright (C) 2011-2012, Yann Collet.
-   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are
-   met:
-
-       * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
-*/
-#pragma once
-
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-int LZ4_compressHC (const char* source, char* dest, int isize);
-
-/*
-LZ4_compressHC :
-	return : the number of bytes in compressed buffer dest
-	note : destination buffer must be already allocated. 
-		To avoid any problem, size it to handle worst cases situations (input data not compressible)
-		Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
-*/
-
-
-/* Note :
-Decompression functions are provided within regular LZ4 source code (see "lz4.h") (BSD license)
-*/
-
-
-#if defined (__cplusplus)
-}
-#endif
+/*
+   LZ4 HC - High Compression Mode of LZ4
+   Header File
+   Copyright (C) 2011-2014, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+   - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+#pragma once
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+int LZ4_compressHC (const char* source, char* dest, int inputSize);
+/*
+LZ4_compressHC :
+    return : the number of bytes in compressed buffer dest
+             or 0 if compression fails.
+    note : destination buffer must be already allocated.
+        To avoid any problem, size it to handle worst cases situations (input data not compressible)
+        Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
+*/
+
+int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+/*
+LZ4_compress_limitedOutput() :
+    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    If it cannot achieve it, compression will stop, and result of the function will be zero.
+    This function never writes outside of provided output buffer.
+
+    inputSize  : Max supported value is 1 GB
+    maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated)
+    return : the number of output bytes written in buffer 'dest'
+             or 0 if compression fails.
+*/
+
+
+int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+/*
+    Same functions as above, but with programmable 'compressionLevel'.
+    Recommended values are between 4 and 9, although any value between 0 and 16 will work.
+    'compressionLevel'==0 means use default 'compressionLevel' value.
+    Values above 16 behave the same as 16.
+    Equivalent variants exist for all other compression functions below.
+*/
+
+/* Note :
+   Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
+*/
+
+
+/**************************************
+   Using an external allocation
+**************************************/
+int LZ4_sizeofStateHC(void);
+int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+
+/*
+These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods.
+To know how much memory must be allocated for the compression tables, use :
+int LZ4_sizeofStateHC();
+
+Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0).
+
+The allocated memory can be provided to the compression functions using 'void* state' parameter.
+LZ4_compress_withStateHC() and LZ4_compress_limitedOutput_withStateHC() are equivalent to previously described functions.
+They just use the externally allocated memory for state instead of allocating their own (on stack, or on heap).
+*/
+
+
+
+/**************************************
+   Experimental Streaming Functions
+**************************************/
+#define LZ4_STREAMHCSIZE_U64 32774
+#define LZ4_STREAMHCSIZE     (LZ4_STREAMHCSIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMHCSIZE_U64]; } LZ4_streamHC_t;
+/*
+LZ4_streamHC_t
+This structure allows static allocation of LZ4 HC streaming state.
+State must then be initialized using LZ4_resetStreamHC() before first use.
+
+Static allocation should only be used with statically linked library.
+If you want to use LZ4 as a DLL, please use construction functions below, which are more future-proof.
+*/
+
+
+LZ4_streamHC_t* LZ4_createStreamHC(void);
+int             LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr);
+/*
+These functions create and release memory for LZ4 HC streaming state.
+Newly created states are already initialized.
+Existing state space can be re-used anytime using LZ4_resetStreamHC().
+If you use LZ4 as a DLL, please use these functions instead of direct struct allocation,
+to avoid size mismatch between different versions.
+*/
+
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+int  LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize);
+
+int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int maxDictSize);
+
+/*
+These functions compress data in successive blocks of any size, using previous blocks as dictionary.
+One key assumption is that each previous block will remain read-accessible while compressing next block.
+
+Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
+A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
+
+Then, use LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue() to compress each successive block.
+They work like usual LZ4_compressHC() or LZ4_compressHC_limitedOutput(), but use previous memory blocks to improve compression.
+Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
+
+If, for any reason, previous data block can't be preserved in memory during next compression block,
+you must save it to a safer memory space,
+using LZ4_saveDictHC().
+*/
+
+
+
+/**************************************
+ * Deprecated Streaming Functions
+ * ************************************/
+/* Note : these streaming functions follows the older model, and should no longer be used */
+void* LZ4_createHC (const char* inputBuffer);
+char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+int   LZ4_freeHC (void* LZ4HC_Data);
+
+int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+
+int   LZ4_sizeofStreamStateHC(void);
+int   LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/lz4/xxhash.c b/lz4/xxhash.c
new file mode 100644
index 0000000..093564c
--- /dev/null
+++ b/lz4/xxhash.c
@@ -0,0 +1,935 @@
+/*
+xxHash - Fast Hash algorithm
+Copyright (C) 2012-2015, Yann Collet
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash source repository : http://code.google.com/p/xxhash/
+- xxHash source mirror : https://github.com/Cyan4973/xxHash
+- public discussion board : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+/**************************************
+*  Tuning parameters
+***************************************/
+/* Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+ * For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
+ * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
+ * You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
+ */
+#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#  define XXH_USE_UNALIGNED_ACCESS 1
+#endif
+
+/* XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
+ * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
+ * By default, this option is disabled. To enable it, uncomment below define :
+ */
+/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
+
+/* XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independance be of no importance for your application, you may set the #define below to 1.
+ * It will improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#define XXH_FORCE_NATIVE_FORMAT 0
+
+
+/**************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
+#  define FORCE_INLINE static __forceinline
+#else
+#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/**************************************
+*  Includes & Memory related functions
+***************************************/
+#include "xxhash.h"
+/* Modify the local functions below should you wish to use some other memory routines */
+/* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void  XXH_free  (void* p)  { free(p); }
+/* for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+
+/**************************************
+*  Basic Types
+***************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char      BYTE;
+typedef unsigned short     U16;
+typedef unsigned int       U32;
+typedef   signed int       S32;
+typedef unsigned long long U64;
+#endif
+
+#if defined(__GNUC__)  && !defined(XXH_USE_UNALIGNED_ACCESS)
+#  define _PACKED __attribute__ ((packed))
+#else
+#  define _PACKED
+#endif
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  ifdef __IBMC__
+#    pragma pack(1)
+#  else
+#    pragma pack(push, 1)
+#  endif
+#endif
+
+typedef struct _U32_S
+{
+    U32 v;
+} _PACKED U32_S;
+typedef struct _U64_S
+{
+    U64 v;
+} _PACKED U64_S;
+
+#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+#  pragma pack(pop)
+#endif
+
+#define A32(x) (((U32_S *)(x))->v)
+#define A64(x) (((U64_S *)(x))->v)
+
+
+/*****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER)     /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#  define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#  define XXH_swap64 __builtin_bswap64
+#else
+static U32 XXH_swap32 (U32 x)
+{
+    return  ((x << 24) & 0xff000000 ) |
+            ((x <<  8) & 0x00ff0000 ) |
+            ((x >>  8) & 0x0000ff00 ) |
+            ((x >> 24) & 0x000000ff );
+}
+static U64 XXH_swap64 (U64 x)
+{
+    return  ((x << 56) & 0xff00000000000000ULL) |
+            ((x << 40) & 0x00ff000000000000ULL) |
+            ((x << 24) & 0x0000ff0000000000ULL) |
+            ((x << 8)  & 0x000000ff00000000ULL) |
+            ((x >> 8)  & 0x00000000ff000000ULL) |
+            ((x >> 24) & 0x0000000000ff0000ULL) |
+            ((x >> 40) & 0x000000000000ff00ULL) |
+            ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+
+/**************************************
+*  Constants
+***************************************/
+#define PRIME32_1   2654435761U
+#define PRIME32_2   2246822519U
+#define PRIME32_3   3266489917U
+#define PRIME32_4    668265263U
+#define PRIME32_5    374761393U
+
+#define PRIME64_1 11400714785074694791ULL
+#define PRIME64_2 14029467366897019727ULL
+#define PRIME64_3  1609587929392839161ULL
+#define PRIME64_4  9650029242287828579ULL
+#define PRIME64_5  2870177450012600261ULL
+
+
+/***************************************
+*  Architecture Macros
+****************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+#ifndef XXH_CPU_LITTLE_ENDIAN   /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example using a compiler switch */
+static const int one = 1;
+#   define XXH_CPU_LITTLE_ENDIAN   (*(char*)(&one))
+#endif
+
+
+/**************************************
+*  Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(!!(c)) }; }    /* use only *after* variable declarations */
+
+
+/****************************
+*  Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
+    else
+        return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+    if (align==XXH_unaligned)
+        return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
+    else
+        return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+    return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+
+/****************************
+*  Simple Hash Functions
+*****************************/
+FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U32 h32;
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)16;
+    }
+#endif
+
+    if (len>=16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = seed + PRIME32_1 + PRIME32_2;
+        U32 v2 = seed + PRIME32_2;
+        U32 v3 = seed + 0;
+        U32 v4 = seed - PRIME32_1;
+
+        do
+        {
+            v1 += XXH_get32bits(p) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_get32bits(p) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_get32bits(p) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_get32bits(p) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+    }
+    else
+    {
+        h32  = seed + PRIME32_5;
+    }
+
+    h32 += (U32) len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_get32bits(p) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+unsigned int XXH32 (const void* input, size_t len, unsigned seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH32_state_t state;
+    XXH32_reset(&state, seed);
+    XXH32_update(&state, input, len);
+    return XXH32_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 3) == 0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* bEnd = p + len;
+    U64 h64;
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (p==NULL)
+    {
+        len=0;
+        bEnd=p=(const BYTE*)(size_t)32;
+    }
+#endif
+
+    if (len>=32)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = seed + PRIME64_1 + PRIME64_2;
+        U64 v2 = seed + PRIME64_2;
+        U64 v3 = seed + 0;
+        U64 v4 = seed - PRIME64_1;
+
+        do
+        {
+            v1 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            v2 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            v3 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            v4 += XXH_get64bits(p) * PRIME64_2;
+            p+=8;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+        }
+        while (p<=limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64 * PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = seed + PRIME64_5;
+    }
+
+    h64 += (U64) len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_get64bits(p);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, input, len);
+    return XXH64_digest(&state);
+#else
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+#  if !defined(XXH_USE_UNALIGNED_ACCESS)
+    if ((((size_t)input) & 7)==0)   /* Input is aligned, let's leverage the speed advantage */
+    {
+        if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+            return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+        else
+            return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+    }
+#  endif
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+    else
+        return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/****************************************************
+ *  Advanced Hash Functions
+****************************************************/
+
+/*** Allocation ***/
+typedef struct
+{
+    U64 total_len;
+    U32 seed;
+    U32 v1;
+    U32 v2;
+    U32 v3;
+    U32 v4;
+    U32 mem32[4];   /* defined as U32 for alignment */
+    U32 memsize;
+} XXH_istate32_t;
+
+typedef struct
+{
+    U64 total_len;
+    U64 seed;
+    U64 v1;
+    U64 v2;
+    U64 v3;
+    U64 v4;
+    U64 mem64[4];   /* defined as U64 for alignment */
+    U32 memsize;
+} XXH_istate64_t;
+
+
+XXH32_state_t* XXH32_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t));   /* A compilation error here means XXH32_state_t is not large enough */
+    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+XXH64_state_t* XXH64_createState(void)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t));   /* A compilation error here means XXH64_state_t is not large enough */
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+
+/*** Hash feed ***/
+
+XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME32_1 + PRIME32_2;
+    state->v2 = seed + PRIME32_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME32_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
+{
+    XXH_istate64_t* state = (XXH_istate64_t*) state_in;
+    state->seed = seed;
+    state->v1 = seed + PRIME64_1 + PRIME64_2;
+    state->v2 = seed + PRIME64_2;
+    state->v3 = seed + 0;
+    state->v4 = seed - PRIME64_1;
+    state->total_len = 0;
+    state->memsize = 0;
+    return XXH_OK;
+}
+
+
+FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 16)   /* fill in tmp buffer */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+        {
+            const U32* p32 = state->mem32;
+            state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v1 = XXH_rotl32(state->v1, 13);
+            state->v1 *= PRIME32_1;
+            p32++;
+            state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v2 = XXH_rotl32(state->v2, 13);
+            state->v2 *= PRIME32_1;
+            p32++;
+            state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v3 = XXH_rotl32(state->v3, 13);
+            state->v3 *= PRIME32_1;
+            p32++;
+            state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
+            state->v4 = XXH_rotl32(state->v4, 13);
+            state->v4 *= PRIME32_1;
+            p32++;
+        }
+        p += 16-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p <= bEnd-16)
+    {
+        const BYTE* const limit = bEnd - 16;
+        U32 v1 = state->v1;
+        U32 v2 = state->v2;
+        U32 v3 = state->v3;
+        U32 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE32(p, endian) * PRIME32_2;
+            v1 = XXH_rotl32(v1, 13);
+            v1 *= PRIME32_1;
+            p+=4;
+            v2 += XXH_readLE32(p, endian) * PRIME32_2;
+            v2 = XXH_rotl32(v2, 13);
+            v2 *= PRIME32_1;
+            p+=4;
+            v3 += XXH_readLE32(p, endian) * PRIME32_2;
+            v3 = XXH_rotl32(v3, 13);
+            v3 *= PRIME32_1;
+            p+=4;
+            v4 += XXH_readLE32(p, endian) * PRIME32_2;
+            v4 = XXH_rotl32(v4, 13);
+            v4 *= PRIME32_1;
+            p+=4;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem32, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
+{
+    XXH_istate32_t* state = (XXH_istate32_t*) state_in;
+    const BYTE * p = (const BYTE*)state->mem32;
+    BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize;
+    U32 h32;
+
+    if (state->total_len >= 16)
+    {
+        h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
+    }
+    else
+    {
+        h32  = state->seed + PRIME32_5;
+    }
+
+    h32 += (U32) state->total_len;
+
+    while (p+4<=bEnd)
+    {
+        h32 += XXH_readLE32(p, endian) * PRIME32_3;
+        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h32 += (*p) * PRIME32_5;
+        h32 = XXH_rotl32(h32, 11) * PRIME32_1;
+        p++;
+    }
+
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+
+U32 XXH32_digest (const XXH32_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH32_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
+
+#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
+    if (input==NULL) return XXH_ERROR;
+#endif
+
+    state->total_len += len;
+
+    if (state->memsize + len < 32)   /* fill in tmp buffer */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+        state->memsize += (U32)len;
+        return XXH_OK;
+    }
+
+    if (state->memsize)   /* some data left from previous update */
+    {
+        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+        {
+            const U64* p64 = state->mem64;
+            state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v1 = XXH_rotl64(state->v1, 31);
+            state->v1 *= PRIME64_1;
+            p64++;
+            state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v2 = XXH_rotl64(state->v2, 31);
+            state->v2 *= PRIME64_1;
+            p64++;
+            state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v3 = XXH_rotl64(state->v3, 31);
+            state->v3 *= PRIME64_1;
+            p64++;
+            state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
+            state->v4 = XXH_rotl64(state->v4, 31);
+            state->v4 *= PRIME64_1;
+            p64++;
+        }
+        p += 32-state->memsize;
+        state->memsize = 0;
+    }
+
+    if (p+32 <= bEnd)
+    {
+        const BYTE* const limit = bEnd - 32;
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        do
+        {
+            v1 += XXH_readLE64(p, endian) * PRIME64_2;
+            v1 = XXH_rotl64(v1, 31);
+            v1 *= PRIME64_1;
+            p+=8;
+            v2 += XXH_readLE64(p, endian) * PRIME64_2;
+            v2 = XXH_rotl64(v2, 31);
+            v2 *= PRIME64_1;
+            p+=8;
+            v3 += XXH_readLE64(p, endian) * PRIME64_2;
+            v3 = XXH_rotl64(v3, 31);
+            v3 *= PRIME64_1;
+            p+=8;
+            v4 += XXH_readLE64(p, endian) * PRIME64_2;
+            v4 = XXH_rotl64(v4, 31);
+            v4 *= PRIME64_1;
+            p+=8;
+        }
+        while (p<=limit);
+
+        state->v1 = v1;
+        state->v2 = v2;
+        state->v3 = v3;
+        state->v4 = v4;
+    }
+
+    if (p < bEnd)
+    {
+        XXH_memcpy(state->mem64, p, bEnd-p);
+        state->memsize = (int)(bEnd-p);
+    }
+
+    return XXH_OK;
+}
+
+XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+    else
+        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
+{
+    XXH_istate64_t * state = (XXH_istate64_t *) state_in;
+    const BYTE * p = (const BYTE*)state->mem64;
+    BYTE* bEnd = (BYTE*)state->mem64 + state->memsize;
+    U64 h64;
+
+    if (state->total_len >= 32)
+    {
+        U64 v1 = state->v1;
+        U64 v2 = state->v2;
+        U64 v3 = state->v3;
+        U64 v4 = state->v4;
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+        v1 *= PRIME64_2;
+        v1 = XXH_rotl64(v1, 31);
+        v1 *= PRIME64_1;
+        h64 ^= v1;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v2 *= PRIME64_2;
+        v2 = XXH_rotl64(v2, 31);
+        v2 *= PRIME64_1;
+        h64 ^= v2;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v3 *= PRIME64_2;
+        v3 = XXH_rotl64(v3, 31);
+        v3 *= PRIME64_1;
+        h64 ^= v3;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+
+        v4 *= PRIME64_2;
+        v4 = XXH_rotl64(v4, 31);
+        v4 *= PRIME64_1;
+        h64 ^= v4;
+        h64 = h64*PRIME64_1 + PRIME64_4;
+    }
+    else
+    {
+        h64  = state->seed + PRIME64_5;
+    }
+
+    h64 += (U64) state->total_len;
+
+    while (p+8<=bEnd)
+    {
+        U64 k1 = XXH_readLE64(p, endian);
+        k1 *= PRIME64_2;
+        k1 = XXH_rotl64(k1,31);
+        k1 *= PRIME64_1;
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
+
+    if (p+4<=bEnd)
+    {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
+
+    while (p<bEnd)
+    {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
+
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+
+    return h64;
+}
+
+
+unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
diff --git a/lz4/xxhash.h b/lz4/xxhash.h
new file mode 100644
index 0000000..55b4501
--- /dev/null
+++ b/lz4/xxhash.h
@@ -0,0 +1,156 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2014, Yann Collet.
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : http://code.google.com/p/xxhash/
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MumurHash 3a    2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+*/
+
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*****************************
+   Includes
+*****************************/
+#include <stddef.h>   /* size_t */
+
+
+/*****************************
+   Type
+*****************************/
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+
+/*****************************
+   Simple Hash Functions
+*****************************/
+
+unsigned int       XXH32 (const void* input, size_t length, unsigned seed);
+unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*
+XXH32() :
+    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    This function successfully passes all SMHasher tests.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+XXH64() :
+    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+*/
+
+
+
+/*****************************
+   Advanced Hash Functions
+*****************************/
+typedef struct { long long ll[ 6]; } XXH32_state_t;
+typedef struct { long long ll[11]; } XXH64_state_t;
+
+/*
+These structures allow static allocation of XXH states.
+States must then be initialized using XXHnn_reset() before first use.
+
+If you prefer dynamic allocation, please refer to functions below.
+*/
+
+XXH32_state_t* XXH32_createState(void);
+XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+
+XXH64_state_t* XXH64_createState(void);
+XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+
+/*
+These functions create and release memory for XXH state.
+States must then be initialized using XXHnn_reset() before first use.
+*/
+
+
+XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned seed);
+XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+unsigned int  XXH32_digest (const XXH32_state_t* statePtr);
+
+XXH_errorcode      XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_errorcode      XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+unsigned long long XXH64_digest (const XXH64_state_t* statePtr);
+
+/*
+These functions calculate the xxHash of an input provided in multiple smaller packets,
+as opposed to an input provided as a single block.
+
+XXH state space must first be allocated, using either static or dynamic method provided above.
+
+Start a new hash by initializing state with a seed, using XXHnn_reset().
+
+Then, feed the hash state by calling XXHnn_update() as many times as necessary.
+Obviously, input must be valid, meaning allocated and read accessible.
+The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+
+Finally, you can produce a hash anytime, by using XXHnn_digest().
+This function returns the final nn-bits hash.
+You can nonetheless continue feeding the hash state with more input,
+and therefore get some new hashes, by calling again XXHnn_digest().
+
+When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
+*/
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/setup.py b/setup.py
index 10cee25..be7b8ea 100644
--- a/setup.py
+++ b/setup.py
@@ -1,39 +1,52 @@
-from setuptools import setup, find_packages, distutils, Extension
-from setuptools.command.build_py import build_py as BaseBuild
-import os, subprocess
+from setuptools import setup, Extension
+import lz4
 
-ext = Extension('lz4/liblz4',
-        sources = ['lz4/lz4.c', 'lz4/lz4hc.c', 'lz4/lz4_offset_hack.c'],
-        extra_compile_args = ["-O3", "-fPIC", "-std=c99", "-Wall", "-W", "-Wundef", "-Wno-implicit-function-declaration"],
-    )
+VERSION = (1, 0, 0)
+VERSION_STR = '.'.join([str(x) for x in VERSION])
 
-f = open(os.path.join(os.path.dirname(__file__), 'README'))
-readme = f.read()
-f.close()
+"""
+Extension('lz4f', [
+    'src/lz4.c',
+    'src/lz4hc.c',
+    'src/lz4frame.c',
+    'src/xxhash.c',
+], extra_compile_args=[
+    '-std=c99',
+    '-O3',
+    '-Wall',
+    '-Wextra',
+    '-Wundef',
+    '-Wshadow',
+    '-Wcast-align',
+    '-Wstrict-prototypes',
+    '-pedantic',
+]),
+"""
 
 setup(
-    name='lz4',
-    version=0.2,
-    description='A port of the python-lz4 package from the CPython API to CFFI.',
-    long_description=readme,
-    author='Andrew Pendleton',
-    author_email='apendleton@sunlightfoundation.com',
-    url='http://github.com/apendleton/python-lz4-cffi/',
-    packages=find_packages(),
-    license='BSD License',
-    platforms=["any"],
-    py_modules=["lz4"],
+    name='lz4-cffi',
+    version=VERSION_STR,
+    description='A port of the lz4tools package from to CFFI.',
+    long_description=open('README', 'r').read(),
+    author='Greg Bowyer',
+    author_email='gbowyer@fastmail.co.uk',
+    url='http://github.com/GregBowyer/lz4-cffi/',
+    packages=['lz4'],
+    ext_modules=[
+        lz4.ffi.verifier.get_extension(),
+    ],
+    setup_requires=['nose>=1.0'],
+    test_suite='nose.collector',
+    install_requires=['cffi>=0.8'],
+    include_package_data=False,
+    zip_safe=False,
+    keywords=['lz4', 'pypy'],
     classifiers=[
-        'Development Status :: 3 - Alpha',
-        'Intended Audience :: Developers',
+        'Development Status :: 5 - Production/Stable',
         'License :: OSI Approved :: BSD License',
-        'Natural Language :: English',
-        'Operating System :: OS Independent',
+        'Intended Audience :: Developers',
+        'Programming Language :: C',
         'Programming Language :: Python',
-        'Environment :: Web Environment',
+        'Programming Language :: Python :: Implementation :: PyPy',
     ],
-    install_requires=["cffi"],
-    include_package_data=True,
-    package_data={'': ['*.c','*.h']},
-    ext_modules=[ext]
 )
diff --git a/tests/test.py b/tests/test.py
index 200ec7b..cf7ff0f 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -1,5 +1,19 @@
 import lz4
 import sys
 
-DATA = open("/dev/urandom", "rb").read(128 * 1024)  # Read 128kb
-sys.exit(DATA != lz4.uncompress(lz4.compress(DATA)) and 1 or 0)
+
+import unittest
+import os
+
+class TestLZ4(unittest.TestCase):
+
+    def test_random(self):
+        DATA = os.urandom(128 * 1024)  # Read 128kb
+        self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA)))
+
+    def test_string(self):
+        DATA = "test" * (5 * 1024 * 1024) # 5mb of string
+        self.assertEqual(DATA, lz4.loads(lz4.dumps(DATA)))
+
+if __name__ == '__main__':
+    unittest.main()