Skip to content

Commit 5fc8293

Browse files
authored
Merge pull request #5 from autinitysystems/DecompressionContextLeakFix
Memory leak fix when content size header is 0
2 parents 0878706 + 49683b8 commit 5fc8293

File tree

13 files changed

+791
-606
lines changed

13 files changed

+791
-606
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,8 @@
55
# sublime
66
*.sublime*
77

8+
# VS Code
9+
.vscode
10+
811
# dzil build dir
912
.build/

Changes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
{{$NEXT}}
22

3+
- Bug fix for memory leak when content size header is 0 and no errors occur.
4+
35
0.012001 2017-09-13 23:37:49+02:00 Europe/Berlin
46

57
- Bug fix for GitHub issue #4 (Could not read frame info: ERROR_frameHeader_incomplete)

CompressLZ4Frame.xs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ SV * decompress_single_frame(pTHX_ char * src, size_t src_len, size_t * bytes_pr
113113
// done uncompressing, now put the stuff into a scalar
114114
decompressed = newSV(0);
115115
sv_usepvn_flags(decompressed, dest, dest_len, SV_SMAGIC);
116+
LZ4F_freeDecompressionContext(ctx);
116117
}
117118

118119
return decompressed;

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ which uses its own format: [Compress::LZ4](https://github.com/gray/compress-lz4)
1313

1414
## Copyright & License
1515

16-
Copyright © 2015 - 2017, afr-consulting GmbH
16+
Copyright © 2015 - 2018, afr-consulting GmbH
1717

1818
This library is free software; you can redistribute it and/or modify it under
1919
the same terms as Perl itself.

dist.ini

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ name = Compress-LZ4Frame
22
author = Felix Bytow <[email protected]>
33
license = Perl_5
44
copyright_holder = afr-consulting GmbH
5-
copyright_year = 2017
5+
copyright_year = 2018
66

7-
version = 0.012001
7+
version = 0.012002
88

99
[MinimumPerl]
1010

lz4.c

Lines changed: 106 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
#endif
8686

8787

88+
8889
/*-************************************
8990
* Dependency
9091
**************************************/
@@ -117,6 +118,28 @@
117118
# endif /* _MSC_VER */
118119
#endif /* LZ4_FORCE_INLINE */
119120

121+
/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
122+
* Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
123+
* together with a simple 8-byte copy loop as a fall-back path.
124+
* However, this optimization hurts the decompression speed by >30%,
125+
* because the execution does not go to the optimized loop
126+
* for typical compressible data, and all of the preamble checks
127+
* before going to the fall-back path become useless overhead.
128+
* This optimization happens only with the -O3 flag, and -O2 generates
129+
* a simple 8-byte copy loop.
130+
* With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
131+
* functions are annotated with __attribute__((optimize("O2"))),
132+
* and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
133+
* of LZ4_wildCopy does not affect the compression speed.
134+
*/
135+
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
136+
# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
137+
# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
138+
#else
139+
# define LZ4_FORCE_O2_GCC_PPC64LE
140+
# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
141+
#endif
142+
120143
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
121144
# define expect(expr,value) (__builtin_expect ((expr),(value)) )
122145
#else
@@ -253,7 +276,8 @@ static void LZ4_copy8(void* dst, const void* src)
253276
}
254277

255278
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
256-
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
279+
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
280+
void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
257281
{
258282
BYTE* d = (BYTE*)dstPtr;
259283
const BYTE* s = (const BYTE*)srcPtr;
@@ -289,15 +313,24 @@ static const int LZ4_minLength = (MFLIMIT+1);
289313
/*-************************************
290314
* Error detection
291315
**************************************/
316+
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
317+
# include <assert.h>
318+
#else
319+
# ifndef assert
320+
# define assert(condition) ((void)0)
321+
# endif
322+
#endif
323+
292324
#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
293325

294326
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
295327
# include <stdio.h>
296-
# define DEBUGLOG(l, ...) { \
297-
if (l<=LZ4_DEBUG) { \
298-
fprintf(stderr, __FILE__ ": "); \
299-
fprintf(stderr, __VA_ARGS__); \
300-
fprintf(stderr, " \n"); \
328+
static int g_debuglog_enable = 1;
329+
# define DEBUGLOG(l, ...) { \
330+
if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
331+
fprintf(stderr, __FILE__ ": "); \
332+
fprintf(stderr, __VA_ARGS__); \
333+
fprintf(stderr, " \n"); \
301334
} }
302335
#else
303336
# define DEBUGLOG(l, ...) {} /* disabled */
@@ -307,7 +340,7 @@ static const int LZ4_minLength = (MFLIMIT+1);
307340
/*-************************************
308341
* Common functions
309342
**************************************/
310-
static unsigned LZ4_NbCommonBytes (register reg_t val)
343+
static unsigned LZ4_NbCommonBytes (reg_t val)
311344
{
312345
if (LZ4_isLittleEndian()) {
313346
if (sizeof(val)==8) {
@@ -318,7 +351,14 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
318351
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
319352
return (__builtin_ctzll((U64)val) >> 3);
320353
# else
321-
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
354+
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
355+
0, 3, 1, 3, 1, 4, 2, 7,
356+
0, 2, 3, 6, 1, 5, 3, 5,
357+
1, 3, 4, 4, 2, 5, 6, 7,
358+
7, 0, 1, 2, 3, 3, 4, 6,
359+
2, 6, 5, 5, 3, 4, 5, 6,
360+
7, 1, 2, 4, 6, 4, 4, 5,
361+
7, 2, 6, 5, 7, 6, 7, 7 };
322362
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
323363
# endif
324364
} else /* 32 bits */ {
@@ -329,21 +369,27 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
329369
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
330370
return (__builtin_ctz((U32)val) >> 3);
331371
# else
332-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
372+
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
373+
3, 2, 2, 1, 3, 2, 0, 1,
374+
3, 3, 1, 2, 2, 2, 2, 0,
375+
3, 1, 2, 0, 1, 0, 1, 1 };
333376
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
334377
# endif
335378
}
336379
} else /* Big Endian CPU */ {
337-
if (sizeof(val)==8) {
380+
if (sizeof(val)==8) { /* 64-bits */
338381
# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
339382
unsigned long r = 0;
340383
_BitScanReverse64( &r, val );
341384
return (unsigned)(r>>3);
342385
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
343386
return (__builtin_clzll((U64)val) >> 3);
344387
# else
388+
static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
389+
Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
390+
Note that this code path is never triggered in 32-bits mode. */
345391
unsigned r;
346-
if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
392+
if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
347393
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
348394
r += (!val);
349395
return r;
@@ -366,11 +412,20 @@ static unsigned LZ4_NbCommonBytes (register reg_t val)
366412
}
367413

368414
#define STEPSIZE sizeof(reg_t)
369-
static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
415+
LZ4_FORCE_INLINE
416+
unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
370417
{
371418
const BYTE* const pStart = pIn;
372419

373-
while (likely(pIn<pInLimit-(STEPSIZE-1))) {
420+
if (likely(pIn < pInLimit-(STEPSIZE-1))) {
421+
reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
422+
if (!diff) {
423+
pIn+=STEPSIZE; pMatch+=STEPSIZE;
424+
} else {
425+
return LZ4_NbCommonBytes(diff);
426+
} }
427+
428+
while (likely(pIn < pInLimit-(STEPSIZE-1))) {
374429
reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
375430
if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
376431
pIn += LZ4_NbCommonBytes(diff);
@@ -944,6 +999,7 @@ LZ4_stream_t* LZ4_createStream(void)
944999

9451000
void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
9461001
{
1002+
DEBUGLOG(4, "LZ4_resetStream");
9471003
MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
9481004
}
9491005

@@ -1109,6 +1165,7 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
11091165
* Note that it is important for performance that this function really get inlined,
11101166
* in order to remove useless branches during compilation optimization.
11111167
*/
1168+
LZ4_FORCE_O2_GCC_PPC64LE
11121169
LZ4_FORCE_INLINE int LZ4_decompress_generic(
11131170
const char* const src,
11141171
char* const dst,
@@ -1119,7 +1176,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
11191176
int partialDecoding, /* full, partial */
11201177
int targetOutputSize, /* only used if partialDecoding==partial */
11211178
int dict, /* noDict, withPrefix64k, usingExtDict */
1122-
const BYTE* const lowPrefix, /* == dst when no prefix */
1179+
const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
11231180
const BYTE* const dictStart, /* only if dict==usingExtDict */
11241181
const size_t dictSize /* note : = 0 if noDict */
11251182
)
@@ -1133,15 +1190,15 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
11331190
BYTE* oexit = op + targetOutputSize;
11341191

11351192
const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
1136-
const unsigned dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
1137-
const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
1193+
const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
1194+
const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
11381195

11391196
const int safeDecode = (endOnInput==endOnInputSize);
11401197
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
11411198

11421199

11431200
/* Special cases */
1144-
if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */
1201+
if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => just decode everything */
11451202
if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
11461203
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
11471204

@@ -1151,8 +1208,27 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
11511208
const BYTE* match;
11521209
size_t offset;
11531210

1154-
/* get literal length */
11551211
unsigned const token = *ip++;
1212+
1213+
/* shortcut for common case :
1214+
* in most circumstances, we expect to decode small matches (<= 18 bytes) separated by few literals (<= 14 bytes).
1215+
* this shortcut was tested on x86 and x64, where it improves decoding speed.
1216+
* it has not yet been benchmarked on ARM, Power, mips, etc. */
1217+
if (((ip + 14 /*maxLL*/ + 2 /*offset*/ <= iend)
1218+
& (op + 14 /*maxLL*/ + 18 /*maxML*/ <= oend))
1219+
& ((token < (15<<ML_BITS)) & ((token & ML_MASK) != 15)) ) {
1220+
size_t const ll = token >> ML_BITS;
1221+
size_t const off = LZ4_readLE16(ip+ll);
1222+
const BYTE* const matchPtr = op + ll - off; /* pointer underflow risk ? */
1223+
if ((off >= 18) /* do not deal with overlapping matches */ & (matchPtr >= lowPrefix)) {
1224+
size_t const ml = (token & ML_MASK) + MINMATCH;
1225+
memcpy(op, ip, 16); op += ll; ip += ll + 2 /*offset*/;
1226+
memcpy(op, matchPtr, 18); op += ml;
1227+
continue;
1228+
}
1229+
}
1230+
1231+
/* decode literal length */
11561232
if ((length=(token>>ML_BITS)) == RUN_MASK) {
11571233
unsigned s;
11581234
do {
@@ -1230,14 +1306,13 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
12301306
/* copy match within block */
12311307
cpy = op + length;
12321308
if (unlikely(offset<8)) {
1233-
const int dec64 = dec64table[offset];
12341309
op[0] = match[0];
12351310
op[1] = match[1];
12361311
op[2] = match[2];
12371312
op[3] = match[3];
1238-
match += dec32table[offset];
1313+
match += inc32table[offset];
12391314
memcpy(op+4, match, 4);
1240-
match -= dec64;
1315+
match -= dec64table[offset];
12411316
} else { LZ4_copy8(op, match); match+=8; }
12421317
op += 8;
12431318

@@ -1254,7 +1329,7 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
12541329
LZ4_copy8(op, match);
12551330
if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
12561331
}
1257-
op=cpy; /* correction */
1332+
op = cpy; /* correction */
12581333
}
12591334

12601335
/* end of decoding */
@@ -1269,16 +1344,19 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic(
12691344
}
12701345

12711346

1347+
LZ4_FORCE_O2_GCC_PPC64LE
12721348
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
12731349
{
12741350
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
12751351
}
12761352

1353+
LZ4_FORCE_O2_GCC_PPC64LE
12771354
int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
12781355
{
12791356
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
12801357
}
12811358

1359+
LZ4_FORCE_O2_GCC_PPC64LE
12821360
int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
12831361
{
12841362
return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
@@ -1324,6 +1402,7 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
13241402
If it's not possible, save the relevant part of decoded data into a safe buffer,
13251403
and indicate where it stands using LZ4_setStreamDecode()
13261404
*/
1405+
LZ4_FORCE_O2_GCC_PPC64LE
13271406
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
13281407
{
13291408
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -1350,6 +1429,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
13501429
return result;
13511430
}
13521431

1432+
LZ4_FORCE_O2_GCC_PPC64LE
13531433
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
13541434
{
13551435
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -1384,6 +1464,7 @@ Advanced decoding functions :
13841464
the dictionary must be explicitly provided within parameters
13851465
*/
13861466

1467+
LZ4_FORCE_O2_GCC_PPC64LE
13871468
LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
13881469
{
13891470
if (dictSize==0)
@@ -1396,17 +1477,20 @@ LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char*
13961477
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
13971478
}
13981479

1480+
LZ4_FORCE_O2_GCC_PPC64LE
13991481
int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
14001482
{
14011483
return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
14021484
}
14031485

1486+
LZ4_FORCE_O2_GCC_PPC64LE
14041487
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
14051488
{
14061489
return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
14071490
}
14081491

14091492
/* debug function */
1493+
LZ4_FORCE_O2_GCC_PPC64LE
14101494
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
14111495
{
14121496
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);

0 commit comments

Comments
 (0)