Skip to content

Commit 0be1c3d

Browse files
committed
MCOL-5429 Fix high memory consumption in GROUP_CONCAT() processing.
1. Input and output RowGroup's used in GROUP_CONCAT classes are currently allocating a raw memory buffer of size equal to the actual width of the string datatype. As an example, for the following query: SELECT col1, GROUP_CONCAT(col2) FROM t GROUP BY col1; If col2 is a TEXT field with default width, the input RowGroup containing the target rows to be concatenated will assign 64kb of memory for every input row in the RowGroup. This is wasteful as actual field values in real workloads would be much smaller. We fix this by enabling the RowGroup to use the StringStore when the RowGroup contains long strings. 2. RowAggregation::initialize() allocates a memory buffer for a NULL row. The size of this buffer is equal to the row size for the output RowGroup. For the above scenario, using the default group_concat_max_len (which is a server variable that sets the maximum length of the GROUP_CONCAT string) value of 1mb, the buffer size would be (1mb + 64kb + some additional metadata). If the user sets group_concat_max_len to a higher value, say 3gb, this buffer size would be ~3gb. Now if the runtime initiates several instances of RowAggregation, total memory consumption by PrimProc could exceed the hardware memory limits causing the OS OOM to kill the process. We fix this problem by again enabling the StringStore for the NULL row allocation. 3. In the plugin code in buildAggregateColumn(), there is an integer overflow when the server group_concat_max_len variable (which is an uint32_t) is set to a value > INT32_MAX (such as 3gb) and is assigned to CalpontSystemCatalog::ColType::colWidth (which is an int32_t). As a short term fix, we saturate the assigned value to colWidth to INT32_MAX. Proper fix would be to upgrade CalpontSystemCatalog::ColType::colWidth to an uint32_t.
1 parent 4fe9cd6 commit 0be1c3d

File tree

6 files changed

+78
-14
lines changed

6 files changed

+78
-14
lines changed

dbcon/joblist/groupconcat.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,14 @@ void GroupConcatInfo::mapColumns(const RowGroup& projRG)
267267

268268
(*k)->fRowGroup = RowGroup(oids.size(), pos, oids, keys, types, csNums, scale, precision,
269269
projRG.getStringTableThreshold(), false);
270+
271+
// MCOL-5429 Use stringstore if the datatype of the groupconcat
272+
// field is a long string.
273+
if ((*k)->fRowGroup.hasLongString())
274+
{
275+
(*k)->fRowGroup.setUseStringTable(true);
276+
}
277+
270278
(*k)->fMapping = makeMapping(projRG, (*k)->fRowGroup);
271279
}
272280
}
@@ -318,10 +326,24 @@ void GroupConcatAgUM::initialize()
318326

319327
fConcator->initialize(fGroupConcat);
320328

321-
fGroupConcat->fRowGroup.initRow(&fRow, true);
322-
fData.reset(new uint8_t[fRow.getSize()]);
323-
324-
fRow.setData(rowgroup::Row::Pointer(fData.get()));
329+
// MCOL-5429 Use stringstore if the datatype of the groupconcat
330+
// field is a long string.
331+
if (fGroupConcat->fRowGroup.hasLongString())
332+
{
333+
fRowGroup = fGroupConcat->fRowGroup;
334+
fRowGroup.setUseStringTable(true);
335+
fRowRGData.reinit(fRowGroup, 1);
336+
fRowGroup.setData(&fRowRGData);
337+
fRowGroup.resetRowGroup(0);
338+
fRowGroup.initRow(&fRow);
339+
fRowGroup.getRow(0, &fRow);
340+
}
341+
else
342+
{
343+
fGroupConcat->fRowGroup.initRow(&fRow, true);
344+
fData.reset(new uint8_t[fRow.getSize()]);
345+
fRow.setData(rowgroup::Row::Pointer(fData.get()));
346+
}
325347
}
326348

327349
void GroupConcatAgUM::processRow(const rowgroup::Row& inRow)
@@ -392,7 +414,7 @@ GroupConcator::~GroupConcator()
392414
void GroupConcator::initialize(const rowgroup::SP_GroupConcat& gcc)
393415
{
394416
// MCOL-901 This value comes from the Server and it is
395-
// too high(3MB) to allocate it for every instance.
417+
// too high(1MB or 3MB by default) to allocate it for every instance.
396418
fGroupConcatLen = gcc->fSize;
397419
size_t sepSize = gcc->fSeparator.size();
398420
fCurrentLength -= sepSize; // XXX Yet I have to find out why spearator has c_str() as nullptr here.

dbcon/joblist/groupconcat.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ class GroupConcatAgUM : public rowgroup::GroupConcatAg
9292
boost::scoped_ptr<GroupConcator> fConcator;
9393
boost::scoped_array<uint8_t> fData;
9494
rowgroup::Row fRow;
95+
rowgroup::RGData fRowRGData;
96+
rowgroup::RowGroup fRowGroup;
9597
bool fNoOrder;
9698
};
9799

dbcon/mysql/ha_mcs_execplan.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5348,7 +5348,24 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
53485348
// Item_func_group_concat* gc = (Item_func_group_concat*)isp;
53495349
CalpontSystemCatalog::ColType ct;
53505350
ct.colDataType = CalpontSystemCatalog::VARCHAR;
5351-
ct.colWidth = isp->max_length;
5351+
5352+
// MCOL-5429 CalpontSystemCatalog::ColType::colWidth is currently
5353+
// stored as an int32_t (see calpontsystemcatalog.h). However,
5354+
// Item_sum::max_length is an uint32_t. This means there will be an
5355+
// integer overflow when Item_sum::max_length > colWidth. This ultimately
5356+
// causes an array index out of bound in GroupConcator::swapStreamWithStringAndReturnBuf()
5357+
// in groupconcat.cpp when ExeMgr processes groupconcat. As a temporary
5358+
// fix, we cap off the max groupconcat length to std::numeric_limits<int32_t>::max().
5359+
// The proper fix would be to change colWidth type to uint32_t.
5360+
if (isp->max_length <= std::numeric_limits<int32_t>::max())
5361+
{
5362+
ct.colWidth = isp->max_length;
5363+
}
5364+
else
5365+
{
5366+
ct.colWidth = std::numeric_limits<int32_t>::max();
5367+
}
5368+
53525369
ct.precision = 0;
53535370
ac->resultType(ct);
53545371
}

utils/rowgroup/rowaggregation.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ void RowAggregation::resetUDAF(RowUDAFFunctionCol* rowUDAF, uint64_t funcColsIdx
656656
// Initilalize the data members to meaningful values, setup the hashmap.
657657
// The fRowGroupOut must have a valid data pointer before this.
658658
//------------------------------------------------------------------------------
659-
void RowAggregation::initialize()
659+
void RowAggregation::initialize(bool hasGroupConcat)
660660
{
661661
// Calculate the length of the hashmap key.
662662
fAggMapKeyCount = fGroupByCols.size();
@@ -694,9 +694,25 @@ void RowAggregation::initialize()
694694
makeAggFieldsNull(fRow);
695695

696696
// Keep a copy of the null row to initialize new map entries.
697-
fRowGroupOut->initRow(&fNullRow, true);
698-
fNullRowData.reset(new uint8_t[fNullRow.getSize()]);
699-
fNullRow.setData(rowgroup::Row::Pointer(fNullRowData.get()));
697+
// MCOL-5429 Use stringstore if the datatype of the groupconcat
698+
// field is a long string.
699+
if (hasGroupConcat && fRowGroupOut->hasLongString())
700+
{
701+
fNullRowGroup = *fRowGroupOut;
702+
fNullRowGroup.setUseStringTable(true);
703+
fNullRowRGData.reinit(fNullRowGroup, 1);
704+
fNullRowGroup.setData(&fNullRowRGData);
705+
fNullRowGroup.resetRowGroup(0);
706+
fNullRowGroup.initRow(&fNullRow);
707+
fNullRowGroup.getRow(0, &fNullRow);
708+
}
709+
else
710+
{
711+
fRowGroupOut->initRow(&fNullRow, true);
712+
fNullRowData.reset(new uint8_t[fNullRow.getSize()]);
713+
fNullRow.setData(rowgroup::Row::Pointer(fNullRowData.get()));
714+
}
715+
700716
copyRow(fRow, &fNullRow);
701717

702718
// Lazy approach w/o a mapping b/w fFunctionCols idx and fRGContextColl idx
@@ -2413,7 +2429,7 @@ void RowAggregationUM::endOfInput()
24132429
//------------------------------------------------------------------------------
24142430
// Initilalize the Group Concat data
24152431
//------------------------------------------------------------------------------
2416-
void RowAggregationUM::initialize()
2432+
void RowAggregationUM::initialize(bool hasGroupConcat)
24172433
{
24182434
if (fGroupConcat.size() > 0)
24192435
fFunctionColGc = fFunctionCols;
@@ -2423,7 +2439,7 @@ void RowAggregationUM::initialize()
24232439
fKeyRG = fRowGroupIn.truncate(fGroupByCols.size());
24242440
}
24252441

2426-
RowAggregation::initialize();
2442+
RowAggregation::initialize(fGroupConcat.size() > 0);
24272443
}
24282444

24292445
//------------------------------------------------------------------------------

utils/rowgroup/rowaggregation.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ class RowAggregation : public messageqcpp::Serializeable
529529
}
530530

531531
protected:
532-
virtual void initialize();
532+
virtual void initialize(bool hasGroupConcat = false);
533533
virtual void initMapData(const Row& row);
534534
virtual void attachGroupConcatAg();
535535

@@ -580,6 +580,8 @@ class RowAggregation : public messageqcpp::Serializeable
580580
Row fNullRow;
581581
Row* tmpRow; // used by the hashers & eq functors
582582
boost::scoped_array<uint8_t> fNullRowData;
583+
rowgroup::RGData fNullRowRGData;
584+
rowgroup::RowGroup fNullRowGroup;
583585

584586
std::unique_ptr<RowAggStorage> fRowAggStorage;
585587

@@ -724,7 +726,7 @@ class RowAggregationUM : public RowAggregation
724726

725727
protected:
726728
// virtual methods from base
727-
void initialize() override;
729+
void initialize(bool hasGroupConcat = false) override;
728730

729731
void attachGroupConcatAg() override;
730732
void updateEntry(const Row& row, std::vector<mcsv1sdk::mcsv1Context>* rgContextColl = nullptr) override;

utils/rowgroup/rowgroup.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,6 +1544,11 @@ class RowGroup : public messageqcpp::Serializeable
15441544
inline bool usesStringTable() const;
15451545
inline void setUseStringTable(bool);
15461546

1547+
bool hasLongString() const
1548+
{
1549+
return hasLongStringField;
1550+
}
1551+
15471552
void serializeRGData(messageqcpp::ByteStream&) const;
15481553
inline uint32_t getStringTableThreshold() const;
15491554

0 commit comments

Comments
 (0)