Skip to content

Commit 945d755

Browse files
committed
Fixed ColumnLowCardinality::Clear() and handling of null-item at other places
1 parent 2cdb0d8 commit 945d755

File tree

3 files changed

+34
-9
lines changed

3 files changed

+34
-9
lines changed

clickhouse/columns/column.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class Column : public std::enable_shared_from_this<Column> {
5757
virtual void Swap(Column&) = 0;
5858

5959
/// Get a view on raw item data if it is supported by column, will throw an exception if index is out of range.
60-
/// Please note that view is invalidated once column is items are added or deleted, column is loaded from strean or destroyed.
60+
/// Please note that view is invalidated once column items are added or deleted, column is loaded from strean or destroyed.
6161
virtual ItemView GetItem(size_t) const {
6262
throw std::runtime_error("GetItem() is not supported for column of " + type_->GetName());
6363
}

clickhouse/columns/lowcardinality.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <string_view>
1111
#include <type_traits>
1212

13+
#include <cassert>
14+
1315
namespace {
1416
using namespace clickhouse;
1517

@@ -105,13 +107,13 @@ inline void AppendToDictionary(Column& dictionary, const ItemView & item) {
105107
}
106108
}
107109

108-
// Add special NULL-item, which is expected at pos(0) in dictionary,
110+
// A special NULL-item, which is expected at pos(0) in dictionary,
109111
// note that we distinguish empty string from NULL-value.
110-
inline void AppendNullItemToDictionary(ColumnRef dictionary) {
112+
inline auto GetNullItemForDictionary(const ColumnRef dictionary) {
111113
if (auto n = dictionary->As<ColumnNullable>()) {
112-
AppendToDictionary(*dictionary, ItemView{});
114+
return ItemView{};
113115
} else {
114-
AppendToDictionary(*dictionary, ItemView{dictionary->Type()->GetCode(), std::string_view{}});
116+
return ItemView{dictionary->Type()->GetCode(), std::string_view{}};
115117
}
116118
}
117119

@@ -125,18 +127,19 @@ ColumnLowCardinality::ColumnLowCardinality(ColumnRef dictionary_column)
125127
{
126128
if (dictionary_column_->Size() != 0) {
127129
// When dictionary column was constructed with values, re-add values by copying to update index and unique_items_map.
130+
// TODO: eliminate data copying by coming with better solution than doing AppendUnsafe() N times.
128131

129132
// Steal values into temporary column.
130133
auto values = dictionary_column_->Slice(0, 0);
131134
values->Swap(*dictionary_column_);
132135

133-
AppendNullItemToDictionary(dictionary_column_);
136+
AppendNullItemToEmptyColumn();
134137

135138
// Re-add values, updating index and unique_items_map.
136139
for (size_t i = 0; i < values->Size(); ++i)
137140
AppendUnsafe(values->GetItem(i));
138141
} else {
139-
AppendNullItemToDictionary(dictionary_column_);
142+
AppendNullItemToEmptyColumn();
140143
}
141144
}
142145

@@ -288,6 +291,9 @@ void ColumnLowCardinality::Save(CodedOutputStream* output) {
288291
void ColumnLowCardinality::Clear() {
289292
index_column_->Clear();
290293
dictionary_column_->Clear();
294+
unique_items_map_.clear();
295+
296+
AppendNullItemToEmptyColumn();
291297
}
292298

293299
size_t ColumnLowCardinality::Size() const {
@@ -353,6 +359,19 @@ void ColumnLowCardinality::AppendUnsafe(const ItemView & value) {
353359
}
354360
}
355361

362+
void ColumnLowCardinality::AppendNullItemToEmptyColumn()
363+
{
364+
// INVARIANT: Empty LC column has an (invisible) null-item at pos 0, which MUST be present in
365+
// unique_items_map_ in order to reuse dictionary posistion on subsequent Append()-s.
366+
367+
// Should be only performed on empty LC column.
368+
assert(dictionary_column_->Size() == 0 && unique_items_map_.empty());
369+
370+
const auto null_item = GetNullItemForDictionary(dictionary_column_);
371+
AppendToDictionary(*dictionary_column_, null_item);
372+
unique_items_map_.emplace(computeHashKey(null_item), dictionary_column_->Size());
373+
}
374+
356375
size_t ColumnLowCardinality::GetDictionarySize() const {
357376
return dictionary_column_->Size();
358377
}

clickhouse/columns/lowcardinality.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ class ColumnLowCardinality : public Column {
8282
ColumnRef GetDictionary();
8383
void AppendUnsafe(const ItemView &);
8484

85+
private:
86+
void AppendNullItemToEmptyColumn();
87+
8588
public:
8689
static details::LowCardinalityHashKey computeHashKey(const ItemView &);
8790
};
@@ -90,7 +93,9 @@ class ColumnLowCardinality : public Column {
9093
*/
9194
template <typename DictionaryColumnType>
9295
class ColumnLowCardinalityT : public ColumnLowCardinality {
96+
9397
DictionaryColumnType& typed_dictionary_;
98+
const Type::Code type_;
9499

95100
public:
96101
using WrappedColumnType = DictionaryColumnType;
@@ -100,7 +105,8 @@ class ColumnLowCardinalityT : public ColumnLowCardinality {
100105
template <typename ...Args>
101106
explicit ColumnLowCardinalityT(Args &&... args)
102107
: ColumnLowCardinality(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...)),
103-
typed_dictionary_(dynamic_cast<DictionaryColumnType &>(*GetDictionary()))
108+
typed_dictionary_(dynamic_cast<DictionaryColumnType &>(*GetDictionary())),
109+
type_(typed_dictionary_.Type()->GetCode())
104110
{}
105111

106112
/// Extended interface to simplify reading/adding individual items.
@@ -119,7 +125,7 @@ class ColumnLowCardinalityT : public ColumnLowCardinality {
119125
using ColumnLowCardinality::Append;
120126

121127
inline void Append(const ValueType & value) {
122-
AppendUnsafe(ItemView{typed_dictionary_.Type()->GetCode(), value});
128+
AppendUnsafe(ItemView{type_, value});
123129
}
124130

125131
template <typename T>

0 commit comments

Comments
 (0)