17
17
**************************************************************************/
18
18
19
19
#include < realm/array_string.hpp>
20
+ #include < realm/impl/array_writer.hpp>
20
21
#include < realm/spec.hpp>
21
22
#include < realm/mixed.hpp>
22
23
@@ -52,14 +53,24 @@ void ArrayString::init_from_mem(MemRef mem) noexcept
52
53
else {
53
54
auto arr = new (&m_storage) Array (m_alloc);
54
55
arr->init_from_mem (mem);
55
- m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
56
- ArrayParent* p;
57
- REALM_ASSERT (m_spec != nullptr );
58
- REALM_ASSERT (m_col_ndx != realm::npos);
59
- ref_type r = m_spec->get_enumkeys_ref (m_col_ndx, p);
60
- m_string_enum_values->init_from_ref (r);
61
- m_string_enum_values->set_parent (p, m_col_ndx);
62
- m_type = Type::enum_strings;
56
+ // The context flag is used to indicate interned strings vs old enum strings
57
+ // (in conjunction with has_refs() == false)
58
+ if (arr->get_context_flag_from_header (arr->get_header ())) {
59
+ // init for new interned strings (replacing old enum strings)
60
+ m_type = Type::interned_strings;
61
+ // consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
62
+ }
63
+ else {
64
+ // init for old enum strings
65
+ m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
66
+ ArrayParent* p;
67
+ REALM_ASSERT (m_spec != nullptr );
68
+ REALM_ASSERT (m_col_ndx != realm::npos);
69
+ ref_type r = m_spec->get_enumkeys_ref (m_col_ndx, p);
70
+ m_string_enum_values->init_from_ref (r);
71
+ m_string_enum_values->set_parent (p, m_col_ndx);
72
+ m_type = Type::enum_strings;
73
+ }
63
74
}
64
75
}
65
76
else {
@@ -111,6 +122,7 @@ size_t ArrayString::size() const
111
122
case Type::big_strings:
112
123
return static_cast <ArrayBigBlobs*>(m_arr)->size ();
113
124
case Type::enum_strings:
125
+ case Type::interned_strings:
114
126
return static_cast <Array*>(m_arr)->size ();
115
127
}
116
128
return {};
@@ -128,7 +140,8 @@ void ArrayString::add(StringData value)
128
140
case Type::big_strings:
129
141
static_cast <ArrayBigBlobs*>(m_arr)->add_string (value);
130
142
break ;
131
- case Type::enum_strings: {
143
+ case Type::enum_strings:
144
+ case Type::interned_strings: {
132
145
auto a = static_cast <Array*>(m_arr);
133
146
size_t ndx = a->size ();
134
147
a->add (0 );
@@ -150,6 +163,11 @@ void ArrayString::set(size_t ndx, StringData value)
150
163
case Type::big_strings:
151
164
static_cast <ArrayBigBlobs*>(m_arr)->set_string (ndx, value);
152
165
break ;
166
+ case Type::interned_strings: {
167
+ auto id = m_string_interner->intern (value);
168
+ static_cast <Array*>(m_arr)->set (ndx, id);
169
+ break ;
170
+ }
153
171
case Type::enum_strings: {
154
172
size_t sz = m_string_enum_values->size ();
155
173
size_t res = m_string_enum_values->find_first (value, 0 , sz);
@@ -178,6 +196,12 @@ void ArrayString::insert(size_t ndx, StringData value)
178
196
case Type::enum_strings: {
179
197
static_cast <Array*>(m_arr)->insert (ndx, 0 );
180
198
set (ndx, value);
199
+ break ;
200
+ }
201
+ case Type::interned_strings: {
202
+ static_cast <Array*>(m_arr)->insert (ndx, 0 );
203
+ set (ndx, value);
204
+ break ;
181
205
}
182
206
}
183
207
}
@@ -195,6 +219,10 @@ StringData ArrayString::get(size_t ndx) const
195
219
size_t index = size_t (static_cast <Array*>(m_arr)->get (ndx));
196
220
return m_string_enum_values->get (index );
197
221
}
222
+ case Type::interned_strings: {
223
+ size_t id = size_t (static_cast <Array*>(m_arr)->get (ndx));
224
+ return m_string_interner->get (id);
225
+ }
198
226
}
199
227
return {};
200
228
}
@@ -212,6 +240,10 @@ StringData ArrayString::get_legacy(size_t ndx) const
212
240
size_t index = size_t (static_cast <Array*>(m_arr)->get (ndx));
213
241
return m_string_enum_values->get (index );
214
242
}
243
+ case Type::interned_strings: {
244
+ size_t id = size_t (static_cast <Array*>(m_arr)->get (ndx));
245
+ return m_string_interner->get (id);
246
+ }
215
247
}
216
248
return {};
217
249
}
@@ -231,8 +263,12 @@ bool ArrayString::is_null(size_t ndx) const
231
263
case Type::big_strings:
232
264
return static_cast <ArrayBigBlobs*>(m_arr)->is_null (ndx);
233
265
case Type::enum_strings: {
234
- size_t index = size_t (static_cast <Array*>(m_arr)->get (ndx));
235
- return m_string_enum_values->is_null (index );
266
+ size_t id = size_t (static_cast <Array*>(m_arr)->get (ndx));
267
+ return m_string_enum_values->is_null (id);
268
+ }
269
+ case Type::interned_strings: {
270
+ size_t id = size_t (static_cast <Array*>(m_arr)->get (ndx));
271
+ return id == 0 ;
236
272
}
237
273
}
238
274
return {};
@@ -250,6 +286,7 @@ void ArrayString::erase(size_t ndx)
250
286
case Type::big_strings:
251
287
static_cast <ArrayBigBlobs*>(m_arr)->erase (ndx);
252
288
break ;
289
+ case Type::interned_strings:
253
290
case Type::enum_strings:
254
291
static_cast <Array*>(m_arr)->erase (ndx);
255
292
break ;
@@ -277,6 +314,9 @@ void ArrayString::move(ArrayString& dst, size_t ndx)
277
314
// this operation will never be called for enumerated columns
278
315
REALM_UNREACHABLE ();
279
316
break ;
317
+ case Type::interned_strings:
318
+ m_arr->truncate (ndx);
319
+ break ;
280
320
}
281
321
}
282
322
@@ -293,6 +333,7 @@ void ArrayString::clear()
293
333
static_cast <ArrayBigBlobs*>(m_arr)->clear ();
294
334
break ;
295
335
case Type::enum_strings:
336
+ case Type::interned_strings:
296
337
static_cast <Array*>(m_arr)->clear ();
297
338
break ;
298
339
}
@@ -321,6 +362,15 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const
321
362
}
322
363
break ;
323
364
}
365
+ case Type::interned_strings: {
366
+ // We need a way to avoid repeating this lookup for each leaf array. The lookup should be
367
+ // performed higher up the call stack and its result passed down.
368
+ auto id = m_string_interner->lookup (value);
369
+ if (id) {
370
+ return static_cast <Array*>(m_arr)->find_first (*id, begin, end);
371
+ }
372
+ break ;
373
+ }
324
374
}
325
375
return not_found;
326
376
}
@@ -371,6 +421,9 @@ size_t ArrayString::lower_bound(StringData value)
371
421
return lower_bound_string (static_cast <ArrayBigBlobs*>(m_arr), value);
372
422
case Type::enum_strings:
373
423
break ;
424
+ case Type::interned_strings:
425
+ REALM_UNREACHABLE ();
426
+ break ;
374
427
}
375
428
return realm::npos;
376
429
}
@@ -383,6 +436,9 @@ ArrayString::Type ArrayString::upgrade_leaf(size_t value_size)
383
436
if (m_type == Type::enum_strings)
384
437
return Type::enum_strings;
385
438
439
+ if (m_type == Type::interned_strings)
440
+ return Type::interned_strings;
441
+
386
442
if (m_type == Type::medium_strings) {
387
443
if (value_size <= medium_string_max_size)
388
444
return Type::medium_strings;
@@ -473,8 +529,25 @@ void ArrayString::verify() const
473
529
static_cast <ArrayBigBlobs*>(m_arr)->verify ();
474
530
break ;
475
531
case Type::enum_strings:
532
+ case Type::interned_strings:
476
533
static_cast <Array*>(m_arr)->verify ();
477
534
break ;
478
535
}
479
536
#endif
480
537
}
538
+
539
+ ref_type ArrayString::write (_impl::ArrayWriterBase& out, StringInterner* interner)
540
+ {
541
+ REALM_ASSERT (interner);
542
+ // we have to write out all, modified or not, to match the total cleanup
543
+ Array interned (Allocator::get_default ());
544
+ auto sz = size ();
545
+ interned.create (NodeHeader::type_Normal, true , sz);
546
+ for (size_t i = 0 ; i < sz; ++i) {
547
+ interned.set (i, interner->intern (get (i)));
548
+ }
549
+ auto retval = interned.write (out, false , false , out.compress );
550
+ interned.destroy ();
551
+ return retval;
552
+ // return m_arr->write(out, true, false, false);
553
+ }
0 commit comments