Skip to content
This repository was archived by the owner on Jan 28, 2021. It is now read-only.

Commit 14d8bbe

Browse files
committed
sql/index/pilosa: parallelize index creation
Signed-off-by: Miguel Molina <[email protected]>
1 parent 7b6a0f6 commit 14d8bbe

File tree

7 files changed

+183
-66
lines changed

7 files changed

+183
-66
lines changed

sql/index/pilosa/driver.go

+90-43
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ import (
88
"io/ioutil"
99
"os"
1010
"path/filepath"
11+
"runtime"
1112
"strings"
13+
"sync"
14+
"sync/atomic"
1215
"time"
1316

1417
opentracing "github.com/opentracing/opentracing-go"
@@ -217,13 +220,13 @@ func (d *Driver) savePartition(
217220
kviter sql.IndexKeyValueIter,
218221
idx *pilosaIndex,
219222
pilosaIndex *concurrentPilosaIndex,
220-
offset uint64,
221223
b *batch,
222224
) (uint64, error) {
223225
var (
224226
colID uint64
225227
err error
226228
)
229+
227230
for i, e := range idx.Expressions() {
228231
name := fieldName(idx.ID(), e, p)
229232
pilosaIndex.DeleteField(name)
@@ -254,7 +257,7 @@ func (d *Driver) savePartition(
254257
kviter.Close()
255258
}()
256259

257-
for colID = offset; err == nil; colID++ {
260+
for colID = 0; err == nil; colID++ {
258261
// commit each batch of objects (pilosa and boltdb)
259262
if colID%sql.IndexBatchSize == 0 && colID != 0 {
260263
if err = d.saveBatch(ctx, idx.mapping, colID, b); err != nil {
@@ -265,28 +268,30 @@ func (d *Driver) savePartition(
265268
select {
266269
case <-ctx.Context.Done():
267270
return 0, ctx.Context.Err()
268-
269271
default:
270-
var (
271-
values []interface{}
272-
location []byte
273-
)
274-
if values, location, err = kviter.Next(); err != nil {
275-
break
276-
}
272+
}
277273

278-
for i, field := range b.fields {
279-
if values[i] == nil {
280-
continue
281-
}
274+
values, location, err := kviter.Next()
275+
if err != nil {
276+
break
277+
}
282278

283-
rowID, err := idx.mapping.getRowID(field.Name(), values[i])
284-
if err != nil {
285-
return 0, err
286-
}
287-
b.bitBatches[i].Add(rowID, colID)
279+
for i, field := range b.fields {
280+
if values[i] == nil {
281+
continue
288282
}
289-
err = idx.mapping.putLocation(pilosaIndex.Name(), colID, location)
283+
284+
rowID, err := idx.mapping.getRowID(field.Name(), values[i])
285+
if err != nil {
286+
return 0, err
287+
}
288+
289+
b.bitBatches[i].Add(rowID, colID)
290+
}
291+
292+
err = idx.mapping.putLocation(pilosaIndex.Name(), p, colID, location)
293+
if err != nil {
294+
return 0, err
290295
}
291296
}
292297

@@ -307,7 +312,7 @@ func (d *Driver) savePartition(
307312
}
308313
}
309314

310-
return colID - offset, err
315+
return colID, err
311316
}
312317

313318
// Save the given index (mapping and bitmap)
@@ -331,44 +336,86 @@ func (d *Driver) Save(
331336
idx.wg.Add(1)
332337
defer idx.wg.Done()
333338

334-
var b = batch{
335-
fields: make([]*pilosa.Field, len(idx.Expressions())),
336-
bitBatches: make([]*bitBatch, len(idx.Expressions())),
337-
}
338-
339339
ctx.Context, idx.cancel = context.WithCancel(ctx.Context)
340340
processingFile := d.processingFilePath(i.Database(), i.Table(), i.ID())
341-
if err := index.WriteProcessingFile(
341+
err = index.WriteProcessingFile(
342342
processingFile,
343343
[]byte{processingFileOnSave},
344-
); err != nil {
344+
)
345+
if err != nil {
345346
return err
346347
}
347348

348349
defer iter.Close()
349350
pilosaIndex := idx.index
350-
var rows uint64
351+
352+
var (
353+
rows, timePilosa, timeMapping uint64
354+
355+
wg sync.WaitGroup
356+
tokens = make(chan struct{}, runtime.NumCPU())
357+
358+
errors []error
359+
errmut sync.Mutex
360+
)
361+
351362
for {
363+
select {
364+
case <-ctx.Done():
365+
return
366+
default:
367+
}
368+
352369
p, kviter, err := iter.Next()
353370
if err != nil {
354371
if err == io.EOF {
355372
break
356373
}
357-
return err
358-
}
359374

360-
numRows, err := d.savePartition(ctx, p, kviter, idx, pilosaIndex, rows, &b)
361-
if err != nil {
375+
idx.cancel()
376+
wg.Wait()
362377
return err
363378
}
364379

365-
rows += numRows
380+
wg.Add(1)
381+
382+
go func() {
383+
defer func() {
384+
wg.Done()
385+
<-tokens
386+
}()
387+
388+
tokens <- struct{}{}
389+
390+
var b = &batch{
391+
fields: make([]*pilosa.Field, len(idx.Expressions())),
392+
bitBatches: make([]*bitBatch, len(idx.Expressions())),
393+
}
394+
395+
numRows, err := d.savePartition(ctx, p, kviter, idx, pilosaIndex, b)
396+
if err != nil {
397+
errmut.Lock()
398+
errors = append(errors, err)
399+
idx.cancel()
400+
errmut.Unlock()
401+
return
402+
}
403+
404+
atomic.AddUint64(&timeMapping, uint64(b.timeMapping))
405+
atomic.AddUint64(&timePilosa, uint64(b.timePilosa))
406+
atomic.AddUint64(&rows, numRows)
407+
}()
408+
}
409+
410+
wg.Wait()
411+
if len(errors) > 0 {
412+
return errors[0]
366413
}
367414

368415
logrus.WithFields(logrus.Fields{
369416
"duration": time.Since(start),
370-
"pilosa": b.timePilosa,
371-
"mapping": b.timeMapping,
417+
"pilosa": timePilosa,
418+
"mapping": timeMapping,
372419
"rows": rows,
373420
"id": i.ID(),
374421
}).Debugf("finished pilosa indexing")
@@ -421,18 +468,18 @@ func (d *Driver) Delete(i sql.Index, partitions sql.PartitionIter) error {
421468
return partitions.Close()
422469
}
423470

424-
func (d *Driver) saveBatch(ctx *sql.Context, m *mapping, colID uint64, b *batch) error {
425-
err := d.savePilosa(ctx, colID, b)
471+
func (d *Driver) saveBatch(ctx *sql.Context, m *mapping, cols uint64, b *batch) error {
472+
err := d.savePilosa(ctx, cols, b)
426473
if err != nil {
427474
return err
428475
}
429476

430-
return d.saveMapping(ctx, m, colID, true, b)
477+
return d.saveMapping(ctx, m, cols, true, b)
431478
}
432479

433-
func (d *Driver) savePilosa(ctx *sql.Context, colID uint64, b *batch) error {
480+
func (d *Driver) savePilosa(ctx *sql.Context, cols uint64, b *batch) error {
434481
span, _ := ctx.Span("pilosa.Save.bitBatch",
435-
opentracing.Tag{Key: "cols", Value: colID},
482+
opentracing.Tag{Key: "cols", Value: cols},
436483
opentracing.Tag{Key: "fields", Value: len(b.fields)},
437484
)
438485
defer span.Finish()
@@ -457,12 +504,12 @@ func (d *Driver) savePilosa(ctx *sql.Context, colID uint64, b *batch) error {
457504
func (d *Driver) saveMapping(
458505
ctx *sql.Context,
459506
m *mapping,
460-
colID uint64,
507+
cols uint64,
461508
cont bool,
462509
b *batch,
463510
) error {
464511
span, _ := ctx.Span("pilosa.Save.mapping",
465-
opentracing.Tag{Key: "cols", Value: colID},
512+
opentracing.Tag{Key: "cols", Value: cols},
466513
opentracing.Tag{Key: "continues", Value: cont},
467514
)
468515
defer span.Finish()

sql/index/pilosa/driver_test.go

+15-3
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ func TestSaveAndLoad(t *testing.T) {
189189
require.Equal(1, len(indexes))
190190

191191
var locations = make([][]string, len(it.records))
192+
192193
for partition, records := range it.records {
193194
for _, r := range records {
194195
lookup, err := sqlIdx.Get(r.values...)
@@ -555,6 +556,7 @@ func TestIntersection(t *testing.T) {
555556

556557
lookupLang, err := sqlIdxLang.Get(itLang.records[0][0].values...)
557558
require.NoError(err)
559+
558560
lookupPath, err := sqlIdxPath.Get(itPath.records[0][itPath.total-1].values...)
559561
require.NoError(err)
560562

@@ -1291,10 +1293,10 @@ func (it *testIndexKeyValueIter) Next() ([]interface{}, []byte, error) {
12911293
values[i] = e + "-" + loc + "-" + string(it.partition.Key())
12921294
}
12931295

1294-
*it.records = append(*it.records, testRecord{
1296+
(*it.records)[it.offset] = testRecord{
12951297
values,
12961298
[]byte(loc),
1297-
})
1299+
}
12981300
it.offset++
12991301

13001302
return values, []byte(loc), nil
@@ -1430,13 +1432,23 @@ type partitionKeyValueIter struct {
14301432
records [][]testRecord
14311433
}
14321434

1435+
func (i *partitionKeyValueIter) init() {
1436+
i.records = make([][]testRecord, i.partitions)
1437+
for j := 0; j < i.partitions; j++ {
1438+
i.records[j] = make([]testRecord, i.total)
1439+
}
1440+
}
1441+
14331442
func (i *partitionKeyValueIter) Next() (sql.Partition, sql.IndexKeyValueIter, error) {
14341443
if i.pos >= i.partitions {
14351444
return nil, nil, io.EOF
14361445
}
14371446

1447+
if i.pos == 0 {
1448+
i.init()
1449+
}
1450+
14381451
i.pos++
1439-
i.records = append(i.records, []testRecord{})
14401452
return testPartition(i.pos - 1), &testIndexKeyValueIter{
14411453
offset: i.offset,
14421454
total: i.total,

sql/index/pilosa/iterator.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55

66
"github.com/sirupsen/logrus"
77
bolt "go.etcd.io/bbolt"
8+
"gopkg.in/src-d/go-mysql-server.v0/sql"
89
)
910

1011
type locationValueIter struct {
@@ -31,6 +32,7 @@ type indexValueIter struct {
3132
total uint64
3233
bits []uint64
3334
mapping *mapping
35+
partition sql.Partition
3436
indexName string
3537

3638
// share transaction and bucket on all getLocation calls
@@ -45,7 +47,7 @@ func (it *indexValueIter) Next() ([]byte, error) {
4547
return nil, err
4648
}
4749

48-
bucket, err := it.mapping.getBucket(it.indexName, false)
50+
bucket, err := it.mapping.getBucket(it.indexName, it.partition, false)
4951
if err != nil {
5052
_ = it.Close()
5153
return nil, err

sql/index/pilosa/lookup.go

+19-4
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,11 @@ func (l *indexLookup) Values(p sql.Partition) (sql.IndexValueIter, error) {
154154
}
155155

156156
if row == nil {
157-
return &indexValueIter{mapping: l.mapping, indexName: l.index.Name()}, nil
157+
return &indexValueIter{
158+
mapping: l.mapping,
159+
indexName: l.index.Name(),
160+
partition: p,
161+
}, nil
158162
}
159163

160164
bits := row.Columns()
@@ -163,6 +167,7 @@ func (l *indexLookup) Values(p sql.Partition) (sql.IndexValueIter, error) {
163167
bits: bits,
164168
mapping: l.mapping,
165169
indexName: l.index.Name(),
170+
partition: p,
166171
}, nil
167172
}
168173

@@ -315,15 +320,20 @@ func (l *filteredLookup) Values(p sql.Partition) (sql.IndexValueIter, error) {
315320
}
316321

317322
if row == nil {
318-
return &indexValueIter{mapping: l.mapping, indexName: l.index.Name()}, nil
323+
return &indexValueIter{
324+
mapping: l.mapping,
325+
indexName: l.index.Name(),
326+
partition: p,
327+
}, nil
319328
}
320329

321330
bits := row.Columns()
322331
if err := l.mapping.open(); err != nil {
323332
return nil, err
324333
}
334+
325335
defer l.mapping.close()
326-
locations, err := l.mapping.sortedLocations(l.index.Name(), bits, l.reverse)
336+
locations, err := l.mapping.sortedLocations(l.index.Name(), p, bits, l.reverse)
327337
if err != nil {
328338
return nil, err
329339
}
@@ -500,7 +510,11 @@ func (l *negateLookup) Values(p sql.Partition) (sql.IndexValueIter, error) {
500510
}
501511

502512
if row == nil {
503-
return &indexValueIter{mapping: l.mapping, indexName: l.index.Name()}, nil
513+
return &indexValueIter{
514+
mapping: l.mapping,
515+
indexName: l.index.Name(),
516+
partition: p,
517+
}, nil
504518
}
505519

506520
bits := row.Columns()
@@ -509,6 +523,7 @@ func (l *negateLookup) Values(p sql.Partition) (sql.IndexValueIter, error) {
509523
bits: bits,
510524
mapping: l.mapping,
511525
indexName: l.index.Name(),
526+
partition: p,
512527
}, nil
513528
}
514529

0 commit comments

Comments
 (0)