Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ Optimizations

* GITHUB#14674: Optimize AbstractKnnVectorQuery#createBitSet with intoBitset. (Guo Feng)

* GITHUB#14714: Move HitQueue in TopScoreDocCollector to a LongHeap. (Guo Feng)

* GITHUB#14720: Cache high-order bits of hashcode to speed up BytesRefHash. (Pan Guixin)

* GITHUB#14753: Implement IndexedDISI#docIDRunEnd. (Ge Song)
Expand Down
42 changes: 42 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/DocScoreEncoder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search;

import org.apache.lucene.util.NumericUtils;

/**
 * An encoder to encode a (doc, score) pair as a single {@code long}. Comparing two codes as
 * signed longs gives the same order as {@code ((Comparator<ScoreDoc>) (o1, o2) ->
 * Float.compare(o1.score, o2.score)).thenComparing(Comparator.comparingInt((ScoreDoc o) ->
 * o.doc).reversed())}: a higher score yields a larger code and, for equal scores, a smaller doc
 * id yields a larger code.
 *
 * <p>Layout: the upper 32 bits hold {@code NumericUtils.floatToSortableInt(score)}, the lower 32
 * bits hold {@code Integer.MAX_VALUE - docId}.
 */
final class DocScoreEncoder {

  /**
   * Code of the pair {@code (Integer.MAX_VALUE, Float.NEGATIVE_INFINITY)}, usable as a sentinel
   * for "no hit yet".
   */
  static final long LEAST_COMPETITIVE_CODE = encode(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY);

  private DocScoreEncoder() {
    // static utility, no instances
  }

  /**
   * Encodes the given pair.
   *
   * @param docId the doc id, must be non-negative
   * @param score the score
   * @return the code for {@code (docId, score)}
   */
  static long encode(int docId, float score) {
    // A negative docId would overflow the subtraction below into a negative int, whose sign
    // extension would then overwrite the score bits in the upper half of the code.
    assert docId >= 0 : "docId must be non-negative, got: " + docId;
    return (((long) NumericUtils.floatToSortableInt(score)) << 32) | (Integer.MAX_VALUE - docId);
  }

  /**
   * Extracts the score from a code produced by {@link #encode}.
   *
   * @param value the encoded pair
   * @return the score stored in the upper 32 bits
   */
  static float toScore(long value) {
    return NumericUtils.sortableIntToFloat((int) (value >>> 32));
  }

  /**
   * Extracts the doc id from a code produced by {@link #encode}.
   *
   * @param value the encoded pair
   * @return the doc id recovered from the lower 32 bits
   */
  static int docId(long value) {
    // the int cast keeps only the lower 32 bits, which store Integer.MAX_VALUE - docId
    return Integer.MAX_VALUE - ((int) value);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
/** Maintains the maximum score and its corresponding document id concurrently */
final class MaxScoreAccumulator {
// we use 2^10-1 to check the remainder with a bitwise operation
static final int DEFAULT_INTERVAL = 0x3ff;
private static final int DEFAULT_INTERVAL = 0x3ff;

// scores are always positive
final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE);
final LongAccumulator acc = new LongAccumulator(Math::max, Long.MIN_VALUE);

// non-final and visible for tests
long modInterval;
Expand All @@ -34,35 +34,8 @@ final class MaxScoreAccumulator {
this.modInterval = DEFAULT_INTERVAL;
}

/**
* Return the max encoded docId and score found in the two longs, following the encoding in {@link
* #accumulate}.
*/
private static long maxEncode(long v1, long v2) {
float score1 = Float.intBitsToFloat((int) (v1 >> 32));
float score2 = Float.intBitsToFloat((int) (v2 >> 32));
int cmp = Float.compare(score1, score2);
if (cmp == 0) {
// tie-break on the minimum doc base
return (int) v1 < (int) v2 ? v1 : v2;
} else if (cmp > 0) {
return v1;
}
return v2;
}

void accumulate(int docId, float score) {
assert docId >= 0 && score >= 0;
long encode = (((long) Float.floatToIntBits(score)) << 32) | docId;
acc.accumulate(encode);
}

public static float toScore(long value) {
return Float.intBitsToFloat((int) (value >> 32));
}

public static int docId(long value) {
return (int) value;
void accumulate(long code) {
acc.accumulate(code);
}

long getRaw() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,18 +153,25 @@ public TopDocs topDocs(int start, int howMany) {
howMany = Math.min(size - start, howMany);
ScoreDoc[] results = new ScoreDoc[howMany];

// pq's pop() returns the 'least' element in the queue, therefore need
// to discard the first ones, until we reach the requested range.
// Prune the least competitive hits until we reach the requested range.
// Note that this loop will usually not be executed, since the common usage
// should be that the caller asks for the last howMany results. However it's
// needed here for completeness.
for (int i = pq.size() - start - howMany; i > 0; i--) {
pq.pop();
}
pruneLeastCompetitiveHitsTo(start + howMany);

// Get the requested results from pq.
populateResults(results, howMany);

return newTopDocs(results, start);
}

/**
 * Pops entries off the priority queue until at most {@code keep} candidates remain; nothing is
 * removed when the queue already holds {@code keep} entries or fewer. This is typically called
 * before {@link #populateResults} so that the following pops start at the right position.
 */
protected void pruneLeastCompetitiveHitsTo(int keep) {
  // number of surplus entries, computed once up front
  int surplus = pq.size() - keep;
  while (surplus > 0) {
    pq.pop();
    surplus--;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOExcepti
long maxMinScore = minScoreAcc.getRaw();
float score;
if (maxMinScore != Long.MIN_VALUE
&& (score = MaxScoreAccumulator.toScore(maxMinScore)) > minCompetitiveScore) {
&& (score = DocScoreEncoder.toScore(maxMinScore)) > minCompetitiveScore) {
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
Expand All @@ -384,7 +384,7 @@ protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
minCompetitiveScore = minScore;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
if (minScoreAcc != null) {
minScoreAcc.accumulate(docBase, minScore);
minScoreAcc.accumulate(DocScoreEncoder.encode(docBase, minScore));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.LongHeap;

/**
* A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link
Expand All @@ -32,31 +33,20 @@
public class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {

private final ScoreDoc after;
private final LongHeap heap;
final int totalHitsThreshold;
final MaxScoreAccumulator minScoreAcc;

// prevents instantiation
TopScoreDocCollector(
int numHits, ScoreDoc after, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) {
super(new HitQueue(numHits, true));
super(null);
this.heap = new LongHeap(numHits, DocScoreEncoder.LEAST_COMPETITIVE_CODE);
this.after = after;
this.totalHitsThreshold = totalHitsThreshold;
this.minScoreAcc = minScoreAcc;
}

@Override
protected int topDocsSize() {
// Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID.
int[] validTopHitCount = new int[1];
pq.forEach(
scoreDoc -> {
if (scoreDoc.doc != Integer.MAX_VALUE) {
validTopHitCount[0]++;
}
});
return validTopHitCount[0];
}

@Override
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
return results == null
Expand Down Expand Up @@ -86,9 +76,8 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
return new LeafCollector() {

private Scorable scorer;
// HitQueue implements getSentinelObject to return a ScoreDoc, so we know
// that at this point top() is already initialized.
private ScoreDoc pqTop = pq.top();
private long topCode = heap.top();
private float topScore = DocScoreEncoder.toScore(topCode);
private float minCompetitiveScore;

@Override
Expand Down Expand Up @@ -121,7 +110,7 @@ public void collect(int doc) throws IOException {
return;
}

if (score <= pqTop.score) {
if (score <= topScore) {
// Note: for queries that match lots of hits, this is the common case: most hits are not
// competitive.
if (hitCountSoFar == totalHitsThreshold + 1) {
Expand All @@ -139,9 +128,9 @@ public void collect(int doc) throws IOException {
}

private void collectCompetitiveHit(int doc, float score) throws IOException {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
final long code = DocScoreEncoder.encode(doc + docBase, score);
topCode = heap.updateTop(code);
topScore = DocScoreEncoder.toScore(topCode);
updateMinCompetitiveScore(scorer);
}

Expand All @@ -152,8 +141,8 @@ private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException
// since we tie-break on doc id and collect in doc id order we can require
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
float score = MaxScoreAccumulator.toScore(maxMinScore);
score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? Math.nextUp(score) : score;
float score = DocScoreEncoder.toScore(maxMinScore);
score = docBase >= DocScoreEncoder.docId(maxMinScore) ? Math.nextUp(score) : score;
if (score > minCompetitiveScore) {
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
Expand All @@ -168,19 +157,45 @@ private void updateMinCompetitiveScore(Scorable scorer) throws IOException {
// the heap is pre-filled with sentinel codes, so topScore is always defined; if the top
// element is still a sentinel, its score is -Infinity and the logic below remains valid
float localMinScore = Math.nextUp(pqTop.score);
float localMinScore = Math.nextUp(topScore);
if (localMinScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(localMinScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
minCompetitiveScore = localMinScore;
if (minScoreAcc != null) {
// we don't use the next float but we register the document id so that other leaves or
// leaf partitions can require it if they are after the current maximum
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
minScoreAcc.accumulate(topCode);
}
}
}
}
};
}

@Override
protected int topDocsSize() {
  // Every heap slot that still holds the sentinel code was never replaced by a real hit, so
  // the number of collected hits is the heap size minus the remaining sentinels.
  final int slots = heap.size();
  int sentinels = 0;
  for (int slot = slots; slot >= 1; slot--) {
    if (heap.get(slot) == DocScoreEncoder.LEAST_COMPETITIVE_CODE) {
      sentinels++;
    }
  }
  return slots - sentinels;
}

@Override
protected void populateResults(ScoreDoc[] results, int howMany) {
  // Pop howMany codes off the heap and store them from the last slot backwards, so the first
  // code popped lands at the end of the array.
  int slot = howMany;
  while (--slot >= 0) {
    final long code = heap.pop();
    final int doc = DocScoreEncoder.docId(code);
    final float score = DocScoreEncoder.toScore(code);
    results[slot] = new ScoreDoc(doc, score);
  }
}

@Override
protected void pruneLeastCompetitiveHitsTo(int keep) {
  // Same contract as the parent method, but the hits of this collector live in the long heap.
  int toDrop = heap.size() - keep;
  while (toDrop > 0) {
    heap.pop();
    toDrop--;
  }
}
}
14 changes: 14 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/LongHeap.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.lucene.util;

import java.util.Arrays;

/**
* A min heap that stores longs; a primitive priority queue that like all priority queues maintains
* a partial ordering of its elements such that the least element can always be found in constant
Expand All @@ -33,6 +35,18 @@ public final class LongHeap {
private long[] heap;
private int size = 0;

/**
 * Creates a heap that already contains {@code size} elements, each equal to {@code
 * initialValue}.
 *
 * @param size the number of pre-filled elements; also passed on to {@link #LongHeap(int)}.
 * @param initialValue the value stored in every pre-filled element.
 */
public LongHeap(int size, long initialValue) {
  this(size);
  this.size = size;
  // elements are stored in slots 1..size of the backing array
  Arrays.fill(heap, 1, size + 1, initialValue);
}

/**
* Create an empty priority queue of the configured initial size.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search;

import org.apache.lucene.tests.util.LuceneTestCase;

/** Tests that {@link DocScoreEncoder} round-trips (doc, score) pairs and orders their codes. */
public class TestDocScoreEncoder extends LuceneTestCase {

  /** Random scores (any bit pattern) combined with random non-negative doc ids. */
  public void testRandom() {
    for (int i = 0; i < 1000; i++) {
      doAssert(
          Float.intBitsToFloat(random().nextInt()),
          random().nextInt(Integer.MAX_VALUE),
          Float.intBitsToFloat(random().nextInt()),
          random().nextInt(Integer.MAX_VALUE));
    }
  }

  /** Same doc id on both sides, so only the score decides the order. */
  public void testSameDoc() {
    for (int i = 0; i < 1000; i++) {
      doAssert(
          Float.intBitsToFloat(random().nextInt()), 1, Float.intBitsToFloat(random().nextInt()), 1);
    }
  }

  /** Same score on both sides, so only the doc id decides the order. */
  public void testSameScore() {
    for (int i = 0; i < 1000; i++) {
      doAssert(1f, random().nextInt(Integer.MAX_VALUE), 1f, random().nextInt(Integer.MAX_VALUE));
    }
  }

  /**
   * {@code -0f} and {@code 0f} are equal under {@code ==} but ordered by {@link Float#compare};
   * the encoded order has to follow the latter regardless of the doc ids.
   */
  public void testSignedZero() {
    doAssert(-0f, 1, 0f, 1);
    doAssert(-0f, 1, 0f, 2);
    doAssert(-0f, 2, 0f, 1);
    doAssert(0f, 1, -0f, 1);
  }

  /**
   * Encodes both pairs, checks that doc and score round-trip, and checks that the codes are
   * ordered by score (per {@link Float#compare}) and then by descending doc id.
   */
  private void doAssert(float score1, int doc1, float score2, int doc2) {
    // NaN scores are not covered by this test
    if (Float.isNaN(score1) || Float.isNaN(score2)) {
      return;
    }

    long code1 = DocScoreEncoder.encode(doc1, score1);
    long code2 = DocScoreEncoder.encode(doc2, score2);

    assertEquals(doc1, DocScoreEncoder.docId(code1));
    assertEquals(doc2, DocScoreEncoder.docId(code2));
    assertEquals(score1, DocScoreEncoder.toScore(code1), 0f);
    assertEquals(score2, DocScoreEncoder.toScore(code2), 0f);

    // Use Float.compare rather than < and >: the primitive operators treat -0f and 0f as equal,
    // while the encoder is specified in terms of Float.compare and gives them different codes.
    int cmp = Float.compare(score1, score2);
    if (cmp < 0) {
      assertTrue(code1 < code2);
    } else if (cmp > 0) {
      assertTrue(code1 > code2);
    } else if (doc1 == doc2) {
      assertEquals(code1, code2);
    } else {
      // equal scores: the smaller doc id must get the larger code
      assertEquals(code1 > code2, doc1 < doc2);
    }
  }
}
Loading