mirror of https://github.com/apache/lucene.git
LUCENE-2531: fix string sort to only compare-by-value when necessary
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@963654 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fa7d244cee
commit
e0a831d49c
|
@ -197,6 +197,10 @@ Optimizations
|
||||||
|
|
||||||
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
|
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
|
||||||
(Mike McCandless)
|
(Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-2531: Fix issue when sorting by a String field that was
|
||||||
|
causing too many fallbacks to compare-by-value (instead of by-ord).
|
||||||
|
(Mike McCandless)
|
||||||
|
|
||||||
======================= Lucene 3.x (not yet released) =======================
|
======================= Lucene 3.x (not yet released) =======================
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.search.FieldCache.ShortParser;
|
||||||
import org.apache.lucene.search.FieldCache.DocTermsIndex;
|
import org.apache.lucene.search.FieldCache.DocTermsIndex;
|
||||||
import org.apache.lucene.search.FieldCache.DocTerms;
|
import org.apache.lucene.search.FieldCache.DocTerms;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert: a FieldComparator compares hits so as to determine their
|
* Expert: a FieldComparator compares hits so as to determine their
|
||||||
|
@ -709,23 +710,21 @@ public abstract class FieldComparator {
|
||||||
private final BytesRef[] values;
|
private final BytesRef[] values;
|
||||||
private final int[] readerGen;
|
private final int[] readerGen;
|
||||||
|
|
||||||
|
private PackedInts.Reader currentDocToOrd;
|
||||||
private int currentReaderGen = -1;
|
private int currentReaderGen = -1;
|
||||||
private DocTermsIndex termsIndex;
|
private DocTermsIndex termsIndex;
|
||||||
private final String field;
|
private final String field;
|
||||||
|
|
||||||
private int bottomSlot = -1;
|
private int bottomSlot = -1;
|
||||||
private int bottomOrd;
|
private int bottomOrd;
|
||||||
|
private boolean bottomSameReader;
|
||||||
private BytesRef bottomValue;
|
private BytesRef bottomValue;
|
||||||
private final boolean reversed;
|
|
||||||
private final int sortPos;
|
|
||||||
private final BytesRef tempBR = new BytesRef();
|
private final BytesRef tempBR = new BytesRef();
|
||||||
|
|
||||||
public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
|
public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
|
||||||
ords = new int[numHits];
|
ords = new int[numHits];
|
||||||
values = new BytesRef[numHits];
|
values = new BytesRef[numHits];
|
||||||
readerGen = new int[numHits];
|
readerGen = new int[numHits];
|
||||||
this.sortPos = sortPos;
|
|
||||||
this.reversed = reversed;
|
|
||||||
this.field = field;
|
this.field = field;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -754,59 +753,38 @@ public abstract class FieldComparator {
|
||||||
@Override
|
@Override
|
||||||
public int compareBottom(int doc) {
|
public int compareBottom(int doc) {
|
||||||
assert bottomSlot != -1;
|
assert bottomSlot != -1;
|
||||||
int order = termsIndex.getOrd(doc);
|
if (bottomSameReader) {
|
||||||
final int cmp = bottomOrd - order;
|
// ord is precisely comparable, even in the equal case
|
||||||
if (cmp != 0) {
|
return bottomOrd - (int) currentDocToOrd.get(doc);
|
||||||
return cmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bottomValue == null) {
|
|
||||||
if (order == 0) {
|
|
||||||
// unset
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// bottom wins
|
|
||||||
return -1;
|
|
||||||
} else if (order == 0) {
|
|
||||||
// doc wins
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
termsIndex.lookup(order, tempBR);
|
|
||||||
return bottomValue.compareTo(tempBR);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void convert(int slot) {
|
|
||||||
readerGen[slot] = currentReaderGen;
|
|
||||||
int index = 0;
|
|
||||||
BytesRef value = values[slot];
|
|
||||||
if (value == null) {
|
|
||||||
// 0 ord is null for all segments
|
|
||||||
assert ords[slot] == 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) {
|
|
||||||
// Since we are the primary sort, the entries in the
|
|
||||||
// queue are bounded by bottomOrd:
|
|
||||||
if (reversed) {
|
|
||||||
index = binarySearch(tempBR, termsIndex, value, bottomOrd, termsIndex.numOrd()-1);
|
|
||||||
} else {
|
|
||||||
index = binarySearch(tempBR, termsIndex, value, 0, bottomOrd);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Full binary search
|
// ord is only approx comparable: if they are not
|
||||||
index = binarySearch(tempBR, termsIndex, value);
|
// equal, we can use that; if they are equal, we
|
||||||
}
|
// must fallback to compare by value
|
||||||
|
final int order = (int) currentDocToOrd.get(doc);
|
||||||
|
final int cmp = bottomOrd - order;
|
||||||
|
if (cmp != 0) {
|
||||||
|
return cmp;
|
||||||
|
}
|
||||||
|
|
||||||
if (index < 0) {
|
if (bottomValue == null) {
|
||||||
index = -index - 2;
|
if (order == 0) {
|
||||||
|
// unset
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// bottom wins
|
||||||
|
return -1;
|
||||||
|
} else if (order == 0) {
|
||||||
|
// doc wins
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
termsIndex.lookup(order, tempBR);
|
||||||
|
return bottomValue.compareTo(tempBR);
|
||||||
}
|
}
|
||||||
ords[slot] = index;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void copy(int slot, int doc) {
|
public void copy(int slot, int doc) {
|
||||||
final int ord = termsIndex.getOrd(doc);
|
final int ord = (int) currentDocToOrd.get(doc);
|
||||||
if (ord == 0) {
|
if (ord == 0) {
|
||||||
values[slot] = null;
|
values[slot] = null;
|
||||||
} else {
|
} else {
|
||||||
|
@ -823,21 +801,34 @@ public abstract class FieldComparator {
|
||||||
@Override
|
@Override
|
||||||
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
||||||
termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field);
|
termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field);
|
||||||
|
currentDocToOrd = termsIndex.getDocToOrd();
|
||||||
currentReaderGen++;
|
currentReaderGen++;
|
||||||
if (bottomSlot != -1) {
|
if (bottomSlot != -1) {
|
||||||
convert(bottomSlot);
|
setBottom(bottomSlot);
|
||||||
bottomOrd = ords[bottomSlot];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setBottom(final int bottom) {
|
public void setBottom(final int bottom) {
|
||||||
bottomSlot = bottom;
|
bottomSlot = bottom;
|
||||||
if (readerGen[bottom] != currentReaderGen) {
|
|
||||||
convert(bottomSlot);
|
bottomValue = values[bottomSlot];
|
||||||
|
if (bottomValue == null) {
|
||||||
|
// 0 ord is null for all segments
|
||||||
|
assert ords[bottomSlot] == 0;
|
||||||
|
bottomOrd = 0;
|
||||||
|
bottomSameReader = true;
|
||||||
|
} else {
|
||||||
|
final int index = binarySearch(tempBR, termsIndex, bottomValue);
|
||||||
|
if (index < 0) {
|
||||||
|
bottomOrd = -index - 2;
|
||||||
|
bottomSameReader = false;
|
||||||
|
} else {
|
||||||
|
bottomOrd = index;
|
||||||
|
// exact value match
|
||||||
|
bottomSameReader = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
bottomOrd = ords[bottom];
|
|
||||||
bottomValue = values[bottom];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue