LUCENE-2531: fix string sort to only compare-by-value when necessary

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@963654 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2010-07-13 10:00:05 +00:00
parent fa7d244cee
commit e0a831d49c
2 changed files with 51 additions and 56 deletions

View File

@ -198,6 +198,10 @@ Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
(Mike McCandless)
* LUCENE-2531: Fix issue when sorting by a String field that was
causing too many fallbacks to compare-by-value (instead of by-ord).
(Mike McCandless)
======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.FieldCache.ShortParser;
import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;
/**
* Expert: a FieldComparator compares hits so as to determine their
@ -709,23 +710,21 @@ public abstract class FieldComparator {
private final BytesRef[] values;
private final int[] readerGen;
private PackedInts.Reader currentDocToOrd;
private int currentReaderGen = -1;
private DocTermsIndex termsIndex;
private final String field;
private int bottomSlot = -1;
private int bottomOrd;
private boolean bottomSameReader;
private BytesRef bottomValue;
private final boolean reversed;
private final int sortPos;
private final BytesRef tempBR = new BytesRef();
/**
 * Creates a comparator that tracks {@code numHits} slots.
 *
 * <p>Allocates the per-slot {@code ords}, {@code values} and
 * {@code readerGen} arrays, each of length {@code numHits}, and stores
 * the remaining arguments in the corresponding fields.
 *
 * @param numHits number of slots to allocate in each per-slot array
 * @param field name of the field; later passed to the field cache lookup
 * @param sortPos stored as-is; a value of 0 is treated as "primary sort"
 *        by the ord conversion code
 * @param reversed stored as-is; selects which side of the bottom ord is
 *        searched during ord conversion
 */
public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
ords = new int[numHits];
values = new BytesRef[numHits];
readerGen = new int[numHits];
this.sortPos = sortPos;
this.reversed = reversed;
this.field = field;
}
@ -754,7 +753,14 @@ public abstract class FieldComparator {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
int order = termsIndex.getOrd(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (int) currentDocToOrd.get(doc);
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = (int) currentDocToOrd.get(doc);
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
@ -774,39 +780,11 @@ public abstract class FieldComparator {
termsIndex.lookup(order, tempBR);
return bottomValue.compareTo(tempBR);
}
/**
 * Re-resolves the value held in {@code slot} against the current
 * {@code termsIndex} and writes the resulting ord to {@code ords[slot]}.
 *
 * <p>The slot is first stamped with {@code currentReaderGen}. A
 * {@code null} value is left untouched (its ord is asserted to be 0).
 * Otherwise the value is located with a binary search, restricted to one
 * side of {@code bottomOrd} when the primary-sort shortcut applies.
 *
 * @param slot index into {@code values}, {@code ords} and {@code readerGen}
 */
private void convert(int slot) {
// Mark the slot as belonging to the current reader generation up front,
// so the early return below also leaves it stamped.
readerGen[slot] = currentReaderGen;
int index = 0;
BytesRef value = values[slot];
if (value == null) {
// 0 ord is null for all segments
assert ords[slot] == 0;
return;
}
// The bounded search is only used when this comparator is the primary
// sort (sortPos == 0), a bottom has been set, and the slot being
// converted is not the bottom slot itself.
if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) {
// Since we are the primary sort, the entries in the
// queue are bounded by bottomOrd:
if (reversed) {
index = binarySearch(tempBR, termsIndex, value, bottomOrd, termsIndex.numOrd()-1);
} else {
index = binarySearch(tempBR, termsIndex, value, 0, bottomOrd);
}
} else {
// Full binary search
index = binarySearch(tempBR, termsIndex, value);
}
if (index < 0) {
// NOTE(review): presumably binarySearch returns -(insertionPoint) - 1
// on a miss, which would make this the ord just below the missing
// value -- confirm against binarySearch's contract.
index = -index - 2;
}
ords[slot] = index;
}
@Override
public void copy(int slot, int doc) {
final int ord = termsIndex.getOrd(doc);
final int ord = (int) currentDocToOrd.get(doc);
if (ord == 0) {
values[slot] = null;
} else {
@ -823,21 +801,34 @@ public abstract class FieldComparator {
/**
 * Switches this comparator to a new reader.
 *
 * <p>Loads the terms index for {@code field} from
 * {@code FieldCache.DEFAULT}, caches its doc-to-ord reader in
 * {@code currentDocToOrd}, and increments {@code currentReaderGen}. If a
 * bottom slot has already been set, the bottom state is refreshed against
 * the new reader.
 *
 * <p>NOTE(review): the body below calls both {@code convert(bottomSlot)}
 * and {@code setBottom(bottomSlot)}; this looks like pre- and post-change
 * lines of a diff shown together -- confirm which statements belong to
 * the current revision.
 *
 * @param reader reader whose terms index is fetched
 * @param docBase not used by the statements visible in this method
 * @throws IOException declared by the signature; presumably propagated
 *         from the field cache lookup -- confirm
 */
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, field);
currentDocToOrd = termsIndex.getDocToOrd();
// Advance the generation so slots stamped under the previous reader
// no longer match currentReaderGen.
currentReaderGen++;
if (bottomSlot != -1) {
convert(bottomSlot);
bottomOrd = ords[bottomSlot];
setBottom(bottomSlot);
}
}
@Override
public void setBottom(final int bottom) {
bottomSlot = bottom;
if (readerGen[bottom] != currentReaderGen) {
convert(bottomSlot);
bottomValue = values[bottomSlot];
if (bottomValue == null) {
// 0 ord is null for all segments
assert ords[bottomSlot] == 0;
bottomOrd = 0;
bottomSameReader = true;
} else {
final int index = binarySearch(tempBR, termsIndex, bottomValue);
if (index < 0) {
bottomOrd = -index - 2;
bottomSameReader = false;
} else {
bottomOrd = index;
// exact value match
bottomSameReader = true;
}
}
bottomOrd = ords[bottom];
bottomValue = values[bottom];
}
@Override