LUCENE-3824: don't do pointless by-value cmp in TermOrdVal/DocValuesComparator.setBottom

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-02-28 22:20:18 +00:00
parent 28fb9a4cbc
commit 9ab8f9b83c
12 changed files with 257 additions and 174 deletions

View File

@ -63,6 +63,7 @@ final class VarSortedBytesImpl {
this.comp = comp;
size = 0;
}
@Override
public void merge(MergeState mergeState, DocValues[] docValues)
throws IOException {

View File

@ -337,6 +337,9 @@ final class DocFieldProcessor extends DocConsumer {
if (perDocConsumer == null) {
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState("");
perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState);
if (perDocConsumer == null) {
throw new IllegalStateException("codec=" + docState.docWriter.codec + " does not support docValues: from docValuesFormat().docsConsumer(...) returned null; field=" + fieldInfo.name);
}
}
DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo);
fieldInfo.setDocValuesType(valueType, false);

View File

@ -148,6 +148,7 @@ public abstract class DocValues implements Closeable {
protected Source(Type type) {
this.type = type;
}
/**
* Returns a <tt>long</tt> for the given document id or throws an
* {@link UnsupportedOperationException} if this source doesn't support
@ -239,9 +240,10 @@ public abstract class DocValues implements Closeable {
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int ord = ord(docID);
if (ord < 0) {
// Negative ord means doc was missing?
bytesRef.length = 0;
} else {
getByOrd(ord , bytesRef);
getByOrd(ord, bytesRef);
}
return bytesRef;
}
@ -253,7 +255,7 @@ public abstract class DocValues implements Closeable {
public abstract int ord(int docID);
/** Returns value for specified ord. */
public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
public abstract BytesRef getByOrd(int ord, BytesRef result);
/** Return true if it's safe to call {@link
* #getDocToOrd}. */
@ -274,7 +276,7 @@ public abstract class DocValues implements Closeable {
}
/**
* Performs a lookup by value.
* Lookup ord by value.
*
* @param value
* the value to look up
@ -283,11 +285,11 @@ public abstract class DocValues implements Closeable {
* values to the given value. Must not be <code>null</code>
* @return the given values ordinal if found or otherwise
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
* element that is greater than the given value. This guarantees
* that the return value will always be &gt;= 0 if the given value
* is found.
* element that is greater than the given value (the insertion
* point). This guarantees that the return value will always be
* &gt;= 0 if the given value is found.
*/
public int getByValue(BytesRef value, BytesRef spare) {
public int getOrdByValue(BytesRef value, BytesRef spare) {
return binarySearch(value, spare, 0, getValueCount() - 1);
}
@ -405,7 +407,7 @@ public abstract class DocValues implements Closeable {
}
@Override
public int getByValue(BytesRef value, BytesRef spare) {
public int getOrdByValue(BytesRef value, BytesRef spare) {
if (value.length == 0) {
return 0;
} else {
@ -414,7 +416,7 @@ public abstract class DocValues implements Closeable {
}
@Override
public int getValueCount() {
public int getValueCount() {
return 1;
}
};

View File

@ -81,7 +81,7 @@ public final class SortedBytesMergeUtils {
}
}
public static List<SortedSourceSlice> buildSlices(int[] docBases ,int[][] docMaps,
public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
DocValues[] docValues, MergeContext ctx) throws IOException {
final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
for (int i = 0; i < docValues.length; i++) {
@ -111,7 +111,7 @@ public final class SortedBytesMergeUtils {
* mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
* contains the new global ordinals for the relative index.
*/
private static void createOrdMapping(int[] docBases ,int[][] docMaps,
private static void createOrdMapping(int[] docBases, int[][] docMaps,
SortedSourceSlice currentSlice) {
final int readerIdx = currentSlice.readerIdx;
final int[] currentDocMap = docMaps[readerIdx];

View File

@ -1055,32 +1055,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = (readerOrds[doc]&0xFF);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (readerOrds[doc]&0xFF);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = readerOrds[doc]&0xFF;
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
if (bottomValue == null) {
if (order == 0) {
// unset
return 0;
}
// bottom wins
return -1;
} else if (order == 0) {
// doc wins
return 1;
}
termsIndex.lookup(order, tempBR);
return bottomValue.compareTo(tempBR);
return -1;
}
}
@ -1116,32 +1101,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = (readerOrds[doc]&0xFFFF);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (readerOrds[doc]&0xFFFF);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = readerOrds[doc]&0xFFFF;
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
if (bottomValue == null) {
if (order == 0) {
// unset
return 0;
}
// bottom wins
return -1;
} else if (order == 0) {
// doc wins
return 1;
}
termsIndex.lookup(order, tempBR);
return bottomValue.compareTo(tempBR);
return -1;
}
}
@ -1177,32 +1147,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = readerOrds[doc];
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - readerOrds[doc];
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = readerOrds[doc];
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
if (bottomValue == null) {
if (order == 0) {
// unset
return 0;
}
// bottom wins
return -1;
} else if (order == 0) {
// doc wins
return 1;
}
termsIndex.lookup(order, tempBR);
return bottomValue.compareTo(tempBR);
return -1;
}
}
@ -1239,32 +1194,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = (int) readerOrds.get(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (int) readerOrds.get(doc);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = (int) readerOrds.get(doc);
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
if (bottomValue == null) {
if (order == 0) {
// unset
return 0;
}
// bottom wins
return -1;
} else if (order == 0) {
// doc wins
return 1;
}
termsIndex.lookup(order, tempBR);
return bottomValue.compareTo(tempBR);
return -1;
}
}
@ -1499,21 +1439,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = readerOrds[doc]&0xFF;
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (readerOrds[doc]&0xFF);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = readerOrds[doc]&0xFF;
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
termsIndex.getByOrd(order, tempBR);
return comp.compare(bottomValue, tempBR);
return -1;
}
}
@ -1544,21 +1480,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = readerOrds[doc]&0xFFFF;
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (readerOrds[doc]&0xFFFF);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = readerOrds[doc]&0xFFFF;
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
termsIndex.getByOrd(order, tempBR);
return comp.compare(bottomValue, tempBR);
return -1;
}
}
@ -1589,20 +1521,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = readerOrds[doc];
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - readerOrds[doc];
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = readerOrds[doc];
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
termsIndex.getByOrd(order, tempBR);
return comp.compare(bottomValue, tempBR);
return -1;
}
}
@ -1632,20 +1561,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = (int) readerOrds.get(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - (int) readerOrds.get(doc);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = (int) readerOrds.get(doc);
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
termsIndex.getByOrd(order, tempBR);
return comp.compare(bottomValue, tempBR);
return -1;
}
}
@ -1672,21 +1598,17 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = termsIndex.ord(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - termsIndex.ord(doc);
return bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
// setBottom):
return 1;
} else {
// ord is only approx comparable: if they are not
// equal, we can use that; if they are equal, we
// must fallback to compare by value
final int order = termsIndex.ord(doc);
final int cmp = bottomOrd - order;
if (cmp != 0) {
return cmp;
}
termsIndex.getByOrd(order, tempBR);
return comp.compare(bottomValue, tempBR);
return -1;
}
}
@ -1775,7 +1697,7 @@ public abstract class FieldComparator<T> {
bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen;
} else {
final int index = termsIndex.getByValue(bottomValue, tempBR);
final int index = termsIndex.getOrdByValue(bottomValue, tempBR);
if (index < 0) {
bottomOrd = -index - 2;
bottomSameReader = false;

View File

@ -70,7 +70,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
if (queueFull) {
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is largest than anything else in the queue, and
// this document is larger than anything else in the queue, and
// therefore not competitive.
return;
}

View File

@ -114,7 +114,7 @@ public class TestDocValues extends LuceneTestCase {
assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx),
bytesRef).utf8ToString());
int ord = ss
.getByValue(new BytesRef(values[idx]), new BytesRef());
.getOrdByValue(new BytesRef(values[idx]), new BytesRef());
assertTrue(ord >= 0);
assertEquals(ss.ord(idx), ord);
}
@ -125,7 +125,7 @@ public class TestDocValues extends LuceneTestCase {
final int valueCount = ss.getValueCount();
for (int i = 0; i < 1000; i++) {
BytesRef bytesValue = new BytesRef(_TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39)));
int ord = ss.getByValue(bytesValue, new BytesRef());
int ord = ss.getOrdByValue(bytesValue, new BytesRef());
if (ord >= 0) {
assertTrue(bytesValue
.bytesEquals(ss.getByOrd(ord, bytesRef)));

View File

@ -793,7 +793,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
hash.get(sort[i], expected);
asSortedSource.getByOrd(i, actual);
assertEquals(expected.utf8ToString(), actual.utf8ToString());
int ord = asSortedSource.getByValue(expected, actual);
int ord = asSortedSource.getOrdByValue(expected, actual);
assertEquals(i, ord);
}
AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader);

View File

@ -20,27 +20,32 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory;
@ -48,6 +53,7 @@ import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdBitSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.BeforeClass;
@ -1294,4 +1300,151 @@ public class TestSort extends LuceneTestCase {
reader.close();
indexStore.close();
}
private static class RandomFilter extends Filter {
private final Random random;
private float density;
private final List<BytesRef> docValues;
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
// density should be 0.0 ... 1.0
public RandomFilter(Random random, float density, List<BytesRef> docValues) {
this.random = random;
this.density = density;
this.docValues = docValues;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final int maxDoc = context.reader().maxDoc();
final DocValues.Source idSource = context.reader().docValues("id").getSource();
assertNotNull(idSource);
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID<maxDoc;docID++) {
if (random.nextFloat() <= density && (acceptDocs == null || acceptDocs.get(docID))) {
bits.set(docID);
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
matchValues.add(docValues.get((int) idSource.getInt(docID)));
}
}
return bits;
}
}
public void testRandomStringSort() throws Exception {
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<String>();
final int maxLength = _TestUtil.nextInt(random, 5, 100);
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
}
int numDocs = 0;
final List<BytesRef> docValues = new ArrayList<BytesRef>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final String s;
if (random.nextBoolean()) {
s = _TestUtil.randomSimpleString(random, maxLength);
} else {
s = _TestUtil.randomUnicodeString(random, maxLength);
}
final BytesRef br = new BytesRef(s);
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
final Document doc = new Document();
doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED));
doc.add(newField("string", s, StringField.TYPE_UNSTORED));
doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS));
docValues.add(br);
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
final IndexReader r = writer.getReader();
writer.close();
if (VERBOSE) {
System.out.println(" reader=" + r);
}
final IndexSearcher s = newSearcher(r, false);
final int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
final boolean reverse = random.nextBoolean();
final TopFieldDocs hits;
final SortField sf;
if (random.nextBoolean()) {
sf = new SortField("stringdv", SortField.Type.STRING, reverse);
sf.setUseIndexValues(true);
} else {
sf = new SortField("string", SortField.Type.STRING, reverse);
}
final Sort sort = new Sort(sf);
final int hitCount = _TestUtil.nextInt(random, 1, r.maxDoc() + 20);
final RandomFilter f = new RandomFilter(random, random.nextFloat(), docValues);
if (random.nextBoolean()) {
hits = s.search(new ConstantScoreQuery(f),
hitCount,
sort);
} else {
hits = s.search(new MatchAllDocsQuery(),
f,
hitCount,
sort);
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse);
}
// Compute expected results:
Collections.sort(f.matchValues);
if (reverse) {
Collections.reverse(f.matchValues);
}
final List<BytesRef> expected = f.matchValues;
if (VERBOSE) {
System.out.println(" expected:");
for(int idx=0;idx<expected.size();idx++) {
System.out.println(" " + idx + ": " + expected.get(idx).utf8ToString());
if (idx == hitCount-1) {
break;
}
}
}
if (VERBOSE) {
System.out.println(" actual:");
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
System.out.println(" " + hitIDX + ": " + ((BytesRef) fd.fields[0]).utf8ToString());
}
}
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
assertEquals(expected.get(hitIDX), (BytesRef) fd.fields[0]);
}
}
r.close();
dir.close();
}
}

View File

@ -193,8 +193,8 @@ public class _TestUtil {
return start + r.nextInt(end-start+1);
}
public static String randomSimpleString(Random r) {
final int end = r.nextInt(10);
public static String randomSimpleString(Random r, int maxLength) {
final int end = r.nextInt(maxLength);
if (end == 0) {
// allow 0 length
return "";
@ -206,6 +206,10 @@ public class _TestUtil {
return new String(buffer, 0, end);
}
public static String randomSimpleString(Random r) {
return randomSimpleString(r, 10);
}
/** Returns random string, including full unicode range. */
public static String randomUnicodeString(Random r) {
return randomUnicodeString(r, 20);

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search.grouping.dv;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.BytesRef;
@ -239,7 +238,7 @@ public abstract class DVAllGroupsCollector<GROUP_VALUE_TYPE> extends AbstractAll
ordSet.clear();
for (BytesRef countedGroup : groups) {
int ord = this.source.getByValue(countedGroup, spare);
int ord = this.source.getOrdByValue(countedGroup, spare);
if (ord >= 0) {
ordSet.put(ord);
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search.grouping.dv;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.SearchGroup;
@ -215,7 +214,7 @@ public abstract class DVSecondPassGroupingCollector<GROUP_VALUE> extends Abstrac
ordSet.clear();
for (SearchGroupDocs<BytesRef> group : groupMap.values()) {
int ord = this.source.getByValue(group.groupValue, spare);
int ord = this.source.getOrdByValue(group.groupValue, spare);
if (ord >= 0) {
groupDocs[ordSet.put(ord)] = group;
}