mirror of https://github.com/apache/lucene.git
LUCENE-3824: don't do pointless by-value cmp in TermOrdVal/DocValuesComparator.setBottom
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1294856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
28fb9a4cbc
commit
9ab8f9b83c
|
@ -63,6 +63,7 @@ final class VarSortedBytesImpl {
|
|||
this.comp = comp;
|
||||
size = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void merge(MergeState mergeState, DocValues[] docValues)
|
||||
throws IOException {
|
||||
|
|
|
@ -337,6 +337,9 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
if (perDocConsumer == null) {
|
||||
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState("");
|
||||
perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState);
|
||||
if (perDocConsumer == null) {
|
||||
throw new IllegalStateException("codec=" + docState.docWriter.codec + " does not support docValues: from docValuesFormat().docsConsumer(...) returned null; field=" + fieldInfo.name);
|
||||
}
|
||||
}
|
||||
DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo);
|
||||
fieldInfo.setDocValuesType(valueType, false);
|
||||
|
|
|
@ -148,6 +148,7 @@ public abstract class DocValues implements Closeable {
|
|||
protected Source(Type type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <tt>long</tt> for the given document id or throws an
|
||||
* {@link UnsupportedOperationException} if this source doesn't support
|
||||
|
@ -239,9 +240,10 @@ public abstract class DocValues implements Closeable {
|
|||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final int ord = ord(docID);
|
||||
if (ord < 0) {
|
||||
// Negative ord means doc was missing?
|
||||
bytesRef.length = 0;
|
||||
} else {
|
||||
getByOrd(ord , bytesRef);
|
||||
getByOrd(ord, bytesRef);
|
||||
}
|
||||
return bytesRef;
|
||||
}
|
||||
|
@ -253,7 +255,7 @@ public abstract class DocValues implements Closeable {
|
|||
public abstract int ord(int docID);
|
||||
|
||||
/** Returns value for specified ord. */
|
||||
public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
|
||||
public abstract BytesRef getByOrd(int ord, BytesRef result);
|
||||
|
||||
/** Return true if it's safe to call {@link
|
||||
* #getDocToOrd}. */
|
||||
|
@ -274,7 +276,7 @@ public abstract class DocValues implements Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Performs a lookup by value.
|
||||
* Lookup ord by value.
|
||||
*
|
||||
* @param value
|
||||
* the value to look up
|
||||
|
@ -283,11 +285,11 @@ public abstract class DocValues implements Closeable {
|
|||
* values to the given value. Must not be <code>null</code>
|
||||
* @return the given values ordinal if found or otherwise
|
||||
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
|
||||
* element that is greater than the given value. This guarantees
|
||||
* that the return value will always be >= 0 if the given value
|
||||
* is found.
|
||||
* element that is greater than the given value (the insertion
|
||||
* point). This guarantees that the return value will always be
|
||||
* >= 0 if the given value is found.
|
||||
*/
|
||||
public int getByValue(BytesRef value, BytesRef spare) {
|
||||
public int getOrdByValue(BytesRef value, BytesRef spare) {
|
||||
return binarySearch(value, spare, 0, getValueCount() - 1);
|
||||
}
|
||||
|
||||
|
@ -405,7 +407,7 @@ public abstract class DocValues implements Closeable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int getByValue(BytesRef value, BytesRef spare) {
|
||||
public int getOrdByValue(BytesRef value, BytesRef spare) {
|
||||
if (value.length == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
|
@ -414,7 +416,7 @@ public abstract class DocValues implements Closeable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
public int getValueCount() {
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -81,7 +81,7 @@ public final class SortedBytesMergeUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static List<SortedSourceSlice> buildSlices(int[] docBases ,int[][] docMaps,
|
||||
public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
|
||||
DocValues[] docValues, MergeContext ctx) throws IOException {
|
||||
final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
|
||||
for (int i = 0; i < docValues.length; i++) {
|
||||
|
@ -111,7 +111,7 @@ public final class SortedBytesMergeUtils {
|
|||
* mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
|
||||
* contains the new global ordinals for the relative index.
|
||||
*/
|
||||
private static void createOrdMapping(int[] docBases ,int[][] docMaps,
|
||||
private static void createOrdMapping(int[] docBases, int[][] docMaps,
|
||||
SortedSourceSlice currentSlice) {
|
||||
final int readerIdx = currentSlice.readerIdx;
|
||||
final int[] currentDocMap = docMaps[readerIdx];
|
||||
|
|
|
@ -1055,32 +1055,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = (readerOrds[doc]&0xFF);
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - (readerOrds[doc]&0xFF);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = readerOrds[doc]&0xFF;
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
if (bottomValue == null) {
|
||||
if (order == 0) {
|
||||
// unset
|
||||
return 0;
|
||||
}
|
||||
// bottom wins
|
||||
return -1;
|
||||
} else if (order == 0) {
|
||||
// doc wins
|
||||
return 1;
|
||||
}
|
||||
termsIndex.lookup(order, tempBR);
|
||||
return bottomValue.compareTo(tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1116,32 +1101,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = (readerOrds[doc]&0xFFFF);
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - (readerOrds[doc]&0xFFFF);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = readerOrds[doc]&0xFFFF;
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
if (bottomValue == null) {
|
||||
if (order == 0) {
|
||||
// unset
|
||||
return 0;
|
||||
}
|
||||
// bottom wins
|
||||
return -1;
|
||||
} else if (order == 0) {
|
||||
// doc wins
|
||||
return 1;
|
||||
}
|
||||
termsIndex.lookup(order, tempBR);
|
||||
return bottomValue.compareTo(tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1177,32 +1147,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = readerOrds[doc];
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - readerOrds[doc];
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = readerOrds[doc];
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
if (bottomValue == null) {
|
||||
if (order == 0) {
|
||||
// unset
|
||||
return 0;
|
||||
}
|
||||
// bottom wins
|
||||
return -1;
|
||||
} else if (order == 0) {
|
||||
// doc wins
|
||||
return 1;
|
||||
}
|
||||
termsIndex.lookup(order, tempBR);
|
||||
return bottomValue.compareTo(tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1239,32 +1194,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = (int) readerOrds.get(doc);
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - (int) readerOrds.get(doc);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = (int) readerOrds.get(doc);
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
if (bottomValue == null) {
|
||||
if (order == 0) {
|
||||
// unset
|
||||
return 0;
|
||||
}
|
||||
// bottom wins
|
||||
return -1;
|
||||
} else if (order == 0) {
|
||||
// doc wins
|
||||
return 1;
|
||||
}
|
||||
termsIndex.lookup(order, tempBR);
|
||||
return bottomValue.compareTo(tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1499,21 +1439,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = readerOrds[doc]&0xFF;
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - (readerOrds[doc]&0xFF);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = readerOrds[doc]&0xFF;
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
termsIndex.getByOrd(order, tempBR);
|
||||
return comp.compare(bottomValue, tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1544,21 +1480,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = readerOrds[doc]&0xFFFF;
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - (readerOrds[doc]&0xFFFF);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = readerOrds[doc]&0xFFFF;
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
termsIndex.getByOrd(order, tempBR);
|
||||
return comp.compare(bottomValue, tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1589,20 +1521,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = readerOrds[doc];
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - readerOrds[doc];
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = readerOrds[doc];
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
termsIndex.getByOrd(order, tempBR);
|
||||
return comp.compare(bottomValue, tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1632,20 +1561,17 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = (int) readerOrds.get(doc);
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - (int) readerOrds.get(doc);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = (int) readerOrds.get(doc);
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
termsIndex.getByOrd(order, tempBR);
|
||||
return comp.compare(bottomValue, tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1672,21 +1598,17 @@ public abstract class FieldComparator<T> {
|
|||
|
||||
@Override
|
||||
public int compareBottom(int doc) {
|
||||
assert bottomSlot != -1;
|
||||
final int docOrd = termsIndex.ord(doc);
|
||||
if (bottomSameReader) {
|
||||
// ord is precisely comparable, even in the equal case
|
||||
return bottomOrd - termsIndex.ord(doc);
|
||||
return bottomOrd - docOrd;
|
||||
} else if (bottomOrd >= docOrd) {
|
||||
// the equals case always means bottom is > doc
|
||||
// (because we set bottomOrd to the lower bound in
|
||||
// setBottom):
|
||||
return 1;
|
||||
} else {
|
||||
// ord is only approx comparable: if they are not
|
||||
// equal, we can use that; if they are equal, we
|
||||
// must fallback to compare by value
|
||||
final int order = termsIndex.ord(doc);
|
||||
final int cmp = bottomOrd - order;
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
termsIndex.getByOrd(order, tempBR);
|
||||
return comp.compare(bottomValue, tempBR);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1775,7 +1697,7 @@ public abstract class FieldComparator<T> {
|
|||
bottomSameReader = true;
|
||||
readerGen[bottomSlot] = currentReaderGen;
|
||||
} else {
|
||||
final int index = termsIndex.getByValue(bottomValue, tempBR);
|
||||
final int index = termsIndex.getOrdByValue(bottomValue, tempBR);
|
||||
if (index < 0) {
|
||||
bottomOrd = -index - 2;
|
||||
bottomSameReader = false;
|
||||
|
|
|
@ -70,7 +70,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
if (queueFull) {
|
||||
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
|
||||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
// this document is largest than anything else in the queue, and
|
||||
// this document is larger than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -114,7 +114,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx),
|
||||
bytesRef).utf8ToString());
|
||||
int ord = ss
|
||||
.getByValue(new BytesRef(values[idx]), new BytesRef());
|
||||
.getOrdByValue(new BytesRef(values[idx]), new BytesRef());
|
||||
assertTrue(ord >= 0);
|
||||
assertEquals(ss.ord(idx), ord);
|
||||
}
|
||||
|
@ -125,7 +125,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
final int valueCount = ss.getValueCount();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
BytesRef bytesValue = new BytesRef(_TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39)));
|
||||
int ord = ss.getByValue(bytesValue, new BytesRef());
|
||||
int ord = ss.getOrdByValue(bytesValue, new BytesRef());
|
||||
if (ord >= 0) {
|
||||
assertTrue(bytesValue
|
||||
.bytesEquals(ss.getByOrd(ord, bytesRef)));
|
||||
|
|
|
@ -793,7 +793,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
|
|||
hash.get(sort[i], expected);
|
||||
asSortedSource.getByOrd(i, actual);
|
||||
assertEquals(expected.utf8ToString(), actual.utf8ToString());
|
||||
int ord = asSortedSource.getByValue(expected, actual);
|
||||
int ord = asSortedSource.getOrdByValue(expected, actual);
|
||||
assertEquals(i, ord);
|
||||
}
|
||||
AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader);
|
||||
|
|
|
@ -20,27 +20,32 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.FieldValueHitQueue.Entry;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -48,6 +53,7 @@ import org.apache.lucene.store.LockObtainFailedException;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.DocIdBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -1294,4 +1300,151 @@ public class TestSort extends LuceneTestCase {
|
|||
reader.close();
|
||||
indexStore.close();
|
||||
}
|
||||
|
||||
private static class RandomFilter extends Filter {
|
||||
private final Random random;
|
||||
private float density;
|
||||
private final List<BytesRef> docValues;
|
||||
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
|
||||
|
||||
// density should be 0.0 ... 1.0
|
||||
public RandomFilter(Random random, float density, List<BytesRef> docValues) {
|
||||
this.random = random;
|
||||
this.density = density;
|
||||
this.docValues = docValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
final int maxDoc = context.reader().maxDoc();
|
||||
final DocValues.Source idSource = context.reader().docValues("id").getSource();
|
||||
assertNotNull(idSource);
|
||||
final FixedBitSet bits = new FixedBitSet(maxDoc);
|
||||
for(int docID=0;docID<maxDoc;docID++) {
|
||||
if (random.nextFloat() <= density && (acceptDocs == null || acceptDocs.get(docID))) {
|
||||
bits.set(docID);
|
||||
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
|
||||
matchValues.add(docValues.get((int) idSource.getInt(docID)));
|
||||
}
|
||||
}
|
||||
|
||||
return bits;
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandomStringSort() throws Exception {
|
||||
final int NUM_DOCS = atLeast(100);
|
||||
final Directory dir = newDirectory();
|
||||
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||
final boolean allowDups = random.nextBoolean();
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final int maxLength = _TestUtil.nextInt(random, 5, 100);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
|
||||
}
|
||||
int numDocs = 0;
|
||||
final List<BytesRef> docValues = new ArrayList<BytesRef>();
|
||||
// TODO: deletions
|
||||
while (numDocs < NUM_DOCS) {
|
||||
final String s;
|
||||
if (random.nextBoolean()) {
|
||||
s = _TestUtil.randomSimpleString(random, maxLength);
|
||||
} else {
|
||||
s = _TestUtil.randomUnicodeString(random, maxLength);
|
||||
}
|
||||
final BytesRef br = new BytesRef(s);
|
||||
|
||||
if (!allowDups) {
|
||||
if (seen.contains(s)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(s);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" " + numDocs + ": s=" + s);
|
||||
}
|
||||
|
||||
final Document doc = new Document();
|
||||
doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED));
|
||||
doc.add(newField("string", s, StringField.TYPE_UNSTORED));
|
||||
doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS));
|
||||
docValues.add(br);
|
||||
writer.addDocument(doc);
|
||||
numDocs++;
|
||||
|
||||
if (random.nextInt(40) == 17) {
|
||||
// force flush
|
||||
writer.getReader().close();
|
||||
}
|
||||
}
|
||||
|
||||
final IndexReader r = writer.getReader();
|
||||
writer.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" reader=" + r);
|
||||
}
|
||||
|
||||
final IndexSearcher s = newSearcher(r, false);
|
||||
final int ITERS = atLeast(100);
|
||||
for(int iter=0;iter<ITERS;iter++) {
|
||||
final boolean reverse = random.nextBoolean();
|
||||
final TopFieldDocs hits;
|
||||
final SortField sf;
|
||||
if (random.nextBoolean()) {
|
||||
sf = new SortField("stringdv", SortField.Type.STRING, reverse);
|
||||
sf.setUseIndexValues(true);
|
||||
} else {
|
||||
sf = new SortField("string", SortField.Type.STRING, reverse);
|
||||
}
|
||||
final Sort sort = new Sort(sf);
|
||||
final int hitCount = _TestUtil.nextInt(random, 1, r.maxDoc() + 20);
|
||||
final RandomFilter f = new RandomFilter(random, random.nextFloat(), docValues);
|
||||
if (random.nextBoolean()) {
|
||||
hits = s.search(new ConstantScoreQuery(f),
|
||||
hitCount,
|
||||
sort);
|
||||
} else {
|
||||
hits = s.search(new MatchAllDocsQuery(),
|
||||
f,
|
||||
hitCount,
|
||||
sort);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse);
|
||||
}
|
||||
|
||||
// Compute expected results:
|
||||
Collections.sort(f.matchValues);
|
||||
if (reverse) {
|
||||
Collections.reverse(f.matchValues);
|
||||
}
|
||||
final List<BytesRef> expected = f.matchValues;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" expected:");
|
||||
for(int idx=0;idx<expected.size();idx++) {
|
||||
System.out.println(" " + idx + ": " + expected.get(idx).utf8ToString());
|
||||
if (idx == hitCount-1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" actual:");
|
||||
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||
System.out.println(" " + hitIDX + ": " + ((BytesRef) fd.fields[0]).utf8ToString());
|
||||
}
|
||||
}
|
||||
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||
assertEquals(expected.get(hitIDX), (BytesRef) fd.fields[0]);
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -193,8 +193,8 @@ public class _TestUtil {
|
|||
return start + r.nextInt(end-start+1);
|
||||
}
|
||||
|
||||
public static String randomSimpleString(Random r) {
|
||||
final int end = r.nextInt(10);
|
||||
public static String randomSimpleString(Random r, int maxLength) {
|
||||
final int end = r.nextInt(maxLength);
|
||||
if (end == 0) {
|
||||
// allow 0 length
|
||||
return "";
|
||||
|
@ -206,6 +206,10 @@ public class _TestUtil {
|
|||
return new String(buffer, 0, end);
|
||||
}
|
||||
|
||||
public static String randomSimpleString(Random r) {
|
||||
return randomSimpleString(r, 10);
|
||||
}
|
||||
|
||||
/** Returns random string, including full unicode range. */
|
||||
public static String randomUnicodeString(Random r) {
|
||||
return randomUnicodeString(r, 20);
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.search.grouping.dv;
|
|||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Type; // javadocs
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
|
||||
import org.apache.lucene.util.SentinelIntSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -239,7 +238,7 @@ public abstract class DVAllGroupsCollector<GROUP_VALUE_TYPE> extends AbstractAll
|
|||
|
||||
ordSet.clear();
|
||||
for (BytesRef countedGroup : groups) {
|
||||
int ord = this.source.getByValue(countedGroup, spare);
|
||||
int ord = this.source.getOrdByValue(countedGroup, spare);
|
||||
if (ord >= 0) {
|
||||
ordSet.put(ord);
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.search.grouping.dv;
|
|||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Type; // javadocs
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
|
||||
import org.apache.lucene.search.grouping.SearchGroup;
|
||||
|
@ -215,7 +214,7 @@ public abstract class DVSecondPassGroupingCollector<GROUP_VALUE> extends Abstrac
|
|||
|
||||
ordSet.clear();
|
||||
for (SearchGroupDocs<BytesRef> group : groupMap.values()) {
|
||||
int ord = this.source.getByValue(group.groupValue, spare);
|
||||
int ord = this.source.getOrdByValue(group.groupValue, spare);
|
||||
if (ord >= 0) {
|
||||
groupDocs[ordSet.put(ord)] = group;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue