diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java index a4185e9349d..0229199f0c8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java @@ -63,6 +63,7 @@ final class VarSortedBytesImpl { this.comp = comp; size = 0; } + @Override public void merge(MergeState mergeState, DocValues[] docValues) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java index c249e117b96..ae36b8f9a60 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -337,6 +337,9 @@ final class DocFieldProcessor extends DocConsumer { if (perDocConsumer == null) { PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(""); perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState); + if (perDocConsumer == null) { + throw new IllegalStateException("codec=" + docState.docWriter.codec + " does not support docValues: from docValuesFormat().docsConsumer(...) returned null; field=" + fieldInfo.name); + } } DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo); fieldInfo.setDocValuesType(valueType, false); diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValues.java b/lucene/core/src/java/org/apache/lucene/index/DocValues.java index b3ce0635d3d..23999f49e0c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValues.java @@ -148,6 +148,7 @@ public abstract class DocValues implements Closeable { protected Source(Type type) { this.type = type; } + /** * Returns a long for the given document id or throws an * {@link UnsupportedOperationException} if this source doesn't support @@ -239,9 +240,10 @@ public abstract class DocValues implements Closeable { public BytesRef getBytes(int docID, BytesRef bytesRef) { final int ord = ord(docID); if (ord < 0) { + // Negative ord means doc was missing? bytesRef.length = 0; } else { - getByOrd(ord , bytesRef); + getByOrd(ord, bytesRef); } return bytesRef; } @@ -253,7 +255,7 @@ public abstract class DocValues implements Closeable { public abstract int ord(int docID); /** Returns value for specified ord. */ - public abstract BytesRef getByOrd(int ord, BytesRef bytesRef); + public abstract BytesRef getByOrd(int ord, BytesRef result); /** Return true if it's safe to call {@link * #getDocToOrd}. */ @@ -274,7 +276,7 @@ public abstract class DocValues implements Closeable { } /** - * Performs a lookup by value. + * Lookup ord by value. * * @param value * the value to look up @@ -283,11 +285,11 @@ public abstract class DocValues implements Closeable { * values to the given value. Must not be null * @return the given values ordinal if found or otherwise * (-(ord)-1), defined as the ordinal of the first - * element that is greater than the given value. This guarantees - * that the return value will always be >= 0 if the given value - * is found. + * element that is greater than the given value (the insertion + * point). This guarantees that the return value will always be + * >= 0 if the given value is found. */ - public int getByValue(BytesRef value, BytesRef spare) { + public int getOrdByValue(BytesRef value, BytesRef spare) { return binarySearch(value, spare, 0, getValueCount() - 1); } @@ -405,7 +407,7 @@ public abstract class DocValues implements Closeable { } @Override - public int getByValue(BytesRef value, BytesRef spare) { + public int getOrdByValue(BytesRef value, BytesRef spare) { if (value.length == 0) { return 0; } else { @@ -414,7 +416,7 @@ public abstract class DocValues implements Closeable { } @Override - public int getValueCount() { + public int getValueCount() { return 1; } }; diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java b/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java index dce3011565d..188a6d1d380 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java @@ -81,7 +81,7 @@ public final class SortedBytesMergeUtils { } } - public static List buildSlices(int[] docBases ,int[][] docMaps, + public static List buildSlices(int[] docBases, int[][] docMaps, DocValues[] docValues, MergeContext ctx) throws IOException { final List slices = new ArrayList(); for (int i = 0; i < docValues.length; i++) { @@ -111,7 +111,7 @@ public final class SortedBytesMergeUtils { * mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping * contains the new global ordinals for the relative index. */ - private static void createOrdMapping(int[] docBases ,int[][] docMaps, + private static void createOrdMapping(int[] docBases, int[][] docMaps, SortedSourceSlice currentSlice) { final int readerIdx = currentSlice.readerIdx; final int[] currentDocMap = docMaps[readerIdx]; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index 4bcdb8a1ee4..cf3f6931bd3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -1055,32 +1055,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = (readerOrds[doc]&0xFF); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - (readerOrds[doc]&0xFF); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = readerOrds[doc]&0xFF; - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - if (bottomValue == null) { - if (order == 0) { - // unset - return 0; - } - // bottom wins - return -1; - } else if (order == 0) { - // doc wins - return 1; - } - termsIndex.lookup(order, tempBR); - return bottomValue.compareTo(tempBR); + return -1; } } @@ -1116,32 +1101,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = (readerOrds[doc]&0xFFFF); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - (readerOrds[doc]&0xFFFF); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = readerOrds[doc]&0xFFFF; - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - if (bottomValue == null) { - if (order == 0) { - // unset - return 0; - } - // bottom wins - return -1; - } else if (order == 0) { - // doc wins - return 1; - } - termsIndex.lookup(order, tempBR); - return bottomValue.compareTo(tempBR); + return -1; } } @@ -1177,32 +1147,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = readerOrds[doc]; if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - readerOrds[doc]; + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = readerOrds[doc]; - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - if (bottomValue == null) { - if (order == 0) { - // unset - return 0; - } - // bottom wins - return -1; - } else if (order == 0) { - // doc wins - return 1; - } - termsIndex.lookup(order, tempBR); - return bottomValue.compareTo(tempBR); + return -1; } } @@ -1239,32 +1194,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = (int) readerOrds.get(doc); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - (int) readerOrds.get(doc); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = (int) readerOrds.get(doc); - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - if (bottomValue == null) { - if (order == 0) { - // unset - return 0; - } - // bottom wins - return -1; - } else if (order == 0) { - // doc wins - return 1; - } - termsIndex.lookup(order, tempBR); - return bottomValue.compareTo(tempBR); + return -1; } } @@ -1499,21 +1439,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = readerOrds[doc]&0xFF; if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - (readerOrds[doc]&0xFF); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = readerOrds[doc]&0xFF; - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - termsIndex.getByOrd(order, tempBR); - return comp.compare(bottomValue, tempBR); + return -1; } } @@ -1544,21 +1480,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = readerOrds[doc]&0xFFFF; if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - (readerOrds[doc]&0xFFFF); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = readerOrds[doc]&0xFFFF; - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - - termsIndex.getByOrd(order, tempBR); - return comp.compare(bottomValue, tempBR); + return -1; } } @@ -1589,20 +1521,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = readerOrds[doc]; if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - readerOrds[doc]; + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = readerOrds[doc]; - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - termsIndex.getByOrd(order, tempBR); - return comp.compare(bottomValue, tempBR); + return -1; } } @@ -1632,20 +1561,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; + final int docOrd = (int) readerOrds.get(doc); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - (int) readerOrds.get(doc); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = (int) readerOrds.get(doc); - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - termsIndex.getByOrd(order, tempBR); - return comp.compare(bottomValue, tempBR); + return -1; } } @@ -1672,21 +1598,17 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { - assert bottomSlot != -1; + final int docOrd = termsIndex.ord(doc); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - termsIndex.ord(doc); + return bottomOrd - docOrd; + } else if (bottomOrd >= docOrd) { + // the equals case always means bottom is > doc + // (because we set bottomOrd to the lower bound in + // setBottom): + return 1; } else { - // ord is only approx comparable: if they are not - // equal, we can use that; if they are equal, we - // must fallback to compare by value - final int order = termsIndex.ord(doc); - final int cmp = bottomOrd - order; - if (cmp != 0) { - return cmp; - } - termsIndex.getByOrd(order, tempBR); - return comp.compare(bottomValue, tempBR); + return -1; } } @@ -1775,7 +1697,7 @@ public abstract class FieldComparator { bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = termsIndex.getByValue(bottomValue, tempBR); + final int index = termsIndex.getOrdByValue(bottomValue, tempBR); if (index < 0) { bottomOrd = -index - 2; bottomSameReader = false; diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index 0d4e05a887a..82925cc4125 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -70,7 +70,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { if (queueFull) { if ((reverseMul * comparator.compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means - // this document is largest than anything else in the queue, and + // this document is larger than anything else in the queue, and // therefore not competitive. return; } diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java index 54574830926..354a70e83fa 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java @@ -114,7 +114,7 @@ public class TestDocValues extends LuceneTestCase { assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx), bytesRef).utf8ToString()); int ord = ss - .getByValue(new BytesRef(values[idx]), new BytesRef()); + .getOrdByValue(new BytesRef(values[idx]), new BytesRef()); assertTrue(ord >= 0); assertEquals(ss.ord(idx), ord); } @@ -125,7 +125,7 @@ public class TestDocValues extends LuceneTestCase { final int valueCount = ss.getValueCount(); for (int i = 0; i < 1000; i++) { BytesRef bytesValue = new BytesRef(_TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39))); - int ord = ss.getByValue(bytesValue, new BytesRef()); + int ord = ss.getOrdByValue(bytesValue, new BytesRef()); if (ord >= 0) { assertTrue(bytesValue .bytesEquals(ss.getByOrd(ord, bytesRef))); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index a0e5b66725d..3a2a9b6a9ef 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -793,7 +793,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { hash.get(sort[i], expected); asSortedSource.getByOrd(i, actual); assertEquals(expected.utf8ToString(), actual.utf8ToString()); - int ord = asSortedSource.getByValue(expected, actual); + int ord = asSortedSource.getOrdByValue(expected, actual); assertEquals(i, ord); } AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSort.java b/lucene/core/src/test/org/apache/lucene/search/TestSort.java index 4681028cf0b..87ef368abd4 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSort.java @@ -20,27 +20,32 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.BitSet; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocValues; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.store.Directory; @@ -48,6 +53,7 @@ import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.DocIdBitSet; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; import org.junit.BeforeClass; @@ -1294,4 +1300,151 @@ public class TestSort extends LuceneTestCase { reader.close(); indexStore.close(); } + + private static class RandomFilter extends Filter { + private final Random random; + private float density; + private final List docValues; + public final List matchValues = Collections.synchronizedList(new ArrayList()); + + // density should be 0.0 ... 1.0 + public RandomFilter(Random random, float density, List docValues) { + this.random = random; + this.density = density; + this.docValues = docValues; + } + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + final int maxDoc = context.reader().maxDoc(); + final DocValues.Source idSource = context.reader().docValues("id").getSource(); + assertNotNull(idSource); + final FixedBitSet bits = new FixedBitSet(maxDoc); + for(int docID=0;docID extends AbstractAll ordSet.clear(); for (BytesRef countedGroup : groups) { - int ord = this.source.getByValue(countedGroup, spare); + int ord = this.source.getOrdByValue(countedGroup, spare); if (ord >= 0) { ordSet.put(ord); } diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java b/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java index 3178fda4c5a..c3460f7980b 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java @@ -20,7 +20,6 @@ package org.apache.lucene.search.grouping.dv; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Type; // javadocs -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Sort; import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector; import org.apache.lucene.search.grouping.SearchGroup; @@ -215,7 +214,7 @@ public abstract class DVSecondPassGroupingCollector extends Abstrac ordSet.clear(); for (SearchGroupDocs group : groupMap.values()) { - int ord = this.source.getByValue(group.groupValue, spare); + int ord = this.source.getOrdByValue(group.groupValue, spare); if (ord >= 0) { groupDocs[ordSet.put(ord)] = group; }