diff --git a/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVProducer.java index 0781a855336..d25a64ba4dd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVProducer.java @@ -25,6 +25,11 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; +// nocommit add javadocs stating that this must open all +// necessary files "on init", not later eg in .getXXX, else +// an IW that deletes a commit will cause an SR to hit +// exceptions.... + public abstract class SimpleDVProducer implements Closeable { public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index 4e26dcce73e..2e3f1b81da6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -519,12 +519,14 @@ public interface FieldCache { // nocommit: can we merge this api with the SortedDocValues api? public abstract static class DocTermsIndex { + // nocommit remove this? public int binarySearchLookup(BytesRef key, BytesRef spare) { // this special case is the reason that Arrays.binarySearch() isn't useful. - if (key == null) - return 0; - - int low = 1; + if (key == null) { + return -1; + } + + int low = 0; int high = numOrd()-1; while (low <= high) { @@ -543,24 +545,26 @@ public interface FieldCache { /** The BytesRef argument must not be null; the method * returns the same BytesRef, or an empty (length=0) - * BytesRef if this ord is the null ord (0). */ + * BytesRef if this ord is the null ord (-1). */ public abstract BytesRef lookup(int ord, BytesRef reuse); /** Convenience method, to lookup the Term for a doc. * If this doc is deleted or did not have this field, * this will return an empty (length=0) BytesRef. */ public BytesRef getTerm(int docID, BytesRef reuse) { - return lookup(getOrd(docID), reuse); + int ord = getOrd(docID); + if (ord == -1) { + return null; + } + return lookup(ord, reuse); } - /** Returns sort ord for this document. Ord 0 is - * reserved for docs that are deleted or did not have + /** Returns sort ord for this document. Ord -1 is + * is returend for docs that are deleted or did not have * this field. */ public abstract int getOrd(int docID); - /** Returns total unique ord count; this includes +1 for - * the null ord (always 0) unless the field was - * indexed with doc values. */ + /** Returns total unique ord count. */ public abstract int numOrd(); /** Number of documents */ @@ -568,9 +572,6 @@ public interface FieldCache { /** Returns a TermsEnum that can iterate over the values in this index entry */ public abstract TermsEnum getTermsEnum(); - - /** @lucene.internal */ - public abstract PackedInts.Reader getDocToOrd(); } /** Checks the internal cache for an appropriate entry, and if none diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index c33d5ff01ba..c21e5176d05 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -335,9 +335,6 @@ class FieldCacheImpl implements FieldCache { } } } - } else { - // nocommit is this right ... - docsWithField = new Bits.MatchNoBits(maxDoc); } } @@ -970,11 +967,6 @@ class FieldCacheImpl implements FieldCache { this.numOrd = numOrd; } - @Override - public PackedInts.Reader getDocToOrd() { - return docToTermOrd; - } - @Override public int numOrd() { return numOrd; @@ -982,7 +974,7 @@ class FieldCacheImpl implements FieldCache { @Override public int getOrd(int docID) { - return (int) docToTermOrd.get(docID); + return (int) docToTermOrd.get(docID)-1; } @Override @@ -1010,17 +1002,17 @@ class FieldCacheImpl implements FieldCache { final BytesRef term = new BytesRef(); public DocTermsIndexEnum() { - currentOrd = 0; + currentOrd = -1; currentBlockNumber = 0; blocks = bytes.getBlocks(); blockEnds = bytes.getBlockEnds(); - currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get(0)); + term.bytes = blocks[0]; end = blockEnds[currentBlockNumber]; } @Override public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException { - int low = 1; + int low = 0; int high = numOrd-1; while (low <= high) { @@ -1032,8 +1024,9 @@ class FieldCacheImpl implements FieldCache { low = mid + 1; else if (cmp > 0) high = mid - 1; - else + else { return SeekStatus.FOUND; // key found + } } if (low == numOrd) { @@ -1045,7 +1038,7 @@ class FieldCacheImpl implements FieldCache { } public void seekExact(long ord) throws IOException { - assert(ord >= 0 && ord <= numOrd); + assert ord >= 0 && ord <= numOrd; // TODO: if gap is small, could iterate from current position? Or let user decide that? currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord)); end = blockEnds[currentBlockNumber]; @@ -1057,14 +1050,18 @@ class FieldCacheImpl implements FieldCache { int start = term.offset + term.length; if (start >= end) { // switch byte blocks - if (currentBlockNumber +1 >= blocks.length) { + if (currentBlockNumber+1 >= blocks.length) { + assert currentOrd+1 == numOrd: "currentOrd=" + currentOrd + " numOrd=" + numOrd; return null; } currentBlockNumber++; term.bytes = blocks[currentBlockNumber]; end = blockEnds[currentBlockNumber]; start = 0; - if (end<=0) return null; // special case of empty last array + if (end<=0) { + assert currentOrd+1 == numOrd; + return null; // special case of empty last array + } } currentOrd++; @@ -1131,6 +1128,12 @@ class FieldCacheImpl implements FieldCache { } } + // nocommit for DV if you ask for sorted or binary we + // should check sorted first? + + // nocommit woudl be nice if .getTErms would return a + // DocTermsIndex if one already existed + public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException { return getTermsIndex(reader, field, PackedInts.FAST); } @@ -1180,12 +1183,6 @@ class FieldCacheImpl implements FieldCache { // nocommit: to the codec api? or can that termsenum just use this thing? return null; } - - @Override - public Reader getDocToOrd() { - // nocommit: add this to the codec api! - return null; - } }; } else { @@ -1206,6 +1203,7 @@ class FieldCacheImpl implements FieldCache { termCountHardLimit = maxDoc+1; } + // nocommit use Uninvert? if (terms != null) { // Try for coarse estimate for number of bits; this // should be an underestimate most of the time, which @@ -1238,8 +1236,9 @@ class FieldCacheImpl implements FieldCache { final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio); // 0 is reserved for "unset" - bytes.copyUsingLengthPrefix(new BytesRef()); - int termOrd = 1; + int termOrd = 0; + + // nocommit use Uninvert? if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); @@ -1267,7 +1266,7 @@ class FieldCacheImpl implements FieldCache { if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } - docToTermOrd.set(docID, termOrd); + docToTermOrd.set(docID, 1+termOrd); } termOrd++; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java index d02c8401f93..8ba3c9b5407 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java @@ -97,35 +97,34 @@ public abstract class FieldCacheRangeFilter extends Filter { final int inclusiveLowerPoint, inclusiveUpperPoint; // Hints: - // * binarySearchLookup returns 0, if value was null. + // * binarySearchLookup returns -1, if value was null. // * the value is <0 if no exact hit was found, the returned value // is (-(insertion point) - 1) - if (lowerPoint == 0) { - assert lowerVal == null; - inclusiveLowerPoint = 1; - } else if (includeLower && lowerPoint > 0) { + if (lowerPoint == -1 && lowerVal == null) { + inclusiveLowerPoint = 0; + } else if (includeLower && lowerPoint >= 0) { inclusiveLowerPoint = lowerPoint; - } else if (lowerPoint > 0) { + } else if (lowerPoint >= 0) { inclusiveLowerPoint = lowerPoint + 1; } else { - inclusiveLowerPoint = Math.max(1, -lowerPoint - 1); + inclusiveLowerPoint = Math.max(0, -lowerPoint - 1); } - if (upperPoint == 0) { - assert upperVal == null; + if (upperPoint == -1 && upperVal == null) { inclusiveUpperPoint = Integer.MAX_VALUE; - } else if (includeUpper && upperPoint > 0) { + } else if (includeUpper && upperPoint >= 0) { inclusiveUpperPoint = upperPoint; - } else if (upperPoint > 0) { + } else if (upperPoint >= 0) { inclusiveUpperPoint = upperPoint - 1; } else { inclusiveUpperPoint = -upperPoint - 2; } - if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint) + if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint) { return DocIdSet.EMPTY_DOCIDSET; + } - assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0; + assert inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0; return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) { @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java index bb837c4ff76..11caa9c84d5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java @@ -122,15 +122,21 @@ public class FieldCacheTermsFilter extends Filter { final FixedBitSet bits = new FixedBitSet(fcsi.numOrd()); final BytesRef spare = new BytesRef(); for (int i=0;i 0) { - bits.set(termNumber); + int ord = fcsi.binarySearchLookup(terms[i], spare); + if (ord >= 0) { + bits.set(ord); } } return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) { @Override protected final boolean matchDoc(int doc) { - return bits.get(fcsi.getOrd(doc)); + int ord = fcsi.getOrd(doc); + if (ord == -1) { + // missing + return false; + } else { + return bits.get(ord); + } } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index 8145501580f..3df7bb1176e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -1171,6 +1171,9 @@ public abstract class FieldComparator { return docValue.compareTo(value); } + // nocommit remove null from FC DocTerms/Index as an + // allowed value + /** Base class for specialized (per bit width of the * ords) per-segment comparator. NOTE: this is messy; * we do this only because hotspot can't reliably inline @@ -1218,191 +1221,6 @@ public abstract class FieldComparator { } } - // Used per-segment when bit width of doc->ord is 8: - private final class ByteOrdComparator extends PerSegmentComparator { - private final byte[] readerOrds; - private final DocTermsIndex termsIndex; - private final int docBase; - - public ByteOrdComparator(byte[] readerOrds, DocTermsIndex termsIndex, int docBase) { - this.readerOrds = readerOrds; - this.termsIndex = termsIndex; - this.docBase = docBase; - } - - @Override - public int compareBottom(int doc) { - assert bottomSlot != -1; - final int docOrd = (readerOrds[doc]&0xFF); - if (bottomSameReader) { - // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; - } else if (bottomOrd >= docOrd) { - // the equals case always means bottom is > doc - // (because we set bottomOrd to the lower bound in - // setBottom): - return 1; - } else { - return -1; - } - } - - @Override - public void copy(int slot, int doc) { - final int ord = readerOrds[doc]&0xFF; - ords[slot] = ord; - if (ord == 0) { - values[slot] = null; - } else { - assert ord > 0; - if (values[slot] == null) { - values[slot] = new BytesRef(); - } - termsIndex.lookup(ord, values[slot]); - } - readerGen[slot] = currentReaderGen; - } - } - - // Used per-segment when bit width of doc->ord is 16: - private final class ShortOrdComparator extends PerSegmentComparator { - private final short[] readerOrds; - private final DocTermsIndex termsIndex; - private final int docBase; - - public ShortOrdComparator(short[] readerOrds, DocTermsIndex termsIndex, int docBase) { - this.readerOrds = readerOrds; - this.termsIndex = termsIndex; - this.docBase = docBase; - } - - @Override - public int compareBottom(int doc) { - assert bottomSlot != -1; - final int docOrd = (readerOrds[doc]&0xFFFF); - if (bottomSameReader) { - // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; - } else if (bottomOrd >= docOrd) { - // the equals case always means bottom is > doc - // (because we set bottomOrd to the lower bound in - // setBottom): - return 1; - } else { - return -1; - } - } - - @Override - public void copy(int slot, int doc) { - final int ord = readerOrds[doc]&0xFFFF; - ords[slot] = ord; - if (ord == 0) { - values[slot] = null; - } else { - assert ord > 0; - if (values[slot] == null) { - values[slot] = new BytesRef(); - } - termsIndex.lookup(ord, values[slot]); - } - readerGen[slot] = currentReaderGen; - } - } - - // Used per-segment when bit width of doc->ord is 32: - private final class IntOrdComparator extends PerSegmentComparator { - private final int[] readerOrds; - private final DocTermsIndex termsIndex; - private final int docBase; - - public IntOrdComparator(int[] readerOrds, DocTermsIndex termsIndex, int docBase) { - this.readerOrds = readerOrds; - this.termsIndex = termsIndex; - this.docBase = docBase; - } - - @Override - public int compareBottom(int doc) { - assert bottomSlot != -1; - final int docOrd = readerOrds[doc]; - if (bottomSameReader) { - // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; - } else if (bottomOrd >= docOrd) { - // the equals case always means bottom is > doc - // (because we set bottomOrd to the lower bound in - // setBottom): - return 1; - } else { - return -1; - } - } - - @Override - public void copy(int slot, int doc) { - final int ord = readerOrds[doc]; - ords[slot] = ord; - if (ord == 0) { - values[slot] = null; - } else { - assert ord > 0; - if (values[slot] == null) { - values[slot] = new BytesRef(); - } - termsIndex.lookup(ord, values[slot]); - } - readerGen[slot] = currentReaderGen; - } - } - - // Used per-segment when bit width is not a native array - // size (8, 16, 32): - private final class AnyDocToOrdComparator extends PerSegmentComparator { - private final PackedInts.Reader readerOrds; - private final DocTermsIndex termsIndex; - private final int docBase; - - public AnyDocToOrdComparator(PackedInts.Reader readerOrds, DocTermsIndex termsIndex, int docBase) { - this.readerOrds = readerOrds; - this.termsIndex = termsIndex; - this.docBase = docBase; - } - - @Override - public int compareBottom(int doc) { - assert bottomSlot != -1; - final int docOrd = (int) readerOrds.get(doc); - if (bottomSameReader) { - // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; - } else if (bottomOrd >= docOrd) { - // the equals case always means bottom is > doc - // (because we set bottomOrd to the lower bound in - // setBottom): - return 1; - } else { - return -1; - } - } - - @Override - public void copy(int slot, int doc) { - final int ord = (int) readerOrds.get(doc); - ords[slot] = ord; - if (ord == 0) { - values[slot] = null; - } else { - assert ord > 0; - if (values[slot] == null) { - values[slot] = new BytesRef(); - } - termsIndex.lookup(ord, values[slot]); - } - readerGen[slot] = currentReaderGen; - } - } - // Used per-segment when docToOrd is null: private final class AnyOrdComparator extends PerSegmentComparator { private final DocTermsIndex termsIndex; @@ -1416,7 +1234,7 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; - final int docOrd = (int) termsIndex.getOrd(doc); + final int docOrd = termsIndex.getOrd(doc); if (bottomSameReader) { // ord is precisely comparable, even in the equal case return bottomOrd - docOrd; @@ -1432,12 +1250,12 @@ public abstract class FieldComparator { @Override public void copy(int slot, int doc) { - final int ord = (int) termsIndex.getOrd(doc); + final int ord = termsIndex.getOrd(doc); ords[slot] = ord; - if (ord == 0) { + if (ord == -1) { values[slot] = null; } else { - assert ord > 0; + assert ord >= 0; if (values[slot] == null) { values[slot] = new BytesRef(); } @@ -1451,29 +1269,7 @@ public abstract class FieldComparator { public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { final int docBase = context.docBase; termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field); - final PackedInts.Reader docToOrd = termsIndex.getDocToOrd(); - FieldComparator perSegComp = null; - if (docToOrd != null && docToOrd.hasArray()) { - final Object arr = docToOrd.getArray(); - if (arr instanceof byte[]) { - perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase); - } else if (arr instanceof short[]) { - perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase); - } else if (arr instanceof int[]) { - perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase); - } - // Don't specialize the long[] case since it's not - // possible, ie, worse case is MAX_INT-1 docs with - // every one having a unique value. - } - if (perSegComp == null) { - if (docToOrd != null) { - perSegComp = new AnyDocToOrdComparator(docToOrd, termsIndex, docBase); - } else { - perSegComp = new AnyOrdComparator(termsIndex, docBase); - } - } - + FieldComparator perSegComp = new AnyOrdComparator(termsIndex, docBase); currentReaderGen++; if (bottomSlot != -1) { perSegComp.setBottom(bottomSlot); @@ -1492,9 +1288,9 @@ public abstract class FieldComparator { bottomSameReader = true; } else { if (bottomValue == null) { - // 0 ord is null for all segments - assert ords[bottomSlot] == 0; - bottomOrd = 0; + // -1 ord is null for all segments + assert ords[bottomSlot] == -1; + bottomOrd = -1; bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { @@ -2118,7 +1914,7 @@ public abstract class FieldComparator { } final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) { - return binarySearch(br, a, key, 1, a.numOrd()-1); + return binarySearch(br, a, key, 0, a.numOrd()-1); } final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key, int low, int high) { diff --git a/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java b/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java index d8e88396859..99134986684 100644 --- a/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java +++ b/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java @@ -494,6 +494,11 @@ public class TestDemoDocValue extends LuceneTestCase { directory.close(); } + // nocommit tests should fail if a codec makes the [easy] + // mistake of NOT opening all files when SimpleDVProducer + // is created ... frist cut of Lucene41 had this bug but + // no tests failed!? + public void testDemoSortedBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index aba49657baa..433e0656cb2 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1628,9 +1628,9 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(1, reader.docFreq(new Term("content", bigTerm))); FieldCache.DocTermsIndex dti = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(reader), "content", random().nextFloat() * PackedInts.FAST); - assertEquals(5, dti.numOrd()); // +1 for null ord + assertEquals(4, dti.numOrd()); assertEquals(4, dti.size()); - assertEquals(bigTermBytesRef, dti.lookup(3, new BytesRef())); + assertEquals(bigTermBytesRef, dti.lookup(2, new BytesRef())); reader.close(); dir.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java b/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java index 46cb61badab..da06242f427 100644 --- a/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java +++ b/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java @@ -144,7 +144,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod // fill into a OpenBitSet do { long ord = termsEnum.ord(); - if (ord > 0) { + if (ord >= 0) { termSet.set(ord); } } while (termsEnum.next() != null); @@ -155,7 +155,11 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) { @Override protected final boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException { - return termSet.get(fcsi.getOrd(doc)); + int ord = fcsi.getOrd(doc); + if (ord == -1) { + return false; + } + return termSet.get(ord); } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java b/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java index cc77bc2d09e..e78f0d64a65 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -159,7 +159,7 @@ class ElevationComparatorSource extends FieldComparatorSource { private int docVal(int doc) { int ord = idIndex.getOrd(doc); - if (ord == 0) { + if (ord == -1) { return 0; } else { BytesRef id = idIndex.lookup(ord, tempBR); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index 6c6c0a584c5..cdaded446cb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -203,7 +203,7 @@ public class TestFieldCache extends LuceneTestCase { TermsEnum tenum = termsIndex.getTermsEnum(); BytesRef val = new BytesRef(); - for (int i=1; i= 0)) { // ensure first field is in order