diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e849b966d7c..1dea4a0735e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -214,6 +214,9 @@ API Changes a value for the field. Previously it only did this if the SortedDocValues was produced by uninversion on the FieldCache. (Robert Muir) +* LUCENE-5183: remove BinaryDocValues.MISSING. In order to determine a document + is missing a field, use getDocsWithField instead. (Robert Muir) + Changes in Runtime Behavior * LUCENE-5178: DocValues codec consumer APIs (iterables) return null values diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java index 9e6431d8b9d..67fe0b14486 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java @@ -30,17 +30,12 @@ public abstract class BinaryDocValues { /** Lookup the value for document. */ public abstract void get(int docID, BytesRef result); - - /** - * Indicates the value was missing for the document. - */ - public static final byte[] MISSING = new byte[0]; - /** An empty BinaryDocValues which returns {@link #MISSING} for every document */ + /** An empty BinaryDocValues which returns {@link BytesRef#EMPTY_BYTES} for every document */ public static final BinaryDocValues EMPTY = new BinaryDocValues() { @Override public void get(int docID, BytesRef result) { - result.bytes = MISSING; + result.bytes = BytesRef.EMPTY_BYTES; result.offset = 0; result.length = 0; } diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java index df36931a253..a00ce9d493a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java @@ -60,7 +60,7 @@ public abstract class SortedDocValues extends BinaryDocValues { public void get(int docID, BytesRef result) { int ord = getOrd(docID); if (ord == -1) { - result.bytes = MISSING; + result.bytes = BytesRef.EMPTY_BYTES; result.length = 0; result.offset = 0; } else { @@ -68,7 +68,7 @@ public abstract class SortedDocValues extends BinaryDocValues { } } - /** An empty SortedDocValues which returns {@link #MISSING} for every document */ + /** An empty SortedDocValues which returns {@link BytesRef#EMPTY_BYTES} for every document */ public static final SortedDocValues EMPTY = new SortedDocValues() { @Override public int getOrd(int docID) { @@ -77,7 +77,7 @@ public abstract class SortedDocValues extends BinaryDocValues { @Override public void lookupOrd(int ord, BytesRef result) { - result.bytes = MISSING; + result.bytes = BytesRef.EMPTY_BYTES; result.offset = 0; result.length = 0; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java index cce1024ed5d..f08621f8206 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java @@ -378,18 +378,19 @@ public interface FieldCache { * method to retrieve the term (as a BytesRef) per document. * @param reader Used to get field values. * @param field Which field contains the strings. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. * @return The values in the given field for each document. * @throws IOException If any error occurs. */ - public BinaryDocValues getTerms (AtomicReader reader, String field) - throws IOException; + public BinaryDocValues getTerms (AtomicReader reader, String field, boolean setDocsWithField) throws IOException; - /** Expert: just like {@link #getTerms(AtomicReader,String)}, + /** Expert: just like {@link #getTerms(AtomicReader,String,boolean)}, * but you can specify whether more RAM should be consumed in exchange for * faster lookups (default is "true"). Note that the * first call for a given reader and field "wins", * subsequent calls will share the same cache entry. */ - public BinaryDocValues getTerms (AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException; + public BinaryDocValues getTerms (AtomicReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException; /** Checks the internal cache for an appropriate entry, and if none * is found, reads the term values in field diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index b8e81d17ea4..85b77d902bc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -1056,7 +1056,7 @@ class FieldCacheImpl implements FieldCache { public void get(int docID, BytesRef ret) { final int pointer = (int) docToOffset.get(docID); if (pointer == 0) { - ret.bytes = MISSING; + ret.bytes = BytesRef.EMPTY_BYTES; ret.offset = 0; ret.length = 0; } else { @@ -1067,11 +1067,11 @@ class FieldCacheImpl implements FieldCache { // TODO: this if DocTermsIndex was already created, we // should share it... - public BinaryDocValues getTerms(AtomicReader reader, String field) throws IOException { - return getTerms(reader, field, PackedInts.FAST); + public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField) throws IOException { + return getTerms(reader, field, setDocsWithField, PackedInts.FAST); } - public BinaryDocValues getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { + public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException { BinaryDocValues valuesIn = reader.getBinaryDocValues(field); if (valuesIn == null) { valuesIn = reader.getSortedDocValues(field); @@ -1092,7 +1092,7 @@ class FieldCacheImpl implements FieldCache { return BinaryDocValues.EMPTY; } - return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); + return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), setDocsWithField); } static final class BinaryDocValuesCache extends Cache { @@ -1101,7 +1101,7 @@ class FieldCacheImpl implements FieldCache { } @Override - protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */) + protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { // TODO: would be nice to first check if DocTermsIndex @@ -1170,8 +1170,22 @@ class FieldCacheImpl implements FieldCache { } } + final PackedInts.Reader offsetReader = docToOffset.getMutable(); + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, new Bits() { + @Override + public boolean get(int index) { + return offsetReader.get(index) != 0; + } + + @Override + public int length() { + return maxDoc; + } + }); + } // maybe an int-only impl? - return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable()); + return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index 28e6144bd42..86e7967de4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -893,6 +893,9 @@ public abstract class FieldComparator { return values[slot]; } } + + // just used internally in this comparator + private static final byte[] MISSING_BYTES = new byte[0]; /** Sorts by field's natural Term sort order. All * comparisons are done using BytesRef.compareTo, which is @@ -902,6 +905,7 @@ public abstract class FieldComparator { private BytesRef[] values; private BinaryDocValues docTerms; + private Bits docsWithField; private final String field; private BytesRef bottom; private final BytesRef tempBR = new BytesRef(); @@ -930,12 +934,15 @@ public abstract class FieldComparator { @Override public int compareBottom(int doc) { docTerms.get(doc, tempBR); - if (bottom.bytes == BinaryDocValues.MISSING) { - if (tempBR.bytes == BinaryDocValues.MISSING) { + if (tempBR.length == 0 && docsWithField.get(doc) == false) { + tempBR.bytes = MISSING_BYTES; + } + if (bottom.bytes == MISSING_BYTES) { + if (tempBR.bytes == MISSING_BYTES) { return 0; } return -1; - } else if (tempBR.bytes == BinaryDocValues.MISSING) { + } else if (tempBR.bytes == MISSING_BYTES) { return 1; } return bottom.compareTo(tempBR); @@ -947,11 +954,15 @@ public abstract class FieldComparator { values[slot] = new BytesRef(); } docTerms.get(doc, values[slot]); + if (values[slot].length == 0 && docsWithField.get(doc) == false) { + values[slot].bytes = MISSING_BYTES; + } } @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - docTerms = FieldCache.DEFAULT.getTerms(context.reader(), field); + docTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, true); + docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field); return this; } @@ -981,6 +992,9 @@ public abstract class FieldComparator { @Override public int compareDocToValue(int doc, BytesRef value) { docTerms.get(doc, tempBR); + if (tempBR.length == 0 && docsWithField.get(doc) == false) { + tempBR.bytes = MISSING_BYTES; + } return tempBR.compareTo(value); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index 64d0baf1c43..67c8e51c5bd 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -182,7 +182,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.addDocument(doc); w.forceMerge(1); DirectoryReader r = w.getReader(); - BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field"); + BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field", false); BytesRef bytes1 = new BytesRef(); s.get(0, bytes1); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesWithThreads.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesWithThreads.java index 17ce72b7c95..964fc518df1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesWithThreads.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesWithThreads.java @@ -79,7 +79,7 @@ public class TestDocValuesWithThreads extends LuceneTestCase { //NumericDocValues ndv = ar.getNumericDocValues("number"); FieldCache.Longs ndv = FieldCache.DEFAULT.getLongs(ar, "number", false); //BinaryDocValues bdv = ar.getBinaryDocValues("bytes"); - BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes"); + BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false); SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted"); startingGun.await(); int iters = atLeast(1000); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index c36367277bb..572129341f9 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -250,12 +250,13 @@ public class TestFieldCache extends LuceneTestCase { termsIndex = cache.getTermsIndex(reader, "bogusfield"); // getTerms - BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString"); - assertSame("Second request to cache return same array", terms, cache.getTerms(reader, "theRandomUnicodeString")); + BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true); + assertSame("Second request to cache return same array", terms, cache.getTerms(reader, "theRandomUnicodeString", true)); + Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString"); for (int i = 0; i < NUM_DOCS; i++) { terms.get(i, br); final BytesRef term; - if (br.bytes == BinaryDocValues.MISSING) { + if (!bits.get(i)) { term = null; } else { term = br; @@ -265,7 +266,7 @@ public class TestFieldCache extends LuceneTestCase { } // test bad field - terms = cache.getTerms(reader, "bogusfield"); + terms = cache.getTerms(reader, "bogusfield", false); // getDocTermOrds SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"); @@ -305,7 +306,7 @@ public class TestFieldCache extends LuceneTestCase { writer.close(); IndexReader r = DirectoryReader.open(dir); AtomicReader reader = SlowCompositeReaderWrapper.wrap(r); - FieldCache.DEFAULT.getTerms(reader, "foobar"); + FieldCache.DEFAULT.getTerms(reader, "foobar", true); FieldCache.DEFAULT.getTermsIndex(reader, "foobar"); FieldCache.DEFAULT.purge(reader); r.close(); @@ -460,7 +461,7 @@ public class TestFieldCache extends LuceneTestCase { fail(); } catch (IllegalStateException expected) {} - BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary"); + BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true); binary.get(0, scratch); assertEquals("binary value", scratch.utf8ToString()); @@ -493,7 +494,7 @@ public class TestFieldCache extends LuceneTestCase { fail(); } catch (IllegalStateException expected) {} - binary = FieldCache.DEFAULT.getTerms(ar, "sorted"); + binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true); binary.get(0, scratch); assertEquals("sorted value", scratch.utf8ToString()); @@ -517,7 +518,7 @@ public class TestFieldCache extends LuceneTestCase { assertEquals(42, numeric.get(0)); try { - FieldCache.DEFAULT.getTerms(ar, "numeric"); + FieldCache.DEFAULT.getTerms(ar, "numeric", true); fail(); } catch (IllegalStateException expected) {} @@ -547,7 +548,7 @@ public class TestFieldCache extends LuceneTestCase { } catch (IllegalStateException expected) {} try { - FieldCache.DEFAULT.getTerms(ar, "sortedset"); + FieldCache.DEFAULT.getTerms(ar, "sortedset", true); fail(); } catch (IllegalStateException expected) {} @@ -603,14 +604,14 @@ public class TestFieldCache extends LuceneTestCase { assertEquals(0, doubles.get(0), 0.0D); BytesRef scratch = new BytesRef(); - BinaryDocValues binaries = cache.getTerms(ar, "bogusterms"); + BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true); binaries.get(0, scratch); - assertTrue(scratch.bytes == BinaryDocValues.MISSING); + assertEquals(0, scratch.length); SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex"); assertEquals(-1, sorted.getOrd(0)); sorted.get(0, scratch); - assertTrue(scratch.bytes == BinaryDocValues.MISSING); + assertEquals(0, scratch.length); SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued"); sortedSet.setDocument(0); @@ -662,14 +663,14 @@ public class TestFieldCache extends LuceneTestCase { assertEquals(0, doubles.get(0), 0.0D); BytesRef scratch = new BytesRef(); - BinaryDocValues binaries = cache.getTerms(ar, "bogusterms"); + BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true); binaries.get(0, scratch); - assertTrue(scratch.bytes == BinaryDocValues.MISSING); + assertEquals(0, scratch.length); SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex"); assertEquals(-1, sorted.getOrd(0)); sorted.get(0, scratch); - assertTrue(scratch.bytes == BinaryDocValues.MISSING); + assertEquals(0, scratch.length); SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued"); sortedSet.setDocument(0); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java b/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java index 895eaa603db..1626bb67c94 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestFieldCacheSanityChecker.java @@ -120,7 +120,7 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase { cache.purgeAllCaches(); cache.getInts(readerX, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, false); - cache.getTerms(readerX, "theInt"); + cache.getTerms(readerX, "theInt", false); // // // @@ -142,9 +142,9 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase { FieldCache cache = FieldCache.DEFAULT; cache.purgeAllCaches(); - cache.getTerms(readerA, "theInt"); - cache.getTerms(readerB, "theInt"); - cache.getTerms(readerX, "theInt"); + cache.getTerms(readerA, "theInt", false); + cache.getTerms(readerB, "theInt", false); + cache.getTerms(readerX, "theInt", false); // // // diff --git a/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java b/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java index 20c8e47cfc6..680efa08f28 100644 --- a/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java +++ b/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java @@ -56,7 +56,7 @@ public class TestFailOnFieldCacheInsanity extends WithNestedTests { public void testDummy() throws Exception { makeIndex(); assertNotNull(FieldCache.DEFAULT.getTermsIndex(subR, "ints")); - assertNotNull(FieldCache.DEFAULT.getTerms(subR, "ints")); + assertNotNull(FieldCache.DEFAULT.getTerms(subR, "ints", false)); // NOTE: do not close reader/directory, else it // purges FC entries } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java index afd589fa49e..49004b43b35 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java @@ -109,7 +109,7 @@ abstract class TermsCollector extends Collector { @Override public void setNextReader(AtomicReaderContext context) throws IOException { - fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field); + fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false); } } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java index 7be115b57e0..dae42b57068 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java @@ -129,7 +129,7 @@ abstract class TermsWithScoreCollector extends Collector { @Override public void setNextReader(AtomicReaderContext context) throws IOException { - fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field); + fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false); } static class Avg extends SV { diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index e067f0b0aac..f99f4d22588 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -47,6 +47,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; @@ -511,13 +512,14 @@ public class TestJoinUtil extends LuceneTestCase { private Scorer scorer; private BinaryDocValues terms; + private Bits docsWithField; private final BytesRef spare = new BytesRef(); @Override public void collect(int doc) throws IOException { terms.get(doc, spare); BytesRef joinValue = spare; - if (joinValue.bytes == BinaryDocValues.MISSING) { + if (joinValue.length == 0 && !docsWithField.get(doc)) { return; } @@ -530,7 +532,8 @@ public class TestJoinUtil extends LuceneTestCase { @Override public void setNextReader(AtomicReaderContext context) throws IOException { - terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField); + terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true); + docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField); } @Override @@ -628,7 +631,7 @@ public class TestJoinUtil extends LuceneTestCase { @Override public void setNextReader(AtomicReaderContext context) throws IOException { - terms = FieldCache.DEFAULT.getTerms(context.reader(), toField); + terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false); docBase = context.docBase; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java index 3148a23d196..950b07dbbe3 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java @@ -148,8 +148,16 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { @Override public void fillValue(int doc) { - termsIndex.get(doc, mval.value); - mval.exists = mval.value.bytes != SortedDocValues.MISSING; + int ord = termsIndex.getOrd(doc); + if (ord == -1) { + mval.value.bytes = BytesRef.EMPTY_BYTES; + mval.value.offset = 0; + mval.value.length = 0; + mval.exists = false; + } else { + termsIndex.lookupOrd(ord, mval.value); + mval.exists = true; + } } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java index 871c94cc50e..5d33ef301b7 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java @@ -45,8 +45,8 @@ public class BytesRefFieldSource extends FieldCacheSource { // To be sorted or not to be sorted, that is the question // TODO: do it cleaner? if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) { + final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field, true); final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field); - final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field); return new FunctionValues() { @Override diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/JoinDocFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/JoinDocFreqValueSource.java index c59ef21e5c6..173ca579b84 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/JoinDocFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/JoinDocFreqValueSource.java @@ -56,7 +56,7 @@ public class JoinDocFreqValueSource extends FieldCacheSource { @Override public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { - final BinaryDocValues terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST); + final BinaryDocValues terms = cache.getTerms(readerContext.reader(), field, false, PackedInts.FAST); final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader(); Terms t = MultiFields.getTerms(top, qfield); final TermsEnum termsEnum = t == null ? TermsEnum.EMPTY : t.iterator(null); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java index b7f8d11828e..b4a0e1904b5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; /** Sorts by a field's value using the given Collator @@ -39,6 +40,7 @@ public final class SlowCollatedStringComparator extends FieldComparator private final String[] values; private BinaryDocValues currentDocTerms; + private Bits docsWithField; private final String field; final Collator collator; private String bottom; @@ -68,7 +70,7 @@ public final class SlowCollatedStringComparator extends FieldComparator @Override public int compareBottom(int doc) { currentDocTerms.get(doc, tempBR); - final String val2 = tempBR.bytes == BinaryDocValues.MISSING ? null : tempBR.utf8ToString(); + final String val2 = tempBR.length == 0 && docsWithField.get(doc) == false ? null : tempBR.utf8ToString(); if (bottom == null) { if (val2 == null) { return 0; @@ -83,7 +85,7 @@ public final class SlowCollatedStringComparator extends FieldComparator @Override public void copy(int slot, int doc) { currentDocTerms.get(doc, tempBR); - if (tempBR.bytes == BinaryDocValues.MISSING) { + if (tempBR.length == 0 && docsWithField.get(doc) == false) { values[slot] = null; } else { values[slot] = tempBR.utf8ToString(); @@ -92,7 +94,8 @@ public final class SlowCollatedStringComparator extends FieldComparator @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field); + currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, true); + docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field); return this; } @@ -124,7 +127,7 @@ public final class SlowCollatedStringComparator extends FieldComparator public int compareDocToValue(int doc, String value) { currentDocTerms.get(doc, tempBR); final String docValue; - if (tempBR.bytes == BinaryDocValues.MISSING) { + if (tempBR.length == 0 && docsWithField.get(doc) == false) { docValue = null; } else { docValue = tempBR.utf8ToString(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java index 44942785cb5..53d12ae2dd9 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java @@ -2635,7 +2635,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase { AtomicReader ar = SlowCompositeReaderWrapper.wrap(r); - BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field"); + BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false); for(int docID=0;docID