LUCENE-5183: remove BinaryDocValues.MISSING

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1518997 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-08-30 15:46:03 +00:00
parent 99fdfb3d63
commit f403e3760e
19 changed files with 104 additions and 62 deletions

View File

@ -214,6 +214,9 @@ API Changes
a value for the field. Previously it only did this if the SortedDocValues
was produced by uninversion on the FieldCache. (Robert Muir)
* LUCENE-5183: remove BinaryDocValues.MISSING. In order to determine whether a document
is missing a field, use getDocsWithField instead. (Robert Muir)
Changes in Runtime Behavior
* LUCENE-5178: DocValues codec consumer APIs (iterables) return null values

View File

@ -30,17 +30,12 @@ public abstract class BinaryDocValues {
/** Lookup the value for document. */
public abstract void get(int docID, BytesRef result);
/**
* Indicates the value was missing for the document.
*/
public static final byte[] MISSING = new byte[0];
/** An empty BinaryDocValues which returns {@link #MISSING} for every document */
/** An empty BinaryDocValues which returns {@link BytesRef#EMPTY_BYTES} for every document */
public static final BinaryDocValues EMPTY = new BinaryDocValues() {
@Override
public void get(int docID, BytesRef result) {
result.bytes = MISSING;
result.bytes = BytesRef.EMPTY_BYTES;
result.offset = 0;
result.length = 0;
}

View File

@ -60,7 +60,7 @@ public abstract class SortedDocValues extends BinaryDocValues {
public void get(int docID, BytesRef result) {
int ord = getOrd(docID);
if (ord == -1) {
result.bytes = MISSING;
result.bytes = BytesRef.EMPTY_BYTES;
result.length = 0;
result.offset = 0;
} else {
@ -68,7 +68,7 @@ public abstract class SortedDocValues extends BinaryDocValues {
}
}
/** An empty SortedDocValues which returns {@link #MISSING} for every document */
/** An empty SortedDocValues which returns {@link BytesRef#EMPTY_BYTES} for every document */
public static final SortedDocValues EMPTY = new SortedDocValues() {
@Override
public int getOrd(int docID) {
@ -77,7 +77,7 @@ public abstract class SortedDocValues extends BinaryDocValues {
@Override
public void lookupOrd(int ord, BytesRef result) {
result.bytes = MISSING;
result.bytes = BytesRef.EMPTY_BYTES;
result.offset = 0;
result.length = 0;
}

View File

@ -378,18 +378,19 @@ public interface FieldCache {
* method to retrieve the term (as a BytesRef) per document.
* @param reader Used to get field values.
* @param field Which field contains the strings.
* @param setDocsWithField If true then {@link #getDocsWithField} will
* also be computed and stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public BinaryDocValues getTerms (AtomicReader reader, String field)
throws IOException;
public BinaryDocValues getTerms (AtomicReader reader, String field, boolean setDocsWithField) throws IOException;
/** Expert: just like {@link #getTerms(AtomicReader,String)},
/** Expert: just like {@link #getTerms(AtomicReader,String,boolean)},
* but you can specify whether more RAM should be consumed in exchange for
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public BinaryDocValues getTerms (AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException;
public BinaryDocValues getTerms (AtomicReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none
* is found, reads the term values in <code>field</code>

View File

@ -1056,7 +1056,7 @@ class FieldCacheImpl implements FieldCache {
public void get(int docID, BytesRef ret) {
final int pointer = (int) docToOffset.get(docID);
if (pointer == 0) {
ret.bytes = MISSING;
ret.bytes = BytesRef.EMPTY_BYTES;
ret.offset = 0;
ret.length = 0;
} else {
@ -1067,11 +1067,11 @@ class FieldCacheImpl implements FieldCache {
// TODO: this if DocTermsIndex was already created, we
// should share it...
public BinaryDocValues getTerms(AtomicReader reader, String field) throws IOException {
return getTerms(reader, field, PackedInts.FAST);
public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getTerms(reader, field, setDocsWithField, PackedInts.FAST);
}
public BinaryDocValues getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException {
BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
if (valuesIn == null) {
valuesIn = reader.getSortedDocValues(field);
@ -1092,7 +1092,7 @@ class FieldCacheImpl implements FieldCache {
return BinaryDocValues.EMPTY;
}
return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false);
return (BinaryDocValues) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), setDocsWithField);
}
static final class BinaryDocValuesCache extends Cache {
@ -1101,7 +1101,7 @@ class FieldCacheImpl implements FieldCache {
}
@Override
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField)
throws IOException {
// TODO: would be nice to first check if DocTermsIndex
@ -1170,8 +1170,22 @@ class FieldCacheImpl implements FieldCache {
}
}
final PackedInts.Reader offsetReader = docToOffset.getMutable();
if (setDocsWithField) {
wrapper.setDocsWithField(reader, key.field, new Bits() {
@Override
public boolean get(int index) {
return offsetReader.get(index) != 0;
}
@Override
public int length() {
return maxDoc;
}
});
}
// maybe an int-only impl?
return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable());
return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader);
}
}

View File

@ -893,6 +893,9 @@ public abstract class FieldComparator<T> {
return values[slot];
}
}
// just used internally in this comparator
private static final byte[] MISSING_BYTES = new byte[0];
/** Sorts by field's natural Term sort order. All
* comparisons are done using BytesRef.compareTo, which is
@ -902,6 +905,7 @@ public abstract class FieldComparator<T> {
private BytesRef[] values;
private BinaryDocValues docTerms;
private Bits docsWithField;
private final String field;
private BytesRef bottom;
private final BytesRef tempBR = new BytesRef();
@ -930,12 +934,15 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) {
docTerms.get(doc, tempBR);
if (bottom.bytes == BinaryDocValues.MISSING) {
if (tempBR.bytes == BinaryDocValues.MISSING) {
if (tempBR.length == 0 && docsWithField.get(doc) == false) {
tempBR.bytes = MISSING_BYTES;
}
if (bottom.bytes == MISSING_BYTES) {
if (tempBR.bytes == MISSING_BYTES) {
return 0;
}
return -1;
} else if (tempBR.bytes == BinaryDocValues.MISSING) {
} else if (tempBR.bytes == MISSING_BYTES) {
return 1;
}
return bottom.compareTo(tempBR);
@ -947,11 +954,15 @@ public abstract class FieldComparator<T> {
values[slot] = new BytesRef();
}
docTerms.get(doc, values[slot]);
if (values[slot].length == 0 && docsWithField.get(doc) == false) {
values[slot].bytes = MISSING_BYTES;
}
}
@Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
docTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
docTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, true);
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
return this;
}
@ -981,6 +992,9 @@ public abstract class FieldComparator<T> {
@Override
public int compareDocToValue(int doc, BytesRef value) {
docTerms.get(doc, tempBR);
if (tempBR.length == 0 && docsWithField.get(doc) == false) {
tempBR.bytes = MISSING_BYTES;
}
return tempBR.compareTo(value);
}
}

View File

@ -182,7 +182,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = w.getReader();
BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field");
BinaryDocValues s = FieldCache.DEFAULT.getTerms(getOnlySegmentReader(r), "field", false);
BytesRef bytes1 = new BytesRef();
s.get(0, bytes1);

View File

@ -79,7 +79,7 @@ public class TestDocValuesWithThreads extends LuceneTestCase {
//NumericDocValues ndv = ar.getNumericDocValues("number");
FieldCache.Longs ndv = FieldCache.DEFAULT.getLongs(ar, "number", false);
//BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes");
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
startingGun.await();
int iters = atLeast(1000);

View File

@ -250,12 +250,13 @@ public class TestFieldCache extends LuceneTestCase {
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
assertSame("Second request to cache return same array", terms, cache.getTerms(reader, "theRandomUnicodeString"));
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
assertSame("Second request to cache return same array", terms, cache.getTerms(reader, "theRandomUnicodeString", true));
Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
terms.get(i, br);
final BytesRef term;
if (br.bytes == BinaryDocValues.MISSING) {
if (!bits.get(i)) {
term = null;
} else {
term = br;
@ -265,7 +266,7 @@ public class TestFieldCache extends LuceneTestCase {
}
// test bad field
terms = cache.getTerms(reader, "bogusfield");
terms = cache.getTerms(reader, "bogusfield", false);
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
@ -305,7 +306,7 @@ public class TestFieldCache extends LuceneTestCase {
writer.close();
IndexReader r = DirectoryReader.open(dir);
AtomicReader reader = SlowCompositeReaderWrapper.wrap(r);
FieldCache.DEFAULT.getTerms(reader, "foobar");
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
FieldCache.DEFAULT.purge(reader);
r.close();
@ -460,7 +461,7 @@ public class TestFieldCache extends LuceneTestCase {
fail();
} catch (IllegalStateException expected) {}
BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary");
BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
binary.get(0, scratch);
assertEquals("binary value", scratch.utf8ToString());
@ -493,7 +494,7 @@ public class TestFieldCache extends LuceneTestCase {
fail();
} catch (IllegalStateException expected) {}
binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
binary.get(0, scratch);
assertEquals("sorted value", scratch.utf8ToString());
@ -517,7 +518,7 @@ public class TestFieldCache extends LuceneTestCase {
assertEquals(42, numeric.get(0));
try {
FieldCache.DEFAULT.getTerms(ar, "numeric");
FieldCache.DEFAULT.getTerms(ar, "numeric", true);
fail();
} catch (IllegalStateException expected) {}
@ -547,7 +548,7 @@ public class TestFieldCache extends LuceneTestCase {
} catch (IllegalStateException expected) {}
try {
FieldCache.DEFAULT.getTerms(ar, "sortedset");
FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
fail();
} catch (IllegalStateException expected) {}
@ -603,14 +604,14 @@ public class TestFieldCache extends LuceneTestCase {
assertEquals(0, doubles.get(0), 0.0D);
BytesRef scratch = new BytesRef();
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
binaries.get(0, scratch);
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
assertEquals(0, scratch.length);
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(-1, sorted.getOrd(0));
sorted.get(0, scratch);
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
sortedSet.setDocument(0);
@ -662,14 +663,14 @@ public class TestFieldCache extends LuceneTestCase {
assertEquals(0, doubles.get(0), 0.0D);
BytesRef scratch = new BytesRef();
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
binaries.get(0, scratch);
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
assertEquals(0, scratch.length);
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(-1, sorted.getOrd(0));
sorted.get(0, scratch);
assertTrue(scratch.bytes == BinaryDocValues.MISSING);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued");
sortedSet.setDocument(0);

View File

@ -120,7 +120,7 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
cache.purgeAllCaches();
cache.getInts(readerX, "theInt", FieldCache.NUMERIC_UTILS_INT_PARSER, false);
cache.getTerms(readerX, "theInt");
cache.getTerms(readerX, "theInt", false);
// // //
@ -142,9 +142,9 @@ public class TestFieldCacheSanityChecker extends LuceneTestCase {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getTerms(readerA, "theInt");
cache.getTerms(readerB, "theInt");
cache.getTerms(readerX, "theInt");
cache.getTerms(readerA, "theInt", false);
cache.getTerms(readerB, "theInt", false);
cache.getTerms(readerX, "theInt", false);
// // //

View File

@ -56,7 +56,7 @@ public class TestFailOnFieldCacheInsanity extends WithNestedTests {
public void testDummy() throws Exception {
makeIndex();
assertNotNull(FieldCache.DEFAULT.getTermsIndex(subR, "ints"));
assertNotNull(FieldCache.DEFAULT.getTerms(subR, "ints"));
assertNotNull(FieldCache.DEFAULT.getTerms(subR, "ints", false));
// NOTE: do not close reader/directory, else it
// purges FC entries
}

View File

@ -109,7 +109,7 @@ abstract class TermsCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false);
}
}

View File

@ -129,7 +129,7 @@ abstract class TermsWithScoreCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false);
}
static class Avg extends SV {

View File

@ -47,6 +47,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
@ -511,13 +512,14 @@ public class TestJoinUtil extends LuceneTestCase {
private Scorer scorer;
private BinaryDocValues terms;
private Bits docsWithField;
private final BytesRef spare = new BytesRef();
@Override
public void collect(int doc) throws IOException {
terms.get(doc, spare);
BytesRef joinValue = spare;
if (joinValue.bytes == BinaryDocValues.MISSING) {
if (joinValue.length == 0 && !docsWithField.get(doc)) {
return;
}
@ -530,7 +532,8 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField);
terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
}
@Override
@ -628,7 +631,7 @@ public class TestJoinUtil extends LuceneTestCase {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
terms = FieldCache.DEFAULT.getTerms(context.reader(), toField);
terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
docBase = context.docBase;
}

View File

@ -148,8 +148,16 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
@Override
public void fillValue(int doc) {
termsIndex.get(doc, mval.value);
mval.exists = mval.value.bytes != SortedDocValues.MISSING;
int ord = termsIndex.getOrd(doc);
if (ord == -1) {
mval.value.bytes = BytesRef.EMPTY_BYTES;
mval.value.offset = 0;
mval.value.length = 0;
mval.exists = false;
} else {
termsIndex.lookupOrd(ord, mval.value);
mval.exists = true;
}
}
};
}

View File

@ -45,8 +45,8 @@ public class BytesRefFieldSource extends FieldCacheSource {
// To be sorted or not to be sorted, that is the question
// TODO: do it cleaner?
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field, true);
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
return new FunctionValues() {
@Override

View File

@ -56,7 +56,7 @@ public class JoinDocFreqValueSource extends FieldCacheSource {
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
{
final BinaryDocValues terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST);
final BinaryDocValues terms = cache.getTerms(readerContext.reader(), field, false, PackedInts.FAST);
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
Terms t = MultiFields.getTerms(top, qfield);
final TermsEnum termsEnum = t == null ? TermsEnum.EMPTY : t.iterator(null);

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/** Sorts by a field's value using the given Collator
@ -39,6 +40,7 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
private final String[] values;
private BinaryDocValues currentDocTerms;
private Bits docsWithField;
private final String field;
final Collator collator;
private String bottom;
@ -68,7 +70,7 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
@Override
public int compareBottom(int doc) {
currentDocTerms.get(doc, tempBR);
final String val2 = tempBR.bytes == BinaryDocValues.MISSING ? null : tempBR.utf8ToString();
final String val2 = tempBR.length == 0 && docsWithField.get(doc) == false ? null : tempBR.utf8ToString();
if (bottom == null) {
if (val2 == null) {
return 0;
@ -83,7 +85,7 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
@Override
public void copy(int slot, int doc) {
currentDocTerms.get(doc, tempBR);
if (tempBR.bytes == BinaryDocValues.MISSING) {
if (tempBR.length == 0 && docsWithField.get(doc) == false) {
values[slot] = null;
} else {
values[slot] = tempBR.utf8ToString();
@ -92,7 +94,8 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
@Override
public FieldComparator<String> setNextReader(AtomicReaderContext context) throws IOException {
currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, true);
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
return this;
}
@ -124,7 +127,7 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
public int compareDocToValue(int doc, String value) {
currentDocTerms.get(doc, tempBR);
final String docValue;
if (tempBR.bytes == BinaryDocValues.MISSING) {
if (tempBR.length == 0 && docsWithField.get(doc) == false) {
docValue = null;
} else {
docValue = tempBR.utf8ToString();

View File

@ -2635,7 +2635,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
for(int docID=0;docID<docBytes.size();docID++) {
StoredDocument doc = ar.document(docID);
BytesRef bytes = new BytesRef();
@ -2707,7 +2707,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
for(int docID=0;docID<docBytes.size();docID++) {
StoredDocument doc = ar.document(docID);
BytesRef bytes = new BytesRef();