GITHUB#13449: Sparse index, optional skip list on top of doc values (#13449)

An optional skip list on top of doc values, exposed via the DocValuesSkipper abstraction. A new flag is
added to FieldType.java that configures whether to create a "skip index" for doc values.
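
A minimal usage sketch (not part of this commit; the class, directory and field name are illustrative
placeholders) showing the new indexedField() factory and the new LeafReader#getDocValuesSkipper API:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SkipIndexExample {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      // indexedField(...) enables the skip index; the plain constructor keeps the old behavior.
      // The same flag can also be set on a custom FieldType via setDocValuesSkipIndex(true).
      doc.add(NumericDocValuesField.indexedField("price", 42L));
      writer.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext ctx : reader.leaves()) {
        // Returns null when the field was not indexed with a skip index.
        DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("price");
        if (skipper != null) {
          System.out.println("value range: " + skipper.minValue() + ".." + skipper.maxValue());
        }
      }
    }
  }
}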

Co-authored-by: Adrien Grand <jpountz@gmail.com>
Ignacio Vera 2024-06-13 10:17:50 +02:00 committed by GitHub
parent 1c655823dd
commit 048770205c
63 changed files with 2177 additions and 55 deletions

View File

@ -129,6 +129,10 @@ New Features
* GITHUB#13233: Add RomanianNormalizationFilter (Trey Jones, Robert Muir)
* GITHUB#13449: Sparse index: optional skip list on top of doc values which is exposed via the
DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether
to create a "skip index" for doc values. (Ignacio Vera)
Improvements
---------------------

View File

@ -209,6 +209,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
dvGen,
attributes,
pointDataDimensionCount,

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
@ -1677,6 +1678,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
}
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) {
return null;
}
@Override
public void checkIntegrity() throws IOException {
CodecUtil.checksumEntireFile(data);

View File

@ -186,6 +186,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
dvGen,
attributes,
pointDataDimensionCount,

View File

@ -16,13 +16,16 @@
*/
package org.apache.lucene.codecs.simpletext;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.DOCCOUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORIGIN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
@ -40,6 +43,7 @@ import java.util.function.IntFunction;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@ -59,12 +63,15 @@ import org.apache.lucene.util.StringHelper;
class SimpleTextDocValuesReader extends DocValuesProducer {
static class OneField {
int docCount;
long dataStartFilePointer;
String pattern;
String ordPattern;
int maxLength;
boolean fixedLength;
long origin;
long minValue;
long maxValue;
long numValues;
}
@ -99,17 +106,34 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE));
assert dvType != DocValuesType.NONE;
if (dvType == DocValuesType.NUMERIC) {
if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
readLine();
assert startsWith(MINVALUE)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.minValue = Long.parseLong(stripPrefix(MINVALUE));
readLine();
assert startsWith(MAXVALUE)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.maxValue = Long.parseLong(stripPrefix(MAXVALUE));
}
readLine();
assert startsWith(DOCCOUNT)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.docCount = Integer.parseInt(stripPrefix(DOCCOUNT));
if (dvType == DocValuesType.NUMERIC) {
readLine();
assert startsWith(ORIGIN)
: "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
field.origin = Long.parseLong(stripPrefix(ORIGIN));
readLine();
assert startsWith(PATTERN);
field.pattern = stripPrefix(PATTERN);
field.dataStartFilePointer = data.getFilePointer();
data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc);
} else if (dvType == DocValuesType.BINARY) {
} else if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED_NUMERIC) {
readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
@ -225,7 +249,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
throw new CorruptIndexException("failed to parse BigDecimal value", in, pe);
}
SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
return BigInteger.valueOf(field.origin).add(bd.toBigIntegerExact()).longValue();
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
@ -824,4 +848,82 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
}
}
}
@Override
public DocValuesSkipper getSkipper(FieldInfo fieldInfo) {
final boolean numeric =
fieldInfo.getDocValuesType() == DocValuesType.NUMERIC
|| fieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
final OneField field = fields.get(fieldInfo.name);
// SegmentCoreReaders already verifies this field is
// valid:
assert field != null;
return new DocValuesSkipper() {
int doc = -1;
@Override
public int numLevels() {
return 1;
}
@Override
public long minValue(int level) {
return minValue();
}
@Override
public long maxValue(int level) {
return maxValue();
}
@Override
public int docCount(int level) {
return docCount();
}
@Override
public long minValue() {
return numeric ? field.minValue : 0;
}
@Override
public long maxValue() {
return numeric ? field.maxValue : field.numValues - 1;
}
@Override
public int docCount() {
return field.docCount;
}
@Override
public int minDocID(int level) {
if (doc == -1) {
return -1;
} else if (doc >= maxDoc || field.docCount == 0) {
return DocIdSetIterator.NO_MORE_DOCS;
} else {
return 0;
}
}
@Override
public int maxDocID(int level) {
if (doc == -1) {
return -1;
} else if (doc >= maxDoc || field.docCount == 0) {
return DocIdSetIterator.NO_MORE_DOCS;
} else {
return maxDoc - 1;
}
}
@Override
public void advance(int target) {
doc = target;
}
};
}
}

View File

@ -46,8 +46,13 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
static final BytesRef END = new BytesRef("END");
static final BytesRef FIELD = new BytesRef("field ");
static final BytesRef TYPE = new BytesRef(" type ");
static final BytesRef DOCCOUNT = new BytesRef(" doccount ");
// used for numerics
static final BytesRef MINVALUE = new BytesRef(" minvalue ");
static final BytesRef ORIGIN = new BytesRef(" origin "); // for deltas
static final BytesRef MINVALUE = new BytesRef(" minvalue ");
static final BytesRef MAXVALUE = new BytesRef(" maxvalue ");
static final BytesRef PATTERN = new BytesRef(" pattern ");
// used for bytes
static final BytesRef LENGTH = new BytesRef("length ");
@ -97,13 +102,27 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
maxValue = Math.max(maxValue, v);
numValues++;
}
// write absolute min and max for skipper
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, MAXVALUE);
SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(numValues), scratch);
SimpleTextUtil.writeNewline(data);
if (numValues != numDocs) {
minValue = Math.min(minValue, 0);
maxValue = Math.max(maxValue, 0);
}
// write our minimum value to the .dat, all entries are deltas from that
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, ORIGIN);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
@ -161,6 +180,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
assert fieldSeen(field.name);
assert field.getDocValuesType() == DocValuesType.BINARY;
writeFieldEntry(field, DocValuesType.BINARY);
doAddBinaryField(field, valuesProducer);
}
@ -168,10 +188,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
throws IOException {
int maxLength = 0;
BinaryDocValues values = valuesProducer.getBinary(field);
int docCount = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
maxLength = Math.max(maxLength, values.binaryValue().length);
}
writeFieldEntry(field, DocValuesType.BINARY);
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
@ -232,6 +257,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert field.getDocValuesType() == DocValuesType.SORTED;
writeFieldEntry(field, DocValuesType.SORTED);
int docCount = 0;
SortedDocValues values = valuesProducer.getSorted(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
}
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
int valueCount = 0;
int maxLength = -1;
TermsEnum terms = valuesProducer.getSorted(field).termsEnum();
@ -301,7 +335,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert valuesSeen == valueCount;
SortedDocValues values = valuesProducer.getSorted(field);
values = valuesProducer.getSorted(field);
for (int i = 0; i < numDocs; ++i) {
if (values.docID() < i) {
values.nextDoc();
@ -321,6 +355,28 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
throws IOException {
assert fieldSeen(field.name);
assert field.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
writeFieldEntry(field, DocValuesType.SORTED_NUMERIC);
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0; i < values.docValueCount(); ++i) {
long v = values.nextValue();
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
}
}
// write absolute min and max for skipper
SimpleTextUtil.write(data, MINVALUE);
SimpleTextUtil.write(data, Long.toString(minValue), scratch);
SimpleTextUtil.writeNewline(data);
SimpleTextUtil.write(data, MAXVALUE);
SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
SimpleTextUtil.writeNewline(data);
doAddBinaryField(
field,
new EmptyDocValuesProducer() {
@ -395,6 +451,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
assert field.getDocValuesType() == DocValuesType.SORTED_SET;
writeFieldEntry(field, DocValuesType.SORTED_SET);
int docCount = 0;
SortedSetDocValues values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
++docCount;
}
SimpleTextUtil.write(data, DOCCOUNT);
SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
SimpleTextUtil.writeNewline(data);
long valueCount = 0;
int maxLength = 0;
TermsEnum terms = valuesProducer.getSortedSet(field).termsEnum();
@ -430,7 +495,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
// length
int maxOrdListLength = 0;
StringBuilder sb2 = new StringBuilder();
SortedSetDocValues values = valuesProducer.getSortedSet(field);
values = valuesProducer.getSortedSet(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
sb2.setLength(0);
for (int i = 0; i < values.docValueCount(); i++) {

View File

@ -60,6 +60,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
static final BytesRef NORMS = new BytesRef(" norms ");
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
static final BytesRef DOCVALUES_SKIP_INDEX = new BytesRef(" doc values skip index ");
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
@ -122,6 +123,11 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
String dvType = readString(DOCVALUES.length, scratch);
final DocValuesType docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
boolean docValueSkipper =
Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
@ -184,6 +190,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
docValueSkipper,
dvGen,
Collections.unmodifiableMap(atts),
dimensionalCount,
@ -276,6 +283,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN);
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
SimpleTextUtil.writeNewline(out);

View File

@ -37,6 +37,12 @@ import org.apache.lucene.util.BytesRef;
public class TestSimpleTextDocValuesFormat extends BaseDocValuesFormatTestCase {
private final Codec codec = new SimpleTextCodec();
@Override
protected boolean skipperHasAccurateDocBounds() {
// This format always returns minDocID = 0 and maxDocID = maxDoc - 1
return false;
}
@Override
protected Codec getCodec() {
return codec;

View File

@ -111,6 +111,7 @@ public class TestBlockWriter extends LuceneTestCase {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
-1,
Collections.emptyMap(),
0,

View File

@ -198,6 +198,7 @@ public class TestSTBlockReader extends LuceneTestCase {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
-1,
Collections.emptyMap(),
0,

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
@ -73,6 +74,13 @@ public abstract class DocValuesProducer implements Closeable {
*/
public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;
/**
* Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread. The return value is undefined if {@link
* FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
*/
public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;
/**
* Checks consistency of this producer
*

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene90;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_SIZE;
import java.io.IOException;
import java.util.Arrays;
@ -43,6 +44,7 @@ import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -129,16 +131,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.NUMERIC);
writeValues(
field,
DocValuesProducer producer =
new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return DocValues.singleton(valuesProducer.getNumeric(field));
}
},
false);
};
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, producer);
}
writeValues(field, producer, false);
}
private static class MinMaxTracker {
@ -183,6 +186,84 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}
}
private static class SkipAccumulator {
int minDocID;
int maxDocID;
int docCount;
long minValue;
long maxValue;
SkipAccumulator(int docID) {
minDocID = docID;
minValue = Long.MAX_VALUE;
maxValue = Long.MIN_VALUE;
docCount = 0;
}
void accumulate(long value) {
minValue = Math.min(minValue, value);
maxValue = Math.max(maxValue, value);
}
void nextDoc(int docID) {
maxDocID = docID;
++docCount;
}
void writeTo(DataOutput output) throws IOException {
output.writeInt(maxDocID);
output.writeInt(minDocID);
output.writeLong(maxValue);
output.writeLong(minValue);
output.writeInt(docCount);
}
}
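// Each call to writeTo above appends one fixed-size record: maxDocID (int), minDocID (int),
// maxValue (long), minValue (long) and docCount (int), i.e. 28 bytes per skip block. writeSkipIndex
// below flushes one block per SKIP_INDEX_INTERVAL_SIZE (4096) docs that have a value, plus a final
// partial block, and Lucene90DocValuesProducer#getSkipper reads the records back sequentially.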
private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
assert field.hasDocValuesSkipIndex();
// TODO: Add disk compression once we introduce levels
long start = data.getFilePointer();
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
long globalMaxValue = Long.MIN_VALUE;
long globalMinValue = Long.MAX_VALUE;
int globalDocCount = 0;
int maxDocId = -1;
SkipAccumulator accumulator = null;
int counter = 0;
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
if (counter == 0) {
accumulator = new SkipAccumulator(doc);
}
accumulator.nextDoc(doc);
for (int i = 0, end = values.docValueCount(); i < end; ++i) {
accumulator.accumulate(values.nextValue());
}
if (++counter == SKIP_INDEX_INTERVAL_SIZE) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
maxDocId = accumulator.maxDocID;
accumulator.writeTo(data);
counter = 0;
}
}
if (counter > 0) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
maxDocId = accumulator.maxDocID;
accumulator.writeTo(data);
}
meta.writeLong(start); // record the start in meta
meta.writeLong(data.getFilePointer() - start); // record the length
meta.writeLong(globalMaxValue);
meta.writeLong(globalMinValue);
meta.writeInt(globalDocCount);
meta.writeInt(maxDocId);
}
private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
@ -489,13 +570,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeInt(field.number);
meta.writeByte(Lucene90DocValuesFormat.SORTED);
doAddSortedField(field, valuesProducer);
doAddSortedField(field, valuesProducer, false);
}
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
writeValues(
field,
private void doAddSortedField(
FieldInfo field, DocValuesProducer valuesProducer, boolean addTypeByte) throws IOException {
DocValuesProducer producer =
new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
@ -534,8 +614,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
};
return DocValues.singleton(sortedOrds);
}
},
true);
};
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, producer);
}
if (addTypeByte) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
}
writeValues(field, producer, true);
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
@ -702,6 +788,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void doAddSortedNumericField(
FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
if (field.hasDocValuesSkipIndex()) {
writeSkipIndex(field, valuesProducer);
}
if (ords) {
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
}
long[] stats = writeValues(field, valuesProducer, ords);
int numDocsWithField = Math.toIntExact(stats[0]);
long numValues = stats[1];
@ -753,7 +845,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
meta.writeByte(Lucene90DocValuesFormat.SORTED_SET);
if (isSingleValued(valuesProducer.getSortedSet(field))) {
meta.writeByte((byte) 0); // multiValued (0 = singleValued)
doAddSortedField(
field,
new EmptyDocValuesProducer() {
@ -762,10 +854,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
return SortedSetSelector.wrap(
valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
}
});
},
true);
return;
}
meta.writeByte((byte) 1); // multiValued (1 = multiValued)
doAddSortedNumericField(
field,

View File

@ -181,4 +181,7 @@ public final class Lucene90DocValuesFormat extends DocValuesFormat {
static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10;
static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;
static final int SKIP_INDEX_INTERVAL_SHIFT = 12;
static final int SKIP_INDEX_INTERVAL_SIZE = 1 << SKIP_INDEX_INTERVAL_SHIFT;
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
@ -39,6 +40,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@ -59,6 +61,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
private final Map<String, SortedEntry> sorted;
private final Map<String, SortedSetEntry> sortedSets;
private final Map<String, SortedNumericEntry> sortedNumerics;
private final Map<String, DocValuesSkipperEntry> skippers;
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@ -80,6 +83,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
sorted = new HashMap<>();
sortedSets = new HashMap<>();
sortedNumerics = new HashMap<>();
skippers = new HashMap<>();
merging = false;
// read in the entries from the metadata file.
@ -147,6 +151,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
Map<String, SortedEntry> sorted,
Map<String, SortedSetEntry> sortedSets,
Map<String, SortedNumericEntry> sortedNumerics,
Map<String, DocValuesSkipperEntry> skippers,
IndexInput data,
int maxDoc,
int version,
@ -156,6 +161,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
this.sorted = sorted;
this.sortedSets = sortedSets;
this.sortedNumerics = sortedNumerics;
this.skippers = skippers;
this.data = data.clone();
this.maxDoc = maxDoc;
this.version = version;
@ -165,7 +171,16 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public DocValuesProducer getMergeInstance() {
return new Lucene90DocValuesProducer(
numerics, binaries, sorted, sortedSets, sortedNumerics, data, maxDoc, version, true);
numerics,
binaries,
sorted,
sortedSets,
sortedNumerics,
skippers,
data,
maxDoc,
version,
true);
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
@ -175,6 +190,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
byte type = meta.readByte();
if (info.hasDocValuesSkipIndex()) {
skippers.put(info.name, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene90DocValuesFormat.BINARY) {
@ -197,6 +215,17 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
return entry;
}
private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IOException {
long offset = meta.readLong();
long length = meta.readLong();
long maxValue = meta.readLong();
long minValue = meta.readLong();
int docCount = meta.readInt();
int maxDocID = meta.readInt();
return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID);
}
private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException {
entry.docsWithFieldOffset = meta.readLong();
entry.docsWithFieldLength = meta.readLong();
@ -326,6 +355,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
data.close();
}
private record DocValuesSkipperEntry(
long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {}
private static class NumericEntry {
long[] table;
int blockShift;
@ -1749,4 +1781,88 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
return mul * values.get(index & mask) + delta;
}
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
final DocValuesSkipperEntry entry = skippers.get(field.name);
final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
// Prefetch the first page of data. Following pages are expected to get prefetched through
// read-ahead.
if (input.length() > 0) {
input.prefetch(0, 1);
}
return new DocValuesSkipper() {
int minDocID = -1;
int maxDocID = -1;
long minValue, maxValue;
int docCount;
@Override
public void advance(int target) throws IOException {
if (target > entry.maxDocId) {
minDocID = DocIdSetIterator.NO_MORE_DOCS;
maxDocID = DocIdSetIterator.NO_MORE_DOCS;
} else {
while (true) {
maxDocID = input.readInt();
if (maxDocID >= target) {
minDocID = input.readInt();
maxValue = input.readLong();
minValue = input.readLong();
docCount = input.readInt();
break;
} else {
input.skipBytes(24);
}
}
}
}
@Override
public int numLevels() {
return 1;
}
@Override
public int minDocID(int level) {
return minDocID;
}
@Override
public int maxDocID(int level) {
return maxDocID;
}
@Override
public long minValue(int level) {
return minValue;
}
@Override
public long maxValue(int level) {
return maxValue;
}
@Override
public int docCount(int level) {
return docCount;
}
@Override
public long minValue() {
return entry.minValue;
}
@Override
public long maxValue() {
return entry.maxValue;
}
@Override
public int docCount() {
return entry.docCount;
}
};
}
}

View File

@ -163,8 +163,10 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
boolean isParentField =
format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
boolean hasDocValuesSkipIndex =
format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;
if ((bits & 0xE0) != 0) {
if ((bits & 0xC0) != 0) {
throw new CorruptIndexException(
"unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input);
}
@ -173,6 +175,13 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
"parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"",
input);
}
if (format < FORMAT_DOCVALUE_SKIPPER && (bits & DOCVALUES_SKIPPER) != 0) {
throw new CorruptIndexException(
"doc values skipper bit is set but shouldn't \""
+ Integer.toBinaryString(bits)
+ "\"",
input);
}
final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
@ -208,6 +217,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
hasDocValuesSkipIndex,
dvGen,
attributes,
pointDataDimensionCount,
@ -394,6 +404,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
output.writeByte(bits);
output.writeByte(indexOptionsByte(fi.getIndexOptions()));
@ -423,7 +434,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
static final int FORMAT_START = 0;
// this doesn't actually change the file format but uses up one more bit in an existing bit pattern
static final int FORMAT_PARENT_FIELD = 1;
static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD;
static final int FORMAT_DOCVALUE_SKIPPER = 2;
static final int FORMAT_CURRENT = FORMAT_DOCVALUE_SKIPPER;
// Field flags
static final byte STORE_TERMVECTOR = 0x1;
@ -431,4 +443,5 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
static final byte STORE_PAYLOADS = 0x4;
static final byte SOFT_DELETES_FIELD = 0x8;
static final byte PARENT_FIELD_FIELD = 0x10;
static final byte DOCVALUES_SKIPPER = 0x20;
}

View File

@ -28,6 +28,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
@ -346,6 +347,12 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
return producer == null ? null : producer.getSortedSet(field);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSkipper(field);
}
@Override
public void close() throws IOException {
IOUtils.close(formats.values());

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
/**
* Wrapper around a {@link TwoPhaseIterator} for a doc-values range query that speeds things up by
* taking advantage of a {@link DocValuesSkipper}.
*/
final class DocValuesRangeIterator extends TwoPhaseIterator {
enum Match {
/** None of the documents in the range match */
NO,
/** Document values need to be checked to verify matches */
MAYBE,
/** All documents in the range that have a value match */
IF_DOC_HAS_VALUE,
/** All docs in the range match */
YES;
}
private final Approximation approximation;
private final TwoPhaseIterator innerTwoPhase;
DocValuesRangeIterator(
TwoPhaseIterator twoPhase, DocValuesSkipper skipper, long lowerValue, long upperValue) {
super(new Approximation(twoPhase.approximation(), skipper, lowerValue, upperValue));
this.approximation = (Approximation) approximation();
this.innerTwoPhase = twoPhase;
}
static class Approximation extends DocIdSetIterator {
private final DocIdSetIterator innerApproximation;
private final DocValuesSkipper skipper;
private final long lowerValue;
private final long upperValue;
private int doc = -1;
// Track a decision for all doc IDs between the current doc ID and upTo inclusive.
Match match = Match.MAYBE;
int upTo = -1;
Approximation(
DocIdSetIterator innerApproximation,
DocValuesSkipper skipper,
long lowerValue,
long upperValue) {
this.innerApproximation = innerApproximation;
this.skipper = skipper;
this.lowerValue = lowerValue;
this.upperValue = upperValue;
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(docID() + 1);
}
@Override
public int advance(int target) throws IOException {
while (true) {
if (target > upTo) {
skipper.advance(target);
// If target doesn't have a value and is between two blocks, it is possible that advance()
// moved to a block that doesn't contain `target`.
target = Math.max(target, skipper.minDocID(0));
if (target == NO_MORE_DOCS) {
return doc = NO_MORE_DOCS;
}
upTo = skipper.maxDocID(0);
match = match(0);
// If we have a YES or NO decision, see if we still have the same decision on a higher
// level (= on a wider range of doc IDs)
int nextLevel = 1;
while (match != Match.MAYBE
&& nextLevel < skipper.numLevels()
&& match == match(nextLevel)) {
upTo = skipper.maxDocID(nextLevel);
nextLevel++;
}
}
switch (match) {
case YES:
return doc = target;
case MAYBE:
case IF_DOC_HAS_VALUE:
if (target > innerApproximation.docID()) {
target = innerApproximation.advance(target);
}
if (target <= upTo) {
return doc = target;
}
// Otherwise we are breaking the invariant that `doc` must always be <= upTo, so let
// the loop run one more iteration to advance the skipper.
break;
case NO:
if (upTo == DocIdSetIterator.NO_MORE_DOCS) {
return doc = NO_MORE_DOCS;
}
target = upTo + 1;
break;
default:
throw new AssertionError("Unknown enum constant: " + match);
}
}
}
@Override
public long cost() {
return innerApproximation.cost();
}
private Match match(int level) {
long minValue = skipper.minValue(level);
long maxValue = skipper.maxValue(level);
if (minValue > upperValue || maxValue < lowerValue) {
return Match.NO;
} else if (minValue >= lowerValue && maxValue <= upperValue) {
if (skipper.docCount(level) == skipper.maxDocID(level) - skipper.minDocID(level) + 1) {
return Match.YES;
} else {
return Match.IF_DOC_HAS_VALUE;
}
} else {
return Match.MAYBE;
}
}
}
@Override
public final boolean matches() throws IOException {
return switch (approximation.match) {
case YES -> true;
case IF_DOC_HAS_VALUE -> true;
case MAYBE -> innerTwoPhase.matches();
case NO -> throw new IllegalStateException("Unpositioned approximation");
};
}
@Override
public float matchCost() {
return innerTwoPhase.matchCost();
}
}
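
To make the Match decisions above concrete with illustrative numbers: for a query range [10, 20], a block whose values span [30, 40] is disjoint and yields NO; a block spanning [12, 18] in which every doc in the block has a value yields YES; the same [12, 18] block where some docs lack a value yields IF_DOC_HAS_VALUE; and a block spanning [5, 15] only partially overlaps the query range, so it yields MAYBE and per-document checks are needed.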

View File

@ -22,6 +22,7 @@ import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.VectorEncoding;
@ -40,6 +41,7 @@ public class FieldType implements IndexableFieldType {
private IndexOptions indexOptions = IndexOptions.NONE;
private boolean frozen;
private DocValuesType docValuesType = DocValuesType.NONE;
private boolean docValuesSkipIndex;
private int dimensionCount;
private int indexDimensionCount;
private int dimensionNumBytes;
@ -59,6 +61,7 @@ public class FieldType implements IndexableFieldType {
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValuesType = ref.docValuesType();
this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
this.dimensionCount = ref.pointDimensionCount();
this.indexDimensionCount = ref.pointIndexDimensionCount();
this.dimensionNumBytes = ref.pointNumBytes();
@ -504,6 +507,22 @@ public class FieldType implements IndexableFieldType {
docValuesType = type;
}
@Override
public boolean hasDocValuesSkipIndex() {
return docValuesSkipIndex;
}
/**
* Set whether to enable a skip index for doc values on this field. This is typically useful on
* fields that are part of the {@link IndexWriterConfig#setIndexSort index sort}, or that
* correlate with fields that are part of the index sort, so that values can be expected to be
* clustered in the doc ID space.
*/
public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
checkIfFrozen();
this.docValuesSkipIndex = docValuesSkipIndex;
}
@Override
public int hashCode() {
final int prime = 31;
@ -512,6 +531,7 @@ public class FieldType implements IndexableFieldType {
result = prime * result + indexDimensionCount;
result = prime * result + dimensionNumBytes;
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + Boolean.hashCode(docValuesSkipIndex);
result = prime * result + indexOptions.hashCode();
result = prime * result + (omitNorms ? 1231 : 1237);
result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);
@ -533,6 +553,7 @@ public class FieldType implements IndexableFieldType {
if (indexDimensionCount != other.indexDimensionCount) return false;
if (dimensionNumBytes != other.dimensionNumBytes) return false;
if (docValuesType != other.docValuesType) return false;
if (docValuesSkipIndex != other.docValuesSkipIndex) return false;
if (indexOptions != other.indexOptions) return false;
if (omitNorms != other.omitNorms) return false;
if (storeTermVectorOffsets != other.storeTermVectorOffsets) return false;

View File

@ -35,9 +35,27 @@ public class NumericDocValuesField extends Field {
/** Type for numeric DocValues. */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.NUMERIC);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param value 64-bit long value
* @throws IllegalArgumentException if the field name is null
*/
public static NumericDocValuesField indexedField(String name, long value) {
return new NumericDocValuesField(name, value, INDEXED_TYPE);
}
/**
@ -60,7 +78,11 @@ public class NumericDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public NumericDocValuesField(String name, Long value) {
super(name, TYPE);
this(name, value, TYPE);
}
private NumericDocValuesField(String name, Long value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
}

View File

@ -41,9 +41,27 @@ public class SortedDocValuesField extends Field {
/** Type for sorted bytes DocValues */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.SORTED);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param bytes binary content
* @throws IllegalArgumentException if the field name is null
*/
public static SortedDocValuesField indexedField(String name, BytesRef bytes) {
return new SortedDocValuesField(name, bytes, INDEXED_TYPE);
}
/**
@ -54,7 +72,11 @@ public class SortedDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public SortedDocValuesField(String name, BytesRef bytes) {
super(name, TYPE);
this(name, bytes, TYPE);
}
private SortedDocValuesField(String name, BytesRef bytes, FieldType fieldType) {
super(name, fieldType);
fieldsData = bytes;
}

View File

@ -43,9 +43,27 @@ public class SortedNumericDocValuesField extends Field {
/** Type for sorted numeric DocValues. */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedNumericDocValuesField} with the specified 64-bit long value that
* also creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param value 64-bit long value
* @throws IllegalArgumentException if the field name is null
*/
public static SortedNumericDocValuesField indexedField(String name, long value) {
return new SortedNumericDocValuesField(name, value, INDEXED_TYPE);
}
/**
@ -56,8 +74,12 @@ public class SortedNumericDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public SortedNumericDocValuesField(String name, long value) {
super(name, TYPE);
fieldsData = Long.valueOf(value);
this(name, Long.valueOf(value), TYPE);
}
private SortedNumericDocValuesField(String name, Long value, FieldType fieldType) {
super(name, fieldType);
fieldsData = value;
}
/**

View File

@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
@ -109,9 +110,17 @@ final class SortedNumericDocValuesRangeQuery extends Query {
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
return null;
}
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
if (skipper != null) {
if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
return null;
}
}
SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
final NumericDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
TwoPhaseIterator iterator;
if (singleton != null) {
iterator =
new TwoPhaseIterator(singleton) {
@ -149,6 +158,9 @@ final class SortedNumericDocValuesRangeQuery extends Query {
}
};
}
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue);
}
final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
return new DefaultScorerSupplier(scorer);
}

View File

@ -42,9 +42,27 @@ public class SortedSetDocValuesField extends Field {
/** Type for sorted bytes DocValues */
public static final FieldType TYPE = new FieldType();
private static final FieldType INDEXED_TYPE;
static {
TYPE.setDocValuesType(DocValuesType.SORTED_SET);
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedSetDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
*
* @param name field name
* @param bytes binary content
* @throws IllegalArgumentException if the field name is null
*/
public static SortedSetDocValuesField indexedField(String name, BytesRef bytes) {
return new SortedSetDocValuesField(name, bytes, INDEXED_TYPE);
}
/**
@ -55,7 +73,11 @@ public class SortedSetDocValuesField extends Field {
* @throws IllegalArgumentException if the field name is null
*/
public SortedSetDocValuesField(String name, BytesRef bytes) {
super(name, TYPE);
this(name, bytes, TYPE);
}
private SortedSetDocValuesField(String name, BytesRef bytes, FieldType fieldType) {
super(name, fieldType);
fieldsData = bytes;
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@ -113,6 +114,7 @@ final class SortedSetDocValuesRangeQuery extends Query {
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
return null;
}
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
// implement ScorerSupplier, since we do some expensive stuff to make a scorer
@ -149,12 +151,15 @@ final class SortedSetDocValuesRangeQuery extends Query {
}
// no terms matched in this segment
if (minOrd > maxOrd) {
// no terms matched in this segment
if (minOrd > maxOrd
|| (skipper != null
&& (minOrd > skipper.maxValue() || maxOrd < skipper.minValue()))) {
return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
}
final SortedDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
TwoPhaseIterator iterator;
if (singleton != null) {
iterator =
new TwoPhaseIterator(singleton) {
@ -192,6 +197,9 @@ final class SortedSetDocValuesRangeQuery extends Query {
}
};
}
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd);
}
return new ConstantScoreScorer(score(), scoreMode, iterator);
}

View File

@ -365,6 +365,9 @@ public final class CheckIndex implements Closeable {
/** Total number of sortedset fields */
public long totalSortedSetFields;
/** Total number of skip indexes tested. */
public long totalSkippingIndex;
/** Exception thrown during doc values test (null on success) */
public Throwable error;
}
@ -3228,13 +3231,14 @@ public final class CheckIndex implements Closeable {
infoStream,
String.format(
Locale.ROOT,
"OK [%d docvalues fields; %d BINARY; %d NUMERIC; %d SORTED; %d SORTED_NUMERIC; %d SORTED_SET] [took %.3f sec]",
"OK [%d docvalues fields; %d BINARY; %d NUMERIC; %d SORTED; %d SORTED_NUMERIC; %d SORTED_SET; %d SKIPPING INDEX] [took %.3f sec]",
status.totalValueFields,
status.totalBinaryFields,
status.totalNumericFields,
status.totalSortedFields,
status.totalSortedNumericFields,
status.totalSortedSetFields,
status.totalSkippingIndex,
nsToSec(System.nanoTime() - startNS)));
} catch (Throwable e) {
if (failFast) {
@ -3254,6 +3258,94 @@ public final class CheckIndex implements Closeable {
DocValuesIterator get(FieldInfo fi) throws IOException;
}
private static void checkDocValueSkipper(FieldInfo fi, DocValuesSkipper skipper)
throws IOException {
String fieldName = fi.name;
if (skipper.maxDocID(0) != -1) {
throw new CheckIndexException(
"binary dv iterator for field: "
+ fieldName
+ " should start at docID=-1, but got "
+ skipper.maxDocID(0));
}
if (skipper.docCount() > 0 && skipper.minValue() > skipper.maxValue()) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " reports wrong global value range, got "
+ skipper.minValue()
+ " > "
+ skipper.maxValue());
}
int docCount = 0;
int doc;
while (true) {
doc = skipper.maxDocID(0) + 1;
skipper.advance(doc);
if (skipper.maxDocID(0) == NO_MORE_DOCS) {
break;
}
int levels = skipper.numLevels();
for (int level = 0; level < levels; level++) {
if (skipper.minDocID(level) < doc) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " reports wrong minDocID, got "
+ skipper.minDocID(level)
+ " < "
+ doc);
}
if (skipper.minDocID(level) > skipper.maxDocID(level)) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " reports wrong doc range, got "
+ skipper.minDocID(level)
+ " > "
+ skipper.maxDocID(level));
}
if (skipper.minValue() > skipper.minValue(level)) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " : global minValue "
+ skipper.minValue()
+ " , got "
+ skipper.minValue(level));
}
if (skipper.maxValue() < skipper.maxValue(level)) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " : global maxValue "
+ skipper.maxValue()
+ " , got "
+ skipper.maxValue(level));
}
if (skipper.minValue(level) > skipper.maxValue(level)) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " reports wrong value range, got "
+ skipper.minValue(level)
+ " > "
+ skipper.maxValue(level));
}
}
docCount += skipper.docCount(0);
}
if (skipper.docCount() != docCount) {
throw new CheckIndexException(
"skipper dv iterator for field: "
+ fieldName
+ " inconsistent docCount, got "
+ skipper.docCount()
+ " != "
+ docCount);
}
}
private static void checkDVIterator(FieldInfo fi, DocValuesIteratorSupplier producer)
throws IOException {
String field = fi.name;
@ -3627,6 +3719,10 @@ public final class CheckIndex implements Closeable {
private static void checkDocValues(
FieldInfo fi, DocValuesProducer dvReader, DocValuesStatus status) throws Exception {
if (fi.hasDocValuesSkipIndex()) {
status.totalSkippingIndex++;
checkDocValueSkipper(fi, dvReader.getSkipper(fi));
}
switch (fi.getDocValuesType()) {
case SORTED:
status.totalSortedFields++;

View File

@ -196,6 +196,16 @@ public abstract class CodecReader extends LeafReader {
return getDocValuesReader().getSortedSet(fi);
}
@Override
public final DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
ensureOpen();
FieldInfo fi = getFieldInfos().fieldInfo(field);
if (fi == null || fi.hasDocValuesSkipIndex() == false) {
return null;
}
return getDocValuesReader().getSkipper(fi);
}
@Override
public final NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();

View File

@ -108,4 +108,9 @@ abstract class DocValuesLeafReader extends LeafReader {
public final CacheHelper getReaderCacheHelper() {
throw new UnsupportedOperationException();
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
/**
* Skipper for {@link DocValues}.
*
* <p>A skipper has a position that can only be advanced via {@link #advance(int)}. The next advance
* position must be greater than {@link #maxDocID(int)} at level 0. A skipper's position, along with
* a {@code level}, determines the interval at which the skipper is currently situated.
*/
public abstract class DocValuesSkipper {
/**
* Advance this skipper so that all levels contain the next document on or after {@code target}.
*
* <p><b>NOTE</b>: The behavior is undefined if {@code target} is less than or equal to {@code
* maxDocID(0)}.
*
* <p><b>NOTE</b>: {@code minDocID(0)} may return a doc ID that is greater than {@code target} if
* the target document doesn't have a value.
*/
public abstract void advance(int target) throws IOException;
/** Return the number of levels. This number may change when moving to a different interval. */
public abstract int numLevels();
/**
* Return the minimum doc ID of the interval on the given level, inclusive. This returns {@code
* -1} if {@link #advance(int)} has not been called yet and {@link DocIdSetIterator#NO_MORE_DOCS}
* if the iterator is exhausted. This method is non-increasing when {@code level} increases. Said
* otherwise {@code minDocID(level+1) <= minDocID(level)}.
*/
public abstract int minDocID(int level);
/**
* Return the maximum doc ID of the interval on the given level, inclusive. This returns {@code
* -1} if {@link #advance(int)} has not been called yet and {@link DocIdSetIterator#NO_MORE_DOCS}
* if the iterator is exhausted. This method is non-decreasing when {@code level} decreases. Said
* otherwise {@code maxDocID(level+1) >= maxDocId(level)}.
*/
public abstract int maxDocID(int level);
/**
* Return the minimum value of the interval at the given level, inclusive.
*
* <p><b>NOTE</b>: It is only guaranteed that values in this interval are greater than or equal to
* the returned value. There is no guarantee that one document actually has this value.
*/
public abstract long minValue(int level);
/**
* Return the maximum value of the interval at the given level, inclusive.
*
* <p><b>NOTE</b>: It is only guaranteed that values in this interval are less than or equal to the
* returned value. There is no guarantee that one document actually has this value.
*/
public abstract long maxValue(int level);
/**
* Return the number of documents that have a value in the interval associated with the given
* level.
*/
public abstract int docCount(int level);
/**
* Return the global minimum value.
*
* <p><b>NOTE</b>: It is only guaranteed that values are greater than or equal to the returned value.
* There is no guarantee that one document actually has this value.
*/
public abstract long minValue();
/**
* Return the global maximum value.
*
* <p><b>NOTE</b>: It is only guaranteed that values are less than or equal to the
* There is no guarantee that one document actually has this value.
*/
public abstract long maxValue();
/** Return the global number of documents with a value for the field. */
public abstract int docCount();
}
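
As an informal illustration (not part of this change), a caller can enumerate the level-0 intervals of a skipper with the same pattern CheckIndex#checkDocValueSkipper uses earlier in this commit; the helper name and its arguments below are assumptions:

  // Hypothetical helper: walk every level-0 interval of a field's skip index.
  static void walkSkipIndex(org.apache.lucene.index.LeafReader reader, String field)
      throws java.io.IOException {
    org.apache.lucene.index.DocValuesSkipper skipper = reader.getDocValuesSkipper(field);
    if (skipper == null) {
      return; // the field has no skip index
    }
    while (true) {
      // maxDocID(0) is -1 before the first advance, so the first target is doc 0.
      skipper.advance(skipper.maxDocID(0) + 1);
      if (skipper.maxDocID(0) == org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS) {
        break;
      }
      long min = skipper.minValue(0); // inclusive lower bound of values in this interval
      long max = skipper.maxValue(0); // inclusive upper bound of values in this interval
      int docs = skipper.docCount(0); // docs with a value between minDocID(0) and maxDocID(0)
      // ... use min/max/docs to accept or skip the whole interval, e.g. for a range filter ...
    }
  }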

View File

@ -22,31 +22,37 @@ package org.apache.lucene.index;
*/
public enum DocValuesType {
/** No doc values for this field. */
NONE,
NONE(false),
/** A per-document Number */
NUMERIC,
NUMERIC(true),
/**
* A per-document byte[]. Values may be larger than 32766 bytes, but different codecs may enforce
* their own limits.
*/
BINARY,
BINARY(false),
/**
* A pre-sorted byte[]. Fields with this type only store distinct byte values and store an
* additional offset pointer per document to dereference the shared byte[]. The stored byte[] is
* presorted and allows access via document id, ordinal and by-value. Values must be {@code <=
* 32766} bytes.
*/
SORTED,
SORTED(true),
/**
* A pre-sorted Number[]. Fields with this type store numeric values in sorted order according to
* {@link Long#compare(long, long)}.
*/
SORTED_NUMERIC,
SORTED_NUMERIC(true),
/**
* A pre-sorted Set&lt;byte[]&gt;. Fields with this type only store distinct byte values and store
* additional offset pointers per document to dereference the shared byte[]s. The stored byte[] is
* presorted and allows access via document id, ordinal and by-value. Values must be {@code <=
* 32766} bytes.
*/
SORTED_SET,
SORTED_SET(true);
final boolean supportsSkipIndex; // pkg-private for use in FieldInfo
DocValuesType(boolean supportsSkipIndex) {
this.supportsSkipIndex = supportsSkipIndex;
}
}

View File

@ -51,6 +51,11 @@ public abstract class EmptyDocValuesProducer extends DocValuesProducer {
throw new UnsupportedOperationException();
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) {
throw new UnsupportedOperationException();
}
@Override
public void checkIntegrity() {
throw new UnsupportedOperationException();

View File

@ -28,13 +28,16 @@ import java.util.Objects;
* threads accessing this object.
*/
public final class FieldInfo {
/** Field's name */
public final String name;
/** Internal field number */
public final int number;
private DocValuesType docValuesType;
private DocValuesType docValuesType = DocValuesType.NONE;
private final boolean docValuesSkipIndex;
// True if any document indexed term vectors
private boolean storeTermVector;
@ -80,6 +83,7 @@ public final class FieldInfo {
boolean storePayloads,
IndexOptions indexOptions,
DocValuesType docValues,
boolean hasDocValuesSkipIndex,
long dvGen,
Map<String, String> attributes,
int pointDimensionCount,
@ -95,6 +99,7 @@ public final class FieldInfo {
this.docValuesType =
Objects.requireNonNull(
docValues, "DocValuesType must not be null (field: \"" + name + "\")");
this.docValuesSkipIndex = hasDocValuesSkipIndex;
this.indexOptions =
Objects.requireNonNull(
indexOptions, "IndexOptions must not be null (field: \"" + name + "\")");
@ -152,6 +157,13 @@ public final class FieldInfo {
if (docValuesType == null) {
throw new IllegalArgumentException("DocValuesType must not be null (field: '" + name + "')");
}
if (docValuesType.supportsSkipIndex == false && docValuesSkipIndex) {
throw new IllegalArgumentException(
"field '"
+ name
+ "' cannot have docValuesSkipIndex set to true with doc values type "
+ docValuesType);
}
if (dvGen != -1 && docValuesType == DocValuesType.NONE) {
throw new IllegalArgumentException(
"field '"
@ -235,6 +247,7 @@ public final class FieldInfo {
verifySameStoreTermVectors(fieldName, this.storeTermVector, o.storeTermVector);
}
verifySameDocValuesType(fieldName, this.docValuesType, o.docValuesType);
verifySameDocValuesSkipIndex(fieldName, this.docValuesSkipIndex, o.docValuesSkipIndex);
verifySamePointsOptions(
fieldName,
this.pointDimensionCount,
@ -289,6 +302,24 @@ public final class FieldInfo {
}
}
/**
* Verify that the provided docValuesSkipIndex settings are the same
*
* @throws IllegalArgumentException if they are not the same
*/
static void verifySameDocValuesSkipIndex(
String fieldName, boolean hasDocValuesSkipIndex1, boolean hasDocValuesSkipIndex2) {
if (hasDocValuesSkipIndex1 != hasDocValuesSkipIndex2) {
throw new IllegalArgumentException(
"cannot change field \""
+ fieldName
+ "\" from docValuesSkipIndex="
+ hasDocValuesSkipIndex1
+ " to inconsistent docValuesSkipIndex="
+ hasDocValuesSkipIndex2);
}
}
/**
* Verify that the provided store term vectors options are the same
*
@ -557,6 +588,11 @@ public final class FieldInfo {
return docValuesType;
}
/** Returns true if, and only if, this field has a skip index. */
public boolean hasDocValuesSkipIndex() {
return docValuesSkipIndex;
}
/** Sets the docValues generation of this field. */
void setDocValuesGen(long dvGen) {
this.dvGen = dvGen;

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.index;
import static org.apache.lucene.index.FieldInfo.verifySameDocValuesSkipIndex;
import static org.apache.lucene.index.FieldInfo.verifySameDocValuesType;
import static org.apache.lucene.index.FieldInfo.verifySameIndexOptions;
import static org.apache.lucene.index.FieldInfo.verifySameOmitNorms;
@ -364,6 +365,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
IndexOptions indexOptions,
IndexOptionsProperties indexOptionsProperties,
DocValuesType docValuesType,
boolean docValuesSkipIndex,
FieldDimensions fieldDimensions,
FieldVectorProperties fieldVectorProperties) {}
@ -442,6 +444,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
? new IndexOptionsProperties(fi.hasVectors(), fi.omitsNorms())
: null,
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
new FieldDimensions(
fi.getPointDimensionCount(),
fi.getPointIndexDimensionCount(),
@ -521,6 +524,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
DocValuesType currentDVType = fieldProperties.docValuesType;
verifySameDocValuesType(fieldName, currentDVType, fi.getDocValuesType());
boolean currentDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
verifySameDocValuesSkipIndex(
fieldName, currentDocValuesSkipIndex, fi.hasDocValuesSkipIndex());
FieldDimensions dims = fieldProperties.fieldDimensions;
verifySamePointsOptions(
@ -576,6 +582,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
false,
IndexOptions.NONE,
dvType,
false,
-1,
new HashMap<>(),
0,
@ -602,6 +609,15 @@ public class FieldInfos implements Iterable<FieldInfo> {
+ fieldDvType
+ "].");
}
boolean hasDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
if (hasDocValuesSkipIndex) {
throw new IllegalArgumentException(
"Can't update ["
+ dvType
+ "] doc values; the field ["
+ fieldName
+ "] must be doc values only field, bit it has doc values skip index");
}
FieldDimensions fdimensions = fieldProperties.fieldDimensions;
if (fdimensions != null && fdimensions.dimensionCount != 0) {
throw new IllegalArgumentException(
@ -660,6 +676,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
false,
IndexOptions.NONE,
dvType,
false,
-1,
new HashMap<>(),
0,
@ -780,6 +797,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
dvGen,
// original attributes is UnmodifiableMap
new HashMap<>(fi.attributes()),

View File

@ -441,6 +441,12 @@ public abstract class FilterLeafReader extends LeafReader {
return in.getSortedSetDocValues(field);
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
ensureOpen();
return in.getDocValuesSkipper(field);
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();

View File

@ -86,6 +86,9 @@ public interface IndexableFieldType {
/** DocValues {@link DocValuesType}: how the field's value will be indexed into docValues. */
DocValuesType docValuesType();
/** Whether a skip index for doc values should be created on this field. */
boolean hasDocValuesSkipIndex();
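When a FieldType is built by hand rather than through the indexedField factories, this flag corresponds to FieldType#setDocValuesSkipIndex, which the tests later in this change exercise; a minimal sketch, with the doc-values type chosen purely for illustration:
// Illustrative sketch: a FieldType for which hasDocValuesSkipIndex() returns true.
FieldType ft = new FieldType();
ft.setDocValuesType(DocValuesType.SORTED_NUMERIC); // a type that supports a skip index
ft.setDocValuesSkipIndex(true);
ft.freeze();
// Requesting the skip index with DocValuesType.NONE or BINARY is rejected when the
// field is indexed (see the FieldInfo and IndexingChain checks in this change).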
/**
* If this is positive (representing the number of point dimensions), the field is indexed as a
* point.

View File

@ -680,6 +680,7 @@ final class IndexingChain implements Accountable {
false,
s.indexOptions,
s.docValuesType,
s.hasDocValuesSkipIndex,
-1,
s.attributes,
s.pointDimensionCount,
@ -831,7 +832,12 @@ final class IndexingChain implements Accountable {
verifyUnIndexedFieldType(fieldName, fieldType);
}
if (fieldType.docValuesType() != DocValuesType.NONE) {
schema.setDocValues(fieldType.docValuesType());
schema.setDocValues(fieldType.docValuesType(), fieldType.hasDocValuesSkipIndex());
} else if (fieldType.hasDocValuesSkipIndex()) {
throw new IllegalArgumentException(
"field '"
+ schema.name
+ "' cannot have docValuesSkipIndex set to true without doc values");
}
if (fieldType.pointDimensionCount() != 0) {
schema.setPoints(
@ -1432,6 +1438,7 @@ final class IndexingChain implements Accountable {
private boolean storeTermVector = false;
private IndexOptions indexOptions = IndexOptions.NONE;
private DocValuesType docValuesType = DocValuesType.NONE;
private boolean hasDocValuesSkipIndex = false;
private int pointDimensionCount = 0;
private int pointIndexDimensionCount = 0;
private int pointNumBytes = 0;
@ -1497,11 +1504,13 @@ final class IndexingChain implements Accountable {
}
}
void setDocValues(DocValuesType newDocValuesType) {
void setDocValues(DocValuesType newDocValuesType, boolean newHasDocValuesSkipIndex) {
if (docValuesType == DocValuesType.NONE) {
this.docValuesType = newDocValuesType;
this.hasDocValuesSkipIndex = newHasDocValuesSkipIndex;
} else {
assertSame("doc values type", docValuesType, newDocValuesType);
assertSame("doc values skip index", hasDocValuesSkipIndex, newHasDocValuesSkipIndex);
}
}
@ -1549,6 +1558,7 @@ final class IndexingChain implements Accountable {
assertSame("omit norms", fi.omitsNorms(), omitNorms);
assertSame("store term vector", fi.hasVectors(), storeTermVector);
assertSame("doc values type", fi.getDocValuesType(), docValuesType);
assertSame("doc values skip index", fi.hasDocValuesSkipIndex(), hasDocValuesSkipIndex);
assertSame(
"vector similarity function", fi.getVectorSimilarityFunction(), vectorSimilarityFunction);
assertSame("vector encoding", fi.getVectorEncoding(), vectorEncoding);

View File

@ -202,6 +202,13 @@ public abstract non-sealed class LeafReader extends IndexReader {
*/
public abstract NumericDocValues getNormValues(String field) throws IOException;
/**
* Returns a {@link DocValuesSkipper} that allows skipping over ranges of doc IDs that are not of
* interest, or {@code null} if no skip index was indexed for this field. The returned instance
* should be confined to the thread that created it.
*/
public abstract DocValuesSkipper getDocValuesSkipper(String field) throws IOException;
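A minimal usage sketch of this accessor, with an illustrative field name; as documented above, the skipper is per-segment and should stay on the thread that obtained it:
// Illustrative sketch: acquire the skipper per leaf and handle fields without one.
for (LeafReaderContext ctx : indexReader.leaves()) {
  DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("timestamp");
  if (skipper == null) {
    continue; // no skip index for this field in this segment: use plain doc values
  }
  // ... consult skipper.minValue()/maxValue()/docCount() or walk its intervals ...
}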
/**
* Returns {@link FloatVectorValues} for this field, or null if no {@link FloatVectorValues} were
* indexed. The returned instance should only be used by a single thread.

View File

@ -399,6 +399,13 @@ public class ParallelLeafReader extends LeafReader {
return reader == null ? null : reader.getSortedSetDocValues(field);
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
ensureOpen();
LeafReader reader = fieldToReader.get(field);
return reader == null ? null : reader.getDocValuesSkipper(field);
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();

View File

@ -713,6 +713,7 @@ final class ReadersAndUpdates {
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
fi.getDocValuesGen(),
new HashMap<>(fi.attributes()),
fi.getPointDimensionCount(),

View File

@ -124,6 +124,13 @@ class SegmentDocValuesProducer extends DocValuesProducer {
return dvProducer.getSortedSet(field);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getSkipper(field);
}
@Override
public void checkIntegrity() throws IOException {
for (DocValuesProducer producer : dvProducers) {

View File

@ -240,6 +240,11 @@ public final class SlowCodecReaderWrapper {
return reader.getSortedSetDocValues(field.name);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
return reader.getDocValuesSkipper(field.name);
}
@Override
public void checkIntegrity() throws IOException {
// We already checkIntegrity the entire reader up front

View File

@ -494,6 +494,11 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
}
return new MultiSortedSetDocValues(values, docStarts, map, totalCost);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
throw new UnsupportedOperationException("This method is for searching not for merging");
}
}
@Override

View File

@ -637,6 +637,12 @@ public final class SortingCodecReader extends FilterCodecReader {
public void close() throws IOException {
delegate.close();
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
// Min/max values are tracked per range of doc IDs, which no longer lines up after doc IDs have been reordered, so return no skipper.
return null;
}
};
}

View File

@ -0,0 +1,273 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.tests.util.LuceneTestCase;
public class TestDocValuesRangeIterator extends LuceneTestCase {
public void testSingleLevel() throws IOException {
doTestBasics(false);
}
public void testMultipleLevels() throws IOException {
doTestBasics(true);
}
private void doTestBasics(boolean doLevels) throws IOException {
long queryMin = 10;
long queryMax = 20;
// Fake numeric doc values so that:
// docs 0-256 all match
// docs in 256-512 are all greater than queryMax
// docs in 512-768 are all less than queryMin
// docs in 768-1024 have some docs that match the range, others not
// docs in 1024-2048 follow a similar pattern as docs in 0-1024 except that not all docs have a
// value
NumericDocValues values =
new NumericDocValues() {
int doc = -1;
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
if (target < 1024) {
// dense up to 1024
return doc = target;
} else if (doc < 2047) {
// 50% docs have a value up to 2048
return doc = target + (target & 1);
} else {
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
}
@Override
public long longValue() throws IOException {
int d = doc % 1024;
if (d < 128) {
return (queryMin + queryMax) >> 1;
} else if (d < 256) {
return queryMax + 1;
} else if (d < 512) {
return queryMin - 1;
} else {
return switch ((d / 2) % 3) {
case 0 -> queryMin - 1;
case 1 -> queryMax + 1;
case 2 -> (queryMin + queryMax) >> 1;
default -> throw new AssertionError();
};
}
}
@Override
public long cost() {
return 42;
}
};
AtomicBoolean twoPhaseCalled = new AtomicBoolean();
TwoPhaseIterator twoPhase =
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
twoPhaseCalled.set(true);
long v = values.longValue();
return v >= queryMin && v <= queryMax;
}
@Override
public float matchCost() {
return 2f; // 2 comparisons
}
};
DocValuesSkipper skipper =
new DocValuesSkipper() {
int doc = -1;
@Override
public void advance(int target) throws IOException {
doc = target;
}
@Override
public int numLevels() {
return doLevels ? 3 : 1;
}
@Override
public int minDocID(int level) {
int rangeLog = 9 - numLevels() + level;
// the level is the log2 of the interval
if (doc < 0) {
return -1;
} else if (doc >= 2048) {
return DocIdSetIterator.NO_MORE_DOCS;
} else {
int mask = (1 << rangeLog) - 1;
// prior multiple of 2^level
return doc & ~mask;
}
}
@Override
public int maxDocID(int level) {
int rangeLog = 9 - numLevels() + level;
int minDocID = minDocID(level);
return switch (minDocID) {
case -1 -> -1;
case DocIdSetIterator.NO_MORE_DOCS -> DocIdSetIterator.NO_MORE_DOCS;
default -> minDocID + (1 << rangeLog) - 1;
};
}
@Override
public long minValue(int level) {
int d = doc % 1024;
if (d < 128) {
return queryMin;
} else if (d < 256) {
return queryMax + 1;
} else if (d < 768) {
return queryMin - 1;
} else {
return queryMin - 1;
}
}
@Override
public long maxValue(int level) {
int d = doc % 1024;
if (d < 128) {
return queryMax;
} else if (d < 256) {
return queryMax + 1;
} else if (d < 768) {
return queryMin - 1;
} else {
return queryMax + 1;
}
}
@Override
public int docCount(int level) {
int rangeLog = 9 - numLevels() + level;
if (doc < 1024) {
return 1 << rangeLog;
} else {
// half docs have a value
return 1 << rangeLog >> 1;
}
}
@Override
public long minValue() {
return Long.MIN_VALUE;
}
@Override
public long maxValue() {
return Long.MAX_VALUE;
}
@Override
public int docCount() {
return 1024 + 1024 / 2;
}
};
DocValuesRangeIterator rangeIterator =
new DocValuesRangeIterator(twoPhase, skipper, queryMin, queryMax);
DocValuesRangeIterator.Approximation rangeApproximation =
(DocValuesRangeIterator.Approximation) rangeIterator.approximation();
assertEquals(100, rangeApproximation.advance(100));
assertEquals(DocValuesRangeIterator.Match.YES, rangeApproximation.match);
assertEquals(255, rangeApproximation.upTo);
assertTrue(rangeIterator.matches());
assertTrue(values.docID() < rangeApproximation.docID()); // we did not advance doc values
assertFalse(twoPhaseCalled.get());
assertEquals(768, rangeApproximation.advance(300));
assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
if (doLevels) {
assertEquals(831, rangeApproximation.upTo);
} else {
assertEquals(1023, rangeApproximation.upTo);
}
for (int i = 0; i < 10; ++i) {
assertEquals(values.docID(), rangeApproximation.docID());
assertEquals(twoPhase.matches(), rangeIterator.matches());
assertTrue(twoPhaseCalled.get());
twoPhaseCalled.set(false);
rangeApproximation.nextDoc();
}
assertEquals(1100, rangeApproximation.advance(1099));
assertEquals(DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE, rangeApproximation.match);
assertEquals(1024 + 256 - 1, rangeApproximation.upTo);
assertEquals(values.docID(), rangeApproximation.docID());
assertTrue(rangeIterator.matches());
assertFalse(twoPhaseCalled.get());
assertEquals(1024 + 768, rangeApproximation.advance(1024 + 300));
assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
if (doLevels) {
assertEquals(1024 + 831, rangeApproximation.upTo);
} else {
assertEquals(2047, rangeApproximation.upTo);
}
for (int i = 0; i < 10; ++i) {
assertEquals(values.docID(), rangeApproximation.docID());
assertEquals(twoPhase.matches(), rangeIterator.matches());
assertTrue(twoPhaseCalled.get());
twoPhaseCalled.set(false);
rangeApproximation.nextDoc();
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximation.advance(2048));
}
}
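The assertions above reflect how the range iterator classifies each skipper interval against the query range. A hedged sketch of that per-interval decision, restricted to level 0 (the real DocValuesRangeIterator also consults higher levels and skips intervals that are disjoint from the range before classifying, so details may differ):
// Hedged sketch, not the actual implementation: classify an interval that
// overlaps [queryMin, queryMax].
static DocValuesRangeIterator.Match classifyOverlapping(
    DocValuesSkipper skipper, long queryMin, long queryMax) {
  boolean contained = skipper.minValue(0) >= queryMin && skipper.maxValue(0) <= queryMax;
  if (contained == false) {
    return DocValuesRangeIterator.Match.MAYBE; // some docs may match: check values per doc
  }
  int intervalSize = skipper.maxDocID(0) - skipper.minDocID(0) + 1;
  return skipper.docCount(0) == intervalSize
      ? DocValuesRangeIterator.Match.YES // every doc in the interval matches
      : DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE; // matches iff the doc has a value
}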

View File

@ -105,6 +105,9 @@ public class TestCheckIndex extends BaseTestCheckIndex {
// doc value
doc.add(new NumericDocValuesField("dv", random().nextLong()));
// doc value with skip index
doc.add(NumericDocValuesField.indexedField("dv_skip", random().nextLong()));
// point value
byte[] point = new byte[4];
NumericUtils.intToSortableBytes(random().nextInt(), point, 0);
@ -154,7 +157,7 @@ public class TestCheckIndex extends BaseTestCheckIndex {
assertNull(segStatus.liveDocStatus.error);
// confirm field infos testing status
assertEquals(8, segStatus.fieldInfoStatus.totFields);
assertEquals(9, segStatus.fieldInfoStatus.totFields);
assertTrue(output.toString(UTF_8).contains("test: field infos"));
assertNull(segStatus.fieldInfoStatus.error);
@ -184,7 +187,8 @@ public class TestCheckIndex extends BaseTestCheckIndex {
assertNull(segStatus.termVectorStatus.error);
// confirm doc values testing status
assertEquals(2, segStatus.docValuesStatus.totalNumericFields);
assertEquals(3, segStatus.docValuesStatus.totalNumericFields);
assertEquals(1, segStatus.docValuesStatus.totalSkippingIndex);
assertTrue(output.toString(UTF_8).contains("test: docvalues"));
assertNull(segStatus.docValuesStatus.error);

View File

@ -106,6 +106,7 @@ public class TestCodecs extends LuceneTestCase {
storePayloads,
indexOptions,
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,

View File

@ -250,6 +250,7 @@ public class TestFieldInfos extends LuceneTestCase {
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,
@ -271,6 +272,7 @@ public class TestFieldInfos extends LuceneTestCase {
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,
@ -294,6 +296,7 @@ public class TestFieldInfos extends LuceneTestCase {
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,

View File

@ -58,6 +58,7 @@ public class TestFieldsReader extends LuceneTestCase {
false,
ift.indexOptions(),
ift.docValuesType(),
ift.hasDocValuesSkipIndex(),
-1,
new HashMap<>(),
0,

View File

@ -4976,4 +4976,62 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
}
public void testDocValuesMixedSkippingIndex() throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter writer =
new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
Document doc1 = new Document();
doc1.add(SortedNumericDocValuesField.indexedField("test", random().nextLong()));
writer.addDocument(doc1);
Document doc2 = new Document();
doc2.add(new SortedNumericDocValuesField("test", random().nextLong()));
IllegalArgumentException ex =
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc2));
assertEquals(
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index: expected 'true', but it has 'false'.",
ex.getMessage());
}
}
try (Directory dir = newDirectory()) {
try (IndexWriter writer =
new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
Document doc1 = new Document();
doc1.add(new SortedSetDocValuesField("test", TestUtil.randomBinaryTerm(random())));
writer.addDocument(doc1);
Document doc2 = new Document();
doc2.add(SortedSetDocValuesField.indexedField("test", TestUtil.randomBinaryTerm(random())));
IllegalArgumentException ex =
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc2));
assertEquals(
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index: expected 'false', but it has 'true'.",
ex.getMessage());
}
}
}
public void testDocValuesSkippingIndexWithoutDocValues() throws Exception {
for (DocValuesType docValuesType :
new DocValuesType[] {DocValuesType.NONE, DocValuesType.BINARY}) {
FieldType fieldType = new FieldType();
fieldType.setStored(true);
fieldType.setDocValuesType(docValuesType);
fieldType.setDocValuesSkipIndex(true);
fieldType.freeze();
try (Directory dir = newMockDirectory()) {
try (IndexWriter writer =
new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
Document doc1 = new Document();
doc1.add(new Field("test", new byte[10], fieldType));
IllegalArgumentException ex =
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc1));
assertTrue(
ex.getMessage()
.startsWith("field 'test' cannot have docValuesSkipIndex set to true"));
}
}
}
}
}
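A related restriction introduced in FieldInfos earlier in this change is that a field carrying a skip index is not a doc-values-only field and therefore cannot receive in-place doc-values updates. A hedged sketch of how that surfaces through the IndexWriter API; field names and values are illustrative, and the sketch assumes the rejection happens when the update is issued:
// Hedged sketch: in-place doc-values updates are expected to be rejected for
// fields that were indexed with a skip index.
try (Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(NumericDocValuesField.indexedField("price", 42L));
  writer.addDocument(doc);
  expectThrows(
      IllegalArgumentException.class,
      () -> writer.updateNumericDocValue(new Term("id", "1"), "price", 7L));
}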

View File

@ -95,6 +95,11 @@ public class TestIndexableField extends LuceneTestCase {
return DocValuesType.NONE;
}
@Override
public boolean hasDocValuesSkipIndex() {
return false;
}
@Override
public int pointDimensionCount() {
return 0;

View File

@ -191,6 +191,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
0,
Collections.emptyMap(),
0,
@ -230,6 +231,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
1,
Collections.emptyMap(),
0,
@ -295,6 +297,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
segmentInfo.getNextDocValuesGen(),
Collections.emptyMap(),
0,
@ -365,6 +368,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
segmentInfo.getNextDocValuesGen(),
Collections.emptyMap(),
0,
@ -403,6 +407,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
segmentInfo.getNextDocValuesGen(),
Collections.emptyMap(),
0,

View File

@ -101,6 +101,11 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
return null;
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) {
return null;
}
@Override
public PointValues getPointValues(String field) {
return null;

View File

@ -1289,6 +1289,7 @@ public class TestSortOptimization extends LuceneTestCase {
false,
IndexOptions.NONE,
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
fi.getDocValuesGen(),
fi.attributes(),
0,

View File

@ -21,6 +21,7 @@ import java.util.Collections;
import java.util.Iterator;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -95,6 +96,7 @@ public class TermVectorLeafReader extends LeafReader {
terms.hasPayloads(),
indexOptions,
DocValuesType.NONE,
false,
-1,
Collections.emptyMap(),
0,
@ -141,6 +143,11 @@ public class TermVectorLeafReader extends LeafReader {
return null;
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
return null;
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
return null; // Is this needed? See MemoryIndex for a way to do it.

View File

@ -728,6 +728,7 @@ public class MemoryIndex {
storePayloads,
indexOptions,
fieldType.docValuesType(),
false,
-1,
Collections.emptyMap(),
fieldType.pointDimensionCount(),
@ -782,6 +783,7 @@ public class MemoryIndex {
info.fieldInfo.hasPayloads(),
info.fieldInfo.getIndexOptions(),
docValuesType,
false,
-1,
info.fieldInfo.attributes(),
info.fieldInfo.getPointDimensionCount(),
@ -1622,6 +1624,12 @@ public class MemoryIndex {
}
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
// Skipping isn't needed on a 1-doc index.
return null;
}
@Override
public PointValues getPointValues(String fieldName) {
Info info = fields.get(fieldName);

View File

@ -23,6 +23,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
@ -280,6 +281,14 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
return AssertingLeafReader.AssertingSortedSetDocValues.create(values, maxDoc);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
assert field.hasDocValuesSkipIndex();
DocValuesSkipper skipper = in.getSkipper(field);
assert skipper != null;
return new AssertingLeafReader.AssertingDocValuesSkipper(skipper);
}
@Override
public void close() throws IOException {
in.close();

View File

@ -23,6 +23,7 @@ import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
@ -1155,6 +1156,109 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}
/** Wraps a DocValuesSkipper but with additional asserts */
public static class AssertingDocValuesSkipper extends DocValuesSkipper {
private final Thread creationThread = Thread.currentThread();
private final DocValuesSkipper in;
/** Sole constructor */
public AssertingDocValuesSkipper(DocValuesSkipper in) {
this.in = in;
assert minDocID(0) == -1;
assert maxDocID(0) == -1;
}
@Override
public void advance(int target) throws IOException {
assertThread("Doc values skipper", creationThread);
assert target > maxDocID(0)
: "Illegal to call advance() on a target that is not beyond the current interval";
in.advance(target);
assert in.minDocID(0) <= in.maxDocID(0);
}
private boolean iterating() {
return maxDocID(0) != -1
&& minDocID(0) != -1
&& maxDocID(0) != DocIdSetIterator.NO_MORE_DOCS
&& minDocID(0) != DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int numLevels() {
assertThread("Doc values skipper", creationThread);
return in.numLevels();
}
@Override
public int minDocID(int level) {
assertThread("Doc values skipper", creationThread);
Objects.checkIndex(level, numLevels());
int minDocID = in.minDocID(level);
assert minDocID <= in.maxDocID(level);
if (level > 0) {
assert minDocID <= in.minDocID(level - 1);
}
return minDocID;
}
@Override
public int maxDocID(int level) {
assertThread("Doc values skipper", creationThread);
Objects.checkIndex(level, numLevels());
int maxDocID = in.maxDocID(level);
assert maxDocID >= in.minDocID(level);
if (level > 0) {
assert maxDocID >= in.maxDocID(level - 1);
}
return maxDocID;
}
@Override
public long minValue(int level) {
assertThread("Doc values skipper", creationThread);
assert iterating() : "Unpositioned iterator";
Objects.checkIndex(level, numLevels());
return in.minValue(level);
}
@Override
public long maxValue(int level) {
assertThread("Doc values skipper", creationThread);
assert iterating() : "Unpositioned iterator";
Objects.checkIndex(level, numLevels());
return in.maxValue(level);
}
@Override
public int docCount(int level) {
assertThread("Doc values skipper", creationThread);
assert iterating() : "Unpositioned iterator";
Objects.checkIndex(level, numLevels());
return in.docCount(level);
}
@Override
public long minValue() {
assertThread("Doc values skipper", creationThread);
return in.minValue();
}
@Override
public long maxValue() {
assertThread("Doc values skipper", creationThread);
return in.maxValue();
}
@Override
public int docCount() {
assertThread("Doc values skipper", creationThread);
return in.docCount();
}
}
/** Wraps a PointValues but with additional asserts */
public static class AssertingPointValues extends PointValues {
private final Thread creationThread = Thread.currentThread();
@ -1483,6 +1587,19 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
DocValuesSkipper skipper = super.getDocValuesSkipper(field);
FieldInfo fi = getFieldInfos().fieldInfo(field);
if (skipper != null) {
assert fi.hasDocValuesSkipIndex();
return new AssertingDocValuesSkipper(skipper);
} else {
assert fi == null || fi.hasDocValuesSkipIndex() == false;
return null;
}
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
NumericDocValues dv = super.getNormValues(field);

View File

@ -56,6 +56,7 @@ import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -101,6 +102,24 @@ import org.apache.lucene.util.automaton.RegExp;
*/
public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTestCase {
/**
* Override and return {@code false} if the {@link DocValuesSkipper} produced by this format
* sometimes returns, via {@link DocValuesSkipper#minDocID(int)} or {@link
* DocValuesSkipper#maxDocID(int)}, doc IDs that may not have a value.
*/
protected boolean skipperHasAccurateDocBounds() {
return true;
}
/**
* Override and return {@code false} if the {@link DocValuesSkipper} produced by this format
* sometimes returns, via {@link DocValuesSkipper#minValue(int)} or {@link
* DocValuesSkipper#maxValue(int)}, values that none of the documents in the range actually have.
*/
protected boolean skipperHasAccurateValueBounds() {
return true;
}
@Override
protected void addRandomFields(Document doc) {
if (usually()) {
@ -889,7 +908,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
doc.add(SortedDocValuesField.indexedField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
@ -901,6 +920,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
@ -2234,7 +2258,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(SortedSetDocValuesField.indexedField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
@ -2246,6 +2270,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(0, dv.getValueCount());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
@ -3211,7 +3240,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new NumericDocValuesField("field", 5));
doc.add(NumericDocValuesField.indexedField("field", 5));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
@ -3223,6 +3252,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
ireader.close();
directory.close();
}
@ -3337,7 +3371,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedNumericDocValuesField("field", 5));
doc.add(SortedNumericDocValuesField.indexedField("field", 5));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.deleteDocuments(new Term("id", "1"));
@ -3349,6 +3383,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
ireader.close();
directory.close();
}
@ -3499,7 +3538,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
doc.add(SortedDocValuesField.indexedField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
@ -3515,6 +3554,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
@ -3534,7 +3578,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
doc.add(SortedSetDocValuesField.indexedField("field", newBytesRef("hello")));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
@ -3550,6 +3594,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
TermsEnum termsEnum = dv.termsEnum();
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
@ -3569,7 +3618,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new NumericDocValuesField("field", 42L));
doc.add(NumericDocValuesField.indexedField("field", 42L));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
@ -3585,6 +3634,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
ireader.close();
directory.close();
}
@ -3600,7 +3654,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
Document doc = new Document();
doc.add(new StringField("id", "1", Field.Store.NO));
doc.add(new SortedNumericDocValuesField("field", 42L));
doc.add(SortedNumericDocValuesField.indexedField("field", 42L));
iwriter.addDocument(doc);
final int numEmptyDocs = atLeast(1024);
for (int i = 0; i < numEmptyDocs; ++i) {
@ -3616,6 +3670,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
assertEquals(NO_MORE_DOCS, dv.nextDoc());
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
assertEquals(0, skipper.docCount());
skipper.advance(0);
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
ireader.close();
directory.close();
}
@ -3807,4 +3866,457 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
protected boolean codecAcceptsHugeBinaryValues(String field) {
return true;
}
public void testNumericDocValuesWithSkipperSmall() throws Exception {
doTestNumericDocValuesWithSkipper(random().nextInt(1, 1000));
}
public void testNumericDocValuesWithSkipperMedium() throws Exception {
doTestNumericDocValuesWithSkipper(random().nextInt(1000, 20000));
}
@Nightly
public void testNumericDocValuesWithSkipperBig() throws Exception {
doTestNumericDocValuesWithSkipper(random().nextInt(50000, 100000));
}
private void doTestNumericDocValuesWithSkipper(int totalDocs) throws Exception {
assertDocValuesWithSkipper(
totalDocs,
new TestDocValueSkipper() {
@Override
public void populateDoc(Document doc) {
doc.add(NumericDocValuesField.indexedField("test", random().nextLong()));
}
@Override
public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
NumericDocValues numericDocValues = leafReader.getNumericDocValues("test");
return new DocValuesWrapper() {
@Override
public int advance(int target) throws IOException {
return numericDocValues.advance(target);
}
@Override
public boolean advanceExact(int target) throws IOException {
return numericDocValues.advanceExact(target);
}
@Override
public long maxValue() throws IOException {
return numericDocValues.longValue();
}
@Override
public long minValue() throws IOException {
return numericDocValues.longValue();
}
@Override
public int docID() {
return numericDocValues.docID();
}
};
}
@Override
public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
return leafReader.getDocValuesSkipper("test");
}
});
}
public void testSortedNumericDocValuesWithSkipperSmall() throws Exception {
doTestSortedNumericDocValuesWithSkipper(random().nextInt(1, 1000));
}
public void testSortedNumericDocValuesWithSkipperMedium() throws Exception {
doTestSortedNumericDocValuesWithSkipper(random().nextInt(1000, 20000));
}
@Nightly
public void testSortedNumericDocValuesWithSkipperBig() throws Exception {
doTestSortedNumericDocValuesWithSkipper(random().nextInt(50000, 100000));
}
private void doTestSortedNumericDocValuesWithSkipper(int totalDocs) throws Exception {
assertDocValuesWithSkipper(
totalDocs,
new TestDocValueSkipper() {
@Override
public void populateDoc(Document doc) {
for (int j = 0; j < random().nextInt(1, 5); j++) {
doc.add(SortedNumericDocValuesField.indexedField("test", random().nextLong()));
}
}
@Override
public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
SortedNumericDocValues sortedNumericDocValues =
leafReader.getSortedNumericDocValues("test");
return new DocValuesWrapper() {
long max;
long min;
@Override
public int advance(int target) throws IOException {
int doc = sortedNumericDocValues.advance(target);
if (doc != NO_MORE_DOCS) {
readValues();
}
return doc;
}
@Override
public boolean advanceExact(int target) throws IOException {
if (sortedNumericDocValues.advanceExact(target)) {
readValues();
return true;
}
return false;
}
private void readValues() throws IOException {
max = Long.MIN_VALUE;
min = Long.MAX_VALUE;
for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
long value = sortedNumericDocValues.nextValue();
max = Math.max(max, value);
min = Math.min(min, value);
}
}
@Override
public long maxValue() {
return max;
}
@Override
public long minValue() {
return min;
}
@Override
public int docID() {
return sortedNumericDocValues.docID();
}
};
}
@Override
public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
return leafReader.getDocValuesSkipper("test");
}
});
}
public void testSortedDocValuesWithSkipperSmall() throws Exception {
doTestSortedDocValuesWithSkipper(random().nextInt(1, 1000));
}
public void testSortedDocValuesWithSkipperMedium() throws Exception {
doTestSortedDocValuesWithSkipper(random().nextInt(1000, 20000));
}
@Nightly
public void testSortedDocValuesWithSkipperBig() throws Exception {
doTestSortedDocValuesWithSkipper(random().nextInt(50000, 100000));
}
private void doTestSortedDocValuesWithSkipper(int totalDocs) throws Exception {
assertDocValuesWithSkipper(
totalDocs,
new TestDocValueSkipper() {
@Override
public void populateDoc(Document doc) {
doc.add(SortedDocValuesField.indexedField("test", TestUtil.randomBinaryTerm(random())));
}
@Override
public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
SortedDocValues sortedDocValues = leafReader.getSortedDocValues("test");
return new DocValuesWrapper() {
@Override
public int advance(int target) throws IOException {
return sortedDocValues.advance(target);
}
@Override
public boolean advanceExact(int target) throws IOException {
return sortedDocValues.advanceExact(target);
}
@Override
public long maxValue() throws IOException {
return sortedDocValues.ordValue();
}
@Override
public long minValue() throws IOException {
return sortedDocValues.ordValue();
}
@Override
public int docID() {
return sortedDocValues.docID();
}
};
}
@Override
public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
return leafReader.getDocValuesSkipper("test");
}
});
}
public void testSortedSetDocValuesWithSkipperSmall() throws Exception {
doTestSortedSetDocValuesWithSkipper(random().nextInt(1, 1000));
}
public void testSortedSetDocValuesWithSkipperMedium() throws Exception {
doTestSortedSetDocValuesWithSkipper(random().nextInt(10000, 20000));
}
@Nightly
public void testSortedSetDocValuesWithSkipperBig() throws Exception {
doTestSortedSetDocValuesWithSkipper(random().nextInt(50000, 100000));
}
private void doTestSortedSetDocValuesWithSkipper(int totalDocs) throws Exception {
assertDocValuesWithSkipper(
totalDocs,
new TestDocValueSkipper() {
@Override
public void populateDoc(Document doc) {
for (int j = 0; j < random().nextInt(1, 5); j++) {
doc.add(
SortedSetDocValuesField.indexedField(
"test", TestUtil.randomBinaryTerm(random())));
}
}
@Override
public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("test");
return new DocValuesWrapper() {
long max;
long min;
@Override
public int advance(int target) throws IOException {
int doc = sortedSetDocValues.advance(target);
if (doc != NO_MORE_DOCS) {
readValues();
}
return doc;
}
@Override
public boolean advanceExact(int target) throws IOException {
if (sortedSetDocValues.advanceExact(target)) {
readValues();
return true;
}
return false;
}
private void readValues() throws IOException {
max = Long.MIN_VALUE;
min = Long.MAX_VALUE;
for (int i = 0; i < sortedSetDocValues.docValueCount(); i++) {
long value = sortedSetDocValues.nextOrd();
max = Math.max(max, value);
min = Math.min(min, value);
}
}
@Override
public long maxValue() {
return max;
}
@Override
public long minValue() {
return min;
}
@Override
public int docID() {
return sortedSetDocValues.docID();
}
};
}
@Override
public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
return leafReader.getDocValuesSkipper("test");
}
});
}
private void assertDocValuesWithSkipper(int totalDocs, TestDocValueSkipper testDocValueSkipper)
throws Exception {
Supplier<Boolean> booleanSupplier;
switch (random().nextInt(3)) {
case 0 -> booleanSupplier = () -> true;
case 1 -> booleanSupplier = () -> random().nextBoolean();
case 2 -> booleanSupplier = () -> random().nextBoolean() && random().nextBoolean();
default -> throw new AssertionError();
}
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
int numDocs = 0;
for (int i = 0; i < totalDocs; i++) {
Document doc = new Document();
if (booleanSupplier.get()) {
testDocValueSkipper.populateDoc(doc);
numDocs++;
}
writer.addDocument(doc);
if (rarely()) {
writer.commit();
}
}
writer.flush();
if (random().nextBoolean()) {
writer.forceMerge(1);
}
IndexReader r = writer.getReader();
int readDocs = 0;
for (LeafReaderContext readerContext : r.leaves()) {
LeafReader reader = readerContext.reader();
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
PrintStream infoStream = new PrintStream(bos, false, UTF_8);
DocValuesStatus status = CheckIndex.testDocValues((CodecReader) reader, infoStream, true);
if (status.error != null) {
throw new Exception(status.error);
}
readDocs +=
assertDocValuesSkipSequential(
testDocValueSkipper.docValuesWrapper(reader),
testDocValueSkipper.docValuesSkipper(reader));
for (int i = 0; i < 10; i++) {
assertDocValuesSkipRandom(
testDocValueSkipper.docValuesWrapper(reader),
testDocValueSkipper.docValuesSkipper(reader),
reader.maxDoc());
}
}
assertEquals(numDocs, readDocs);
IOUtils.close(r, writer, directory);
}
private int assertDocValuesSkipSequential(DocValuesWrapper iterator, DocValuesSkipper skipper)
throws IOException {
if (skipper == null) {
return 0;
}
assertEquals(-1, iterator.docID());
assertEquals(-1, skipper.minDocID(0));
assertEquals(-1, skipper.maxDocID(0));
iterator.advance(0);
int docCount = 0;
while (true) {
int previousMaxDoc = skipper.maxDocID(0);
skipper.advance(previousMaxDoc + 1);
assertTrue(skipper.minDocID(0) > previousMaxDoc);
if (skipperHasAccurateDocBounds()) {
assertEquals(iterator.docID(), skipper.minDocID(0));
} else {
assertTrue(
"Expected: " + iterator.docID() + " but got " + skipper.minDocID(0),
skipper.minDocID(0) <= iterator.docID());
}
if (skipper.minDocID(0) == NO_MORE_DOCS) {
assertEquals(NO_MORE_DOCS, skipper.maxDocID(0));
break;
}
assertTrue(skipper.docCount(0) > 0);
int maxDoc = -1;
long minVal = Long.MAX_VALUE;
long maxVal = Long.MIN_VALUE;
for (int i = 0; i < skipper.docCount(0); ++i) {
assertNotEquals(NO_MORE_DOCS, iterator.docID());
maxDoc = Math.max(maxDoc, iterator.docID());
minVal = Math.min(minVal, iterator.minValue());
maxVal = Math.max(maxVal, iterator.maxValue());
iterator.advance(iterator.docID() + 1);
}
if (skipperHasAccurateDocBounds()) {
assertEquals(maxDoc, skipper.maxDocID(0));
} else {
assertTrue(
"Expected: " + maxDoc + " but got " + skipper.maxDocID(0),
skipper.maxDocID(0) >= maxDoc);
}
if (skipperHasAccurateValueBounds()) {
assertEquals(minVal, skipper.minValue(0));
assertEquals(maxVal, skipper.maxValue(0));
} else {
assertTrue(
"Expected: " + minVal + " but got " + skipper.minValue(0),
minVal >= skipper.minValue(0));
assertTrue(
"Expected: " + maxVal + " but got " + skipper.maxValue(0),
maxVal <= skipper.maxValue(0));
}
docCount += skipper.docCount(0);
}
assertEquals(docCount, skipper.docCount());
return docCount;
}
private static void assertDocValuesSkipRandom(
DocValuesWrapper iterator, DocValuesSkipper skipper, int maxDoc) throws IOException {
if (skipper == null) {
return;
}
while (true) {
int doc = random().nextInt(skipper.maxDocID(0), maxDoc + 1) + 1;
skipper.advance(doc);
if (skipper.minDocID(0) == NO_MORE_DOCS) {
assertEquals(NO_MORE_DOCS, skipper.maxDocID(0));
return;
}
if (iterator.advanceExact(doc)) {
assertTrue(iterator.docID() >= skipper.minDocID(0));
assertTrue(iterator.docID() <= skipper.maxDocID(0));
assertTrue(iterator.minValue() >= skipper.minValue(0));
assertTrue(iterator.maxValue() <= skipper.maxValue(0));
}
}
}
private interface TestDocValueSkipper {
void populateDoc(Document doc);
DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException;
DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException;
}
private interface DocValuesWrapper {
int advance(int target) throws IOException;
boolean advanceExact(int target) throws IOException;
long maxValue() throws IOException;
long minValue() throws IOException;
int docID();
}
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.tests.index;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@ -295,6 +296,15 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
storePayloads = random().nextBoolean();
}
}
boolean hasDocValuesSkipIndex = false;
if (EnumSet.of(
DocValuesType.NUMERIC,
DocValuesType.SORTED,
DocValuesType.SORTED_NUMERIC,
DocValuesType.SORTED_SET)
.contains(fieldType.docValuesType())) {
hasDocValuesSkipIndex = fieldType.hasDocValuesSkipIndex();
}
FieldInfo fi =
new FieldInfo(
field,
@ -304,6 +314,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
storePayloads,
fieldType.indexOptions(),
fieldType.docValuesType(),
hasDocValuesSkipIndex,
-1,
new HashMap<>(),
fieldType.pointDimensionCount(),
@ -349,8 +360,15 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
}
if (r.nextBoolean()) {
DocValuesType values[] = DocValuesType.values();
DocValuesType[] values = DocValuesType.values();
DocValuesType current = values[r.nextInt(values.length)];
type.setDocValuesType(current);
if (current == DocValuesType.NUMERIC
|| current == DocValuesType.SORTED_NUMERIC
|| current == DocValuesType.SORTED
|| current == DocValuesType.SORTED_SET) {
type.setDocValuesSkipIndex(random().nextBoolean());
}
}
if (r.nextBoolean()) {
@ -389,6 +407,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
assertEquals(expected.number, actual.number);
assertEquals(expected.name, actual.name);
assertEquals(expected.getDocValuesType(), actual.getDocValuesType());
assertEquals(expected.hasDocValuesSkipIndex(), actual.hasDocValuesSkipIndex());
assertEquals(expected.getIndexOptions(), actual.getIndexOptions());
assertEquals(expected.hasNorms(), actual.hasNorms());
assertEquals(expected.hasPayloads(), actual.hasPayloads());
@ -429,6 +448,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
false,
TextField.TYPE_STORED.indexOptions(),
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,

View File

@ -357,6 +357,7 @@ public abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
proto.hasPayloads(),
proto.getIndexOptions(),
proto.getDocValuesType(),
proto.hasDocValuesSkipIndex(),
proto.getDocValuesGen(),
new HashMap<>(),
proto.getPointDimensionCount(),

View File

@ -26,6 +26,7 @@ import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -187,6 +188,17 @@ class MergeReaderWrapper extends LeafReader {
return norms.getNorms(fi);
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
ensureOpen();
FieldInfo fi = getFieldInfos().fieldInfo(field);
if (fi == null) {
// Field does not exist
return null;
}
return docValues.getSkipper(fi);
}
@Override
public FieldInfos getFieldInfos() {
return in.getFieldInfos();

View File

@ -103,6 +103,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
oldInfo.hasPayloads(), // storePayloads
oldInfo.getIndexOptions(), // indexOptions
oldInfo.getDocValuesType(), // docValuesType
oldInfo.hasDocValuesSkipIndex(), // hasDocValuesSkipIndex
oldInfo.getDocValuesGen(), // dvGen
oldInfo.attributes(), // attributes
oldInfo.getPointDimensionCount(), // data dimension count

View File

@ -157,6 +157,7 @@ public class RandomPostingsTester {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,
@ -731,6 +732,7 @@ public class RandomPostingsTester {
doPayloads,
indexOptions,
DocValuesType.NONE,
false,
-1,
new HashMap<>(),
0,

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
@ -227,6 +228,11 @@ public class QueryUtils {
return null;
}
@Override
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
return null;
}
@Override
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
return null;