mirror of https://github.com/apache/lucene.git
GITHUB#13449: Sparse index, optional skip list on top of doc values (#13449)
Optional skip list on top of doc values which is exposed via the DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether to create a "skip index" for doc values. Co-authored-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
parent 1c655823dd
commit 048770205c
@@ -129,6 +129,10 @@ New Features

* GITHUB#13233: Add RomanianNormalizationFilter (Trey Jones, Robert Muir)

* GITHUB#13449: Sparse index: optional skip list on top of doc values which is exposed via the
  DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether
  to create a "skip index" for doc values. (Ignacio Vera)

Improvements
---------------------

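For context, a minimal sketch of how the new flag is meant to be used from the indexing side. This is not part of the commit; the field name "price" and the use of ByteBuffersDirectory are illustrative only.

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.SortedNumericDocValuesField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.store.ByteBuffersDirectory;
    import org.apache.lucene.store.Directory;

    public class SkipIndexExample {
      public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
          Document doc = new Document();
          // indexedField(...) is the new factory added by this commit: same doc values,
          // plus a skip index recording per-block min/max values and doc ranges.
          doc.add(SortedNumericDocValuesField.indexedField("price", 42L));
          writer.addDocument(doc);
          writer.commit();
          try (DirectoryReader reader = DirectoryReader.open(writer)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // The existing slow range query consults the skipper when one is present.
            Query query = SortedNumericDocValuesField.newSlowRangeQuery("price", 0L, 100L);
            System.out.println(searcher.count(query));
          }
        }
      }
    }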
@@ -209,6 +209,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
              storePayloads,
              indexOptions,
              docValuesType,
              false,
              dvGen,
              attributes,
              pointDataDimensionCount,
@@ -28,6 +28,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;

@@ -1677,6 +1678,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
    }
  }

  @Override
  public DocValuesSkipper getSkipper(FieldInfo field) {
    return null;
  }

  @Override
  public void checkIntegrity() throws IOException {
    CodecUtil.checksumEntireFile(data);
@@ -186,6 +186,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
              storePayloads,
              indexOptions,
              docValuesType,
              false,
              dvGen,
              attributes,
              pointDataDimensionCount,
@@ -16,13 +16,16 @@
 */
package org.apache.lucene.codecs.simpletext;

import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.DOCCOUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORIGIN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;

@@ -40,6 +43,7 @@ import java.util.function.IntFunction;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;

@@ -59,12 +63,15 @@ import org.apache.lucene.util.StringHelper;
class SimpleTextDocValuesReader extends DocValuesProducer {

  static class OneField {
    int docCount;
    long dataStartFilePointer;
    String pattern;
    String ordPattern;
    int maxLength;
    boolean fixedLength;
    long origin;
    long minValue;
    long maxValue;
    long numValues;
  }

@@ -99,17 +106,34 @@ class SimpleTextDocValuesReader extends DocValuesProducer {

      DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE));
      assert dvType != DocValuesType.NONE;
      if (dvType == DocValuesType.NUMERIC) {

      if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
        readLine();
        assert startsWith(MINVALUE)
            : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
        field.minValue = Long.parseLong(stripPrefix(MINVALUE));
        readLine();
        assert startsWith(MAXVALUE)
            : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
        field.maxValue = Long.parseLong(stripPrefix(MAXVALUE));
      }

      readLine();
      assert startsWith(DOCCOUNT)
          : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
      field.docCount = Integer.parseInt(stripPrefix(DOCCOUNT));

      if (dvType == DocValuesType.NUMERIC) {
        readLine();
        assert startsWith(ORIGIN)
            : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
        field.origin = Long.parseLong(stripPrefix(ORIGIN));
        readLine();
        assert startsWith(PATTERN);
        field.pattern = stripPrefix(PATTERN);
        field.dataStartFilePointer = data.getFilePointer();
        data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc);
      } else if (dvType == DocValuesType.BINARY) {
      } else if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED_NUMERIC) {
        readLine();
        assert startsWith(MAXLENGTH);
        field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));

@@ -225,7 +249,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
          throw new CorruptIndexException("failed to parse BigDecimal value", in, pe);
        }
        SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
        return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
        return BigInteger.valueOf(field.origin).add(bd.toBigIntegerExact()).longValue();
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }

@@ -824,4 +848,82 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
      }
    }
  }

  @Override
  public DocValuesSkipper getSkipper(FieldInfo fieldInfo) {
    final boolean numeric =
        fieldInfo.getDocValuesType() == DocValuesType.NUMERIC
            || fieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
    final OneField field = fields.get(fieldInfo.name);

    // SegmentCoreReaders already verifies this field is
    // valid:
    assert field != null;

    return new DocValuesSkipper() {
      int doc = -1;

      @Override
      public int numLevels() {
        return 1;
      }

      @Override
      public long minValue(int level) {
        return minValue();
      }

      @Override
      public long maxValue(int level) {
        return maxValue();
      }

      @Override
      public int docCount(int level) {
        return docCount();
      }

      @Override
      public long minValue() {
        return numeric ? field.minValue : 0;
      }

      @Override
      public long maxValue() {
        return numeric ? field.maxValue : field.numValues - 1;
      }

      @Override
      public int docCount() {
        return field.docCount;
      }

      @Override
      public int minDocID(int level) {
        if (doc == -1) {
          return -1;
        } else if (doc >= maxDoc || field.docCount == 0) {
          return DocIdSetIterator.NO_MORE_DOCS;
        } else {
          return 0;
        }
      }

      @Override
      public int maxDocID(int level) {
        if (doc == -1) {
          return -1;
        } else if (doc >= maxDoc || field.docCount == 0) {
          return DocIdSetIterator.NO_MORE_DOCS;
        } else {
          return maxDoc;
        }
      }

      @Override
      public void advance(int target) {
        doc = target;
      }
    };
  }
}
@@ -46,8 +46,13 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
  static final BytesRef END = new BytesRef("END");
  static final BytesRef FIELD = new BytesRef("field ");
  static final BytesRef TYPE = new BytesRef(" type ");
  static final BytesRef DOCCOUNT = new BytesRef(" doccount ");
  // used for numerics
  static final BytesRef MINVALUE = new BytesRef(" minvalue ");
  static final BytesRef ORIGIN = new BytesRef(" origin "); // for deltas

  static final BytesRef MINVALUE = new BytesRef(" minalue ");
  static final BytesRef MAXVALUE = new BytesRef(" maxvalue ");

  static final BytesRef PATTERN = new BytesRef(" pattern ");
  // used for bytes
  static final BytesRef LENGTH = new BytesRef("length ");

@@ -97,13 +102,27 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
      maxValue = Math.max(maxValue, v);
      numValues++;
    }

    // write absolute min and max for skipper
    SimpleTextUtil.write(data, MINVALUE);
    SimpleTextUtil.write(data, Long.toString(minValue), scratch);
    SimpleTextUtil.writeNewline(data);

    SimpleTextUtil.write(data, MAXVALUE);
    SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
    SimpleTextUtil.writeNewline(data);

    SimpleTextUtil.write(data, DOCCOUNT);
    SimpleTextUtil.write(data, Integer.toString(numValues), scratch);
    SimpleTextUtil.writeNewline(data);

    if (numValues != numDocs) {
      minValue = Math.min(minValue, 0);
      maxValue = Math.max(maxValue, 0);
    }

    // write our minimum value to the .dat, all entries are deltas from that
    SimpleTextUtil.write(data, MINVALUE);
    SimpleTextUtil.write(data, ORIGIN);
    SimpleTextUtil.write(data, Long.toString(minValue), scratch);
    SimpleTextUtil.writeNewline(data);

@@ -161,6 +180,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
  public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    assert fieldSeen(field.name);
    assert field.getDocValuesType() == DocValuesType.BINARY;
    writeFieldEntry(field, DocValuesType.BINARY);
    doAddBinaryField(field, valuesProducer);
  }

@@ -168,10 +188,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
      throws IOException {
    int maxLength = 0;
    BinaryDocValues values = valuesProducer.getBinary(field);
    int docCount = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      ++docCount;
      maxLength = Math.max(maxLength, values.binaryValue().toString().length());
    }
    writeFieldEntry(field, DocValuesType.BINARY);

    SimpleTextUtil.write(data, DOCCOUNT);
    SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
    SimpleTextUtil.writeNewline(data);

    // write maxLength
    SimpleTextUtil.write(data, MAXLENGTH);

@@ -232,6 +257,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
    assert field.getDocValuesType() == DocValuesType.SORTED;
    writeFieldEntry(field, DocValuesType.SORTED);

    int docCount = 0;
    SortedDocValues values = valuesProducer.getSorted(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      ++docCount;
    }
    SimpleTextUtil.write(data, DOCCOUNT);
    SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
    SimpleTextUtil.writeNewline(data);

    int valueCount = 0;
    int maxLength = -1;
    TermsEnum terms = valuesProducer.getSorted(field).termsEnum();

@@ -301,7 +335,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {

    assert valuesSeen == valueCount;

    SortedDocValues values = valuesProducer.getSorted(field);
    values = valuesProducer.getSorted(field);
    for (int i = 0; i < numDocs; ++i) {
      if (values.docID() < i) {
        values.nextDoc();

@@ -321,6 +355,28 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
      throws IOException {
    assert fieldSeen(field.name);
    assert field.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
    writeFieldEntry(field, DocValuesType.SORTED_NUMERIC);

    long minValue = Long.MAX_VALUE;
    long maxValue = Long.MIN_VALUE;
    SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      for (int i = 0; i < values.docValueCount(); ++i) {
        long v = values.nextValue();
        minValue = Math.min(minValue, v);
        maxValue = Math.max(maxValue, v);
      }
    }

    // write absolute min and max for skipper
    SimpleTextUtil.write(data, MINVALUE);
    SimpleTextUtil.write(data, Long.toString(minValue), scratch);
    SimpleTextUtil.writeNewline(data);

    SimpleTextUtil.write(data, MAXVALUE);
    SimpleTextUtil.write(data, Long.toString(maxValue), scratch);
    SimpleTextUtil.writeNewline(data);

    doAddBinaryField(
        field,
        new EmptyDocValuesProducer() {

@@ -395,6 +451,15 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
    assert field.getDocValuesType() == DocValuesType.SORTED_SET;
    writeFieldEntry(field, DocValuesType.SORTED_SET);

    int docCount = 0;
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      ++docCount;
    }
    SimpleTextUtil.write(data, DOCCOUNT);
    SimpleTextUtil.write(data, Integer.toString(docCount), scratch);
    SimpleTextUtil.writeNewline(data);

    long valueCount = 0;
    int maxLength = 0;
    TermsEnum terms = valuesProducer.getSortedSet(field).termsEnum();

@@ -430,7 +495,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer {
    // length
    int maxOrdListLength = 0;
    StringBuilder sb2 = new StringBuilder();
    SortedSetDocValues values = valuesProducer.getSortedSet(field);
    values = valuesProducer.getSortedSet(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      sb2.setLength(0);
      for (int i = 0; i < values.docValueCount(); i++) {
@@ -60,6 +60,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
  static final BytesRef PAYLOADS = new BytesRef(" payloads ");
  static final BytesRef NORMS = new BytesRef(" norms ");
  static final BytesRef DOCVALUES = new BytesRef(" doc values ");
  static final BytesRef DOCVALUES_SKIP_INDEX = new BytesRef(" doc values skip index");
  static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
  static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
  static final BytesRef NUM_ATTS = new BytesRef(" attributes ");

@@ -122,6 +123,11 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
      String dvType = readString(DOCVALUES.length, scratch);
      final DocValuesType docValuesType = docValuesType(dvType);

      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
      boolean docValueSkipper =
          Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));

      SimpleTextUtil.readLine(input, scratch);
      assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
      final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));

@@ -184,6 +190,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
              storePayloads,
              indexOptions,
              docValuesType,
              docValueSkipper,
              dvGen,
              Collections.unmodifiableMap(atts),
              dimensionalCount,

@@ -276,6 +283,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
      SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
      SimpleTextUtil.writeNewline(out);

      SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
      SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
      SimpleTextUtil.writeNewline(out);

      SimpleTextUtil.write(out, DOCVALUES_GEN);
      SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
      SimpleTextUtil.writeNewline(out);
@@ -37,6 +37,12 @@ import org.apache.lucene.util.BytesRef;
public class TestSimpleTextDocValuesFormat extends BaseDocValuesFormatTestCase {
  private final Codec codec = new SimpleTextCodec();

  @Override
  protected boolean skipperHasAccurateDocBounds() {
    // This format always returns minDocID = 0 and maxDocID = maxDoc - 1
    return false;
  }

  @Override
  protected Codec getCodec() {
    return codec;
@@ -111,6 +111,7 @@ public class TestBlockWriter extends LuceneTestCase {
            true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
            DocValuesType.NONE,
            false,
            -1,
            Collections.emptyMap(),
            0,
@@ -198,6 +198,7 @@ public class TestSTBlockReader extends LuceneTestCase {
            true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
            DocValuesType.NONE,
            false,
            -1,
            Collections.emptyMap(),
            0,
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;

@@ -73,6 +74,13 @@ public abstract class DocValuesProducer implements Closeable {
   */
  public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;

  /**
   * Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
   * thread-safe: it will only be used by a single thread. The return value is undefined if {@link
   * FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
   */
  public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;

  /**
   * Checks consistency of this producer
   *
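Given the contract spelled out in the javadoc above, callers are expected to gate on FieldInfo before asking for a skipper. A small hedged sketch, not part of the commit; the class and method names are illustrative.

    import java.io.IOException;
    import org.apache.lucene.codecs.DocValuesProducer;
    import org.apache.lucene.index.DocValuesSkipper;
    import org.apache.lucene.index.FieldInfo;

    final class SkipperAccess {
      // Returns a skipper only when the field actually declared a skip index;
      // per the javadoc, calling getSkipper otherwise has an undefined result.
      static DocValuesSkipper skipperOrNull(DocValuesProducer producer, FieldInfo field)
          throws IOException {
        if (field.hasDocValuesSkipIndex() == false) {
          return null;
        }
        return producer.getSkipper(field);
      }
    }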
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene90;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_SIZE;

import java.io.IOException;
import java.util.Arrays;

@@ -43,6 +44,7 @@ import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

@@ -129,16 +131,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
      throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene90DocValuesFormat.NUMERIC);

    writeValues(
        field,
    DocValuesProducer producer =
        new EmptyDocValuesProducer() {
          @Override
          public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
            return DocValues.singleton(valuesProducer.getNumeric(field));
          }
        },
        false);
        };
    if (field.hasDocValuesSkipIndex()) {
      writeSkipIndex(field, producer);
    }
    writeValues(field, producer, false);
  }

  private static class MinMaxTracker {

@@ -183,6 +186,84 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
    }
  }

  private static class SkipAccumulator {
    int minDocID;
    int maxDocID;
    int docCount;
    long minValue;
    long maxValue;

    SkipAccumulator(int docID) {
      minDocID = docID;
      minValue = Long.MAX_VALUE;
      maxValue = Long.MIN_VALUE;
      docCount = 0;
    }

    void accumulate(long value) {
      minValue = Math.min(minValue, value);
      maxValue = Math.max(maxValue, value);
    }

    void nextDoc(int docID) {
      maxDocID = docID;
      ++docCount;
    }

    void writeTo(DataOutput output) throws IOException {
      output.writeInt(maxDocID);
      output.writeInt(minDocID);
      output.writeLong(maxValue);
      output.writeLong(minValue);
      output.writeInt(docCount);
    }
  }

  private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
      throws IOException {
    assert field.hasDocValuesSkipIndex();
    // TODO: This disk compression once we introduce levels
    long start = data.getFilePointer();
    SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
    long globalMaxValue = Long.MIN_VALUE;
    long globalMinValue = Long.MAX_VALUE;
    int globalDocCount = 0;
    int maxDocId = -1;
    SkipAccumulator accumulator = null;
    int counter = 0;
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      if (counter == 0) {
        accumulator = new SkipAccumulator(doc);
      }
      accumulator.nextDoc(doc);
      for (int i = 0, end = values.docValueCount(); i < end; ++i) {
        accumulator.accumulate(values.nextValue());
      }
      if (++counter == SKIP_INDEX_INTERVAL_SIZE) {
        globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
        globalMinValue = Math.min(globalMinValue, accumulator.minValue);
        globalDocCount += accumulator.docCount;
        maxDocId = accumulator.maxDocID;
        accumulator.writeTo(data);
        counter = 0;
      }
    }

    if (counter > 0) {
      globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
      globalMinValue = Math.min(globalMinValue, accumulator.minValue);
      globalDocCount += accumulator.docCount;
      maxDocId = accumulator.maxDocID;
      accumulator.writeTo(data);
    }
    meta.writeLong(start); // record the start in meta
    meta.writeLong(data.getFilePointer() - start); // record the length
    meta.writeLong(globalMaxValue);
    meta.writeLong(globalMinValue);
    meta.writeInt(globalDocCount);
    meta.writeInt(maxDocId);
  }

  private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
      throws IOException {
    SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);

@@ -489,13 +570,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
  public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
    meta.writeInt(field.number);
    meta.writeByte(Lucene90DocValuesFormat.SORTED);
    doAddSortedField(field, valuesProducer);
    doAddSortedField(field, valuesProducer, false);
  }

  private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer)
      throws IOException {
    writeValues(
        field,
  private void doAddSortedField(
      FieldInfo field, DocValuesProducer valuesProducer, boolean addTypeByte) throws IOException {
    DocValuesProducer producer =
        new EmptyDocValuesProducer() {
          @Override
          public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {

@@ -534,8 +614,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
                };
            return DocValues.singleton(sortedOrds);
          }
        },
        true);
        };
    if (field.hasDocValuesSkipIndex()) {
      writeSkipIndex(field, producer);
    }
    if (addTypeByte) {
      meta.writeByte((byte) 0); // multiValued (0 = singleValued)
    }
    writeValues(field, producer, true);
    addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
  }

@@ -702,6 +788,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {

  private void doAddSortedNumericField(
      FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
    if (field.hasDocValuesSkipIndex()) {
      writeSkipIndex(field, valuesProducer);
    }
    if (ords) {
      meta.writeByte((byte) 1); // multiValued (1 = multiValued)
    }
    long[] stats = writeValues(field, valuesProducer, ords);
    int numDocsWithField = Math.toIntExact(stats[0]);
    long numValues = stats[1];

@@ -753,7 +845,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
    meta.writeByte(Lucene90DocValuesFormat.SORTED_SET);

    if (isSingleValued(valuesProducer.getSortedSet(field))) {
      meta.writeByte((byte) 0); // multiValued (0 = singleValued)

      doAddSortedField(
          field,
          new EmptyDocValuesProducer() {

@@ -762,10 +854,10 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
              return SortedSetSelector.wrap(
                  valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN);
            }
          });
          },
          true);
      return;
    }
    meta.writeByte((byte) 1); // multiValued (1 = multiValued)

    doAddSortedNumericField(
        field,
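To make the on-disk layout above easier to follow: each flushed SkipAccumulator covers up to SKIP_INDEX_INTERVAL_SIZE documents with values (1 << 12 = 4096, defined just below) and is a fixed-width 28-byte record, which is what lets the producer's advance() skip 24 bytes after peeking at maxDocID. A hedged sketch of decoding one record, not part of the commit; the class name is illustrative.

    import java.io.IOException;
    import org.apache.lucene.store.DataInput;

    // Illustrative decoder for one skip-index block as written by SkipAccumulator.writeTo:
    // writeInt(maxDocID), writeInt(minDocID), writeLong(maxValue), writeLong(minValue), writeInt(docCount).
    final class SkipBlock {
      final int maxDocID, minDocID, docCount;
      final long maxValue, minValue;

      SkipBlock(DataInput in) throws IOException {
        maxDocID = in.readInt();  // 4 bytes
        minDocID = in.readInt();  // 4 bytes
        maxValue = in.readLong(); // 8 bytes
        minValue = in.readLong(); // 8 bytes
        docCount = in.readInt();  // 4 bytes -> 28 bytes per block in total
      }
    }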
@@ -181,4 +181,7 @@ public final class Lucene90DocValuesFormat extends DocValuesFormat {
  static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10;
  static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
  static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;

  static final int SKIP_INDEX_INTERVAL_SHIFT = 12;
  static final int SKIP_INDEX_INTERVAL_SIZE = 1 << SKIP_INDEX_INTERVAL_SHIFT;
}
@@ -27,6 +27,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;

@@ -39,6 +40,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;

@@ -59,6 +61,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
  private final Map<String, SortedEntry> sorted;
  private final Map<String, SortedSetEntry> sortedSets;
  private final Map<String, SortedNumericEntry> sortedNumerics;
  private final Map<String, DocValuesSkipperEntry> skippers;
  private final IndexInput data;
  private final int maxDoc;
  private int version = -1;

@@ -80,6 +83,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
    sorted = new HashMap<>();
    sortedSets = new HashMap<>();
    sortedNumerics = new HashMap<>();
    skippers = new HashMap<>();
    merging = false;

    // read in the entries from the metadata file.

@@ -147,6 +151,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
      Map<String, SortedEntry> sorted,
      Map<String, SortedSetEntry> sortedSets,
      Map<String, SortedNumericEntry> sortedNumerics,
      Map<String, DocValuesSkipperEntry> skippers,
      IndexInput data,
      int maxDoc,
      int version,

@@ -156,6 +161,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
    this.sorted = sorted;
    this.sortedSets = sortedSets;
    this.sortedNumerics = sortedNumerics;
    this.skippers = skippers;
    this.data = data.clone();
    this.maxDoc = maxDoc;
    this.version = version;

@@ -165,7 +171,16 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
  @Override
  public DocValuesProducer getMergeInstance() {
    return new Lucene90DocValuesProducer(
        numerics, binaries, sorted, sortedSets, sortedNumerics, data, maxDoc, version, true);
        numerics,
        binaries,
        sorted,
        sortedSets,
        sortedNumerics,
        skippers,
        data,
        maxDoc,
        version,
        true);
  }

  private void readFields(IndexInput meta, FieldInfos infos) throws IOException {

@@ -175,6 +190,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
        throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
      }
      byte type = meta.readByte();
      if (info.hasDocValuesSkipIndex()) {
        skippers.put(info.name, readDocValueSkipperMeta(meta));
      }
      if (type == Lucene90DocValuesFormat.NUMERIC) {
        numerics.put(info.name, readNumeric(meta));
      } else if (type == Lucene90DocValuesFormat.BINARY) {

@@ -197,6 +215,17 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
    return entry;
  }

  private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IOException {
    long offset = meta.readLong();
    long length = meta.readLong();
    long maxValue = meta.readLong();
    long minValue = meta.readLong();
    int docCount = meta.readInt();
    int maxDocID = meta.readInt();

    return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID);
  }

  private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException {
    entry.docsWithFieldOffset = meta.readLong();
    entry.docsWithFieldLength = meta.readLong();

@@ -326,6 +355,9 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
    data.close();
  }

  private record DocValuesSkipperEntry(
      long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {}

  private static class NumericEntry {
    long[] table;
    int blockShift;

@@ -1749,4 +1781,88 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
      return mul * values.get(index & mask) + delta;
    }
  }

  @Override
  public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
    final DocValuesSkipperEntry entry = skippers.get(field.name);

    final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
    // Prefetch the first page of data. Following pages are expected to get prefetched through
    // read-ahead.
    if (input.length() > 0) {
      input.prefetch(0, 1);
    }
    return new DocValuesSkipper() {
      int minDocID = -1;
      int maxDocID = -1;
      long minValue, maxValue;
      int docCount;

      @Override
      public void advance(int target) throws IOException {
        if (target > entry.maxDocId) {
          minDocID = DocIdSetIterator.NO_MORE_DOCS;
          maxDocID = DocIdSetIterator.NO_MORE_DOCS;
        } else {
          while (true) {
            maxDocID = input.readInt();
            if (maxDocID >= target) {
              minDocID = input.readInt();
              maxValue = input.readLong();
              minValue = input.readLong();
              docCount = input.readInt();
              break;
            } else {
              input.skipBytes(24);
            }
          }
        }
      }

      @Override
      public int numLevels() {
        return 1;
      }

      @Override
      public int minDocID(int level) {
        return minDocID;
      }

      @Override
      public int maxDocID(int level) {
        return maxDocID;
      }

      @Override
      public long minValue(int level) {
        return minValue;
      }

      @Override
      public long maxValue(int level) {
        return maxValue;
      }

      @Override
      public int docCount(int level) {
        return docCount;
      }

      @Override
      public long minValue() {
        return entry.minValue;
      }

      @Override
      public long maxValue() {
        return entry.maxValue;
      }

      @Override
      public int docCount() {
        return entry.docCount;
      }
    };
  }
}
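A hedged sketch of how a caller is expected to walk the skipper returned above, mirroring the iteration pattern that CheckIndex uses later in this commit. Not part of the commit; the class name is illustrative, and the skipper is assumed to be freshly obtained (maxDocID(0) == -1).

    import java.io.IOException;
    import org.apache.lucene.index.DocValuesSkipper;
    import org.apache.lucene.search.DocIdSetIterator;

    final class SkipperWalker {
      // Prints the level-0 block ranges of a skipper for a field that has a skip index.
      static void walk(DocValuesSkipper skipper) throws IOException {
        while (true) {
          // Position on the first block that may contain the next unseen doc ID.
          skipper.advance(skipper.maxDocID(0) + 1);
          if (skipper.maxDocID(0) == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
          System.out.println(
              "docs [" + skipper.minDocID(0) + ", " + skipper.maxDocID(0)
                  + "] values [" + skipper.minValue(0) + ", " + skipper.maxValue(0)
                  + "] docCount=" + skipper.docCount(0));
        }
      }
    }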
@@ -163,8 +163,10 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
        boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
        boolean isParentField =
            format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
        boolean hasDocValuesSkipIndex =
            format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;

        if ((bits & 0xE0) != 0) {
        if ((bits & 0xC0) != 0) {
          throw new CorruptIndexException(
              "unused bits are set \"" + Integer.toBinaryString(bits) + "\"", input);
        }

@@ -173,6 +175,13 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
              "parent field bit is set but shouldn't \"" + Integer.toBinaryString(bits) + "\"",
              input);
        }
        if (format < FORMAT_DOCVALUE_SKIPPER && (bits & DOCVALUES_SKIPPER) != 0) {
          throw new CorruptIndexException(
              "doc values skipper bit is set but shouldn't \""
                  + Integer.toBinaryString(bits)
                  + "\"",
              input);
        }

        final IndexOptions indexOptions = getIndexOptions(input, input.readByte());

@@ -208,6 +217,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
                storePayloads,
                indexOptions,
                docValuesType,
                hasDocValuesSkipIndex,
                dvGen,
                attributes,
                pointDataDimensionCount,

@@ -394,6 +404,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
      if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
      if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
      if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
      if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
      output.writeByte(bits);

      output.writeByte(indexOptionsByte(fi.getIndexOptions()));

@@ -423,7 +434,8 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
  static final int FORMAT_START = 0;
  // this doesn't actually change the file format but uses up one more bit an existing bit pattern
  static final int FORMAT_PARENT_FIELD = 1;
  static final int FORMAT_CURRENT = FORMAT_PARENT_FIELD;
  static final int FORMAT_DOCVALUE_SKIPPER = 2;
  static final int FORMAT_CURRENT = FORMAT_DOCVALUE_SKIPPER;

  // Field flags
  static final byte STORE_TERMVECTOR = 0x1;

@@ -431,4 +443,5 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
  static final byte STORE_PAYLOADS = 0x4;
  static final byte SOFT_DELETES_FIELD = 0x8;
  static final byte PARENT_FIELD_FIELD = 0x10;
  static final byte DOCVALUES_SKIPPER = 0x20;
}
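To illustrate the flag-byte handling above: the new skipper bit occupies 0x20 and is only honored when the segment was written with the newer format version. A small sketch, not part of the commit; the class and method names are illustrative.

    final class FieldInfoBits {
      static final int FORMAT_DOCVALUE_SKIPPER = 2;
      static final byte DOCVALUES_SKIPPER = 0x20;

      // Mirrors the read path: older formats must not have the 0x20 bit set at all.
      static boolean hasDocValuesSkipIndex(int format, byte bits) {
        if (format < FORMAT_DOCVALUE_SKIPPER && (bits & DOCVALUES_SKIPPER) != 0) {
          throw new IllegalStateException("doc values skipper bit set in old format");
        }
        return format >= FORMAT_DOCVALUE_SKIPPER && (bits & DOCVALUES_SKIPPER) != 0;
      }
    }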
@@ -28,6 +28,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;

@@ -346,6 +347,12 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
      return producer == null ? null : producer.getSortedSet(field);
    }

    @Override
    public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
      DocValuesProducer producer = fields.get(field.name);
      return producer == null ? null : producer.getSkipper(field);
    }

    @Override
    public void close() throws IOException {
      IOUtils.close(formats.values());
@@ -0,0 +1,171 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.document;

import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;

/**
 * Wrapper around a {@link TwoPhaseIterator} for a doc-values range query that speeds things up by
 * taking advantage of a {@link DocValuesSkipper}.
 */
final class DocValuesRangeIterator extends TwoPhaseIterator {

  enum Match {
    /** None of the documents in the range match */
    NO,
    /** Document values need to be checked to verify matches */
    MAYBE,
    /** All documents in the range that have a value match */
    IF_DOC_HAS_VALUE,
    /** All docs in the range match */
    YES;
  }

  private final Approximation approximation;
  private final TwoPhaseIterator innerTwoPhase;

  DocValuesRangeIterator(
      TwoPhaseIterator twoPhase, DocValuesSkipper skipper, long lowerValue, long upperValue) {
    super(new Approximation(twoPhase.approximation(), skipper, lowerValue, upperValue));
    this.approximation = (Approximation) approximation();
    this.innerTwoPhase = twoPhase;
  }

  static class Approximation extends DocIdSetIterator {

    private final DocIdSetIterator innerApproximation;
    private final DocValuesSkipper skipper;
    private final long lowerValue;
    private final long upperValue;

    private int doc = -1;

    // Track a decision for all doc IDs between the current doc ID and upTo inclusive.
    Match match = Match.MAYBE;
    int upTo = -1;

    Approximation(
        DocIdSetIterator innerApproximation,
        DocValuesSkipper skipper,
        long lowerValue,
        long upperValue) {
      this.innerApproximation = innerApproximation;
      this.skipper = skipper;
      this.lowerValue = lowerValue;
      this.upperValue = upperValue;
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(docID() + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      while (true) {
        if (target > upTo) {
          skipper.advance(target);
          // If target doesn't have a value and is between two blocks, it is possible that advance()
          // moved to a block that doesn't contain `target`.
          target = Math.max(target, skipper.minDocID(0));
          if (target == NO_MORE_DOCS) {
            return doc = NO_MORE_DOCS;
          }
          upTo = skipper.maxDocID(0);
          match = match(0);

          // If we have a YES or NO decision, see if we still have the same decision on a higher
          // level (= on a wider range of doc IDs)
          int nextLevel = 1;
          while (match != Match.MAYBE
              && nextLevel < skipper.numLevels()
              && match == match(nextLevel)) {
            upTo = skipper.maxDocID(nextLevel);
            nextLevel++;
          }
        }
        switch (match) {
          case YES:
            return doc = target;
          case MAYBE:
          case IF_DOC_HAS_VALUE:
            if (target > innerApproximation.docID()) {
              target = innerApproximation.advance(target);
            }
            if (target <= upTo) {
              return doc = target;
            }
            // Otherwise we are breaking the invariant that `doc` must always be <= upTo, so let
            // the loop run one more iteration to advance the skipper.
            break;
          case NO:
            if (upTo == DocIdSetIterator.NO_MORE_DOCS) {
              return doc = NO_MORE_DOCS;
            }
            target = upTo + 1;
            break;
          default:
            throw new AssertionError("Unknown enum constant: " + match);
        }
      }
    }

    @Override
    public long cost() {
      return innerApproximation.cost();
    }

    private Match match(int level) {
      long minValue = skipper.minValue(level);
      long maxValue = skipper.maxValue(level);
      if (minValue > upperValue || maxValue < lowerValue) {
        return Match.NO;
      } else if (minValue >= lowerValue && maxValue <= upperValue) {
        if (skipper.docCount(level) == skipper.maxDocID(level) - skipper.minDocID(level) + 1) {
          return Match.YES;
        } else {
          return Match.IF_DOC_HAS_VALUE;
        }
      } else {
        return Match.MAYBE;
      }
    }
  }

  @Override
  public final boolean matches() throws IOException {
    return switch (approximation.match) {
      case YES -> true;
      case IF_DOC_HAS_VALUE -> true;
      case MAYBE -> innerTwoPhase.matches();
      case NO -> throw new IllegalStateException("Unpositioned approximation");
    };
  }

  @Override
  public float matchCost() {
    return innerTwoPhase.matchCost();
  }
}
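A worked illustration of the match() decision above, restated as a standalone check over plain numbers. Not part of the commit; the class name and sample values are illustrative.

    // Decision table used by DocValuesRangeIterator.match(), restated over plain longs.
    // block = [blockMin, blockMax] value range of a skipper block, query = [lower, upper].
    final class MatchDecision {
      static String decide(
          long blockMin, long blockMax, long lower, long upper, boolean everyDocInBlockHasValue) {
        if (blockMin > upper || blockMax < lower) {
          return "NO"; // ranges are disjoint: no doc in the block can match
        } else if (blockMin >= lower && blockMax <= upper) {
          // every value in the block falls inside the query range
          return everyDocInBlockHasValue ? "YES" : "IF_DOC_HAS_VALUE";
        } else {
          return "MAYBE"; // ranges overlap partially: per-doc check required
        }
      }

      public static void main(String[] args) {
        System.out.println(decide(10, 20, 0, 100, true)); // YES
        System.out.println(decide(10, 20, 15, 17, true)); // MAYBE
        System.out.println(decide(10, 20, 30, 40, true)); // NO
      }
    }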
@@ -22,6 +22,7 @@ import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.VectorEncoding;

@@ -40,6 +41,7 @@ public class FieldType implements IndexableFieldType {
  private IndexOptions indexOptions = IndexOptions.NONE;
  private boolean frozen;
  private DocValuesType docValuesType = DocValuesType.NONE;
  private boolean docValuesSkipIndex;
  private int dimensionCount;
  private int indexDimensionCount;
  private int dimensionNumBytes;

@@ -59,6 +61,7 @@ public class FieldType implements IndexableFieldType {
    this.omitNorms = ref.omitNorms();
    this.indexOptions = ref.indexOptions();
    this.docValuesType = ref.docValuesType();
    this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
    this.dimensionCount = ref.pointDimensionCount();
    this.indexDimensionCount = ref.pointIndexDimensionCount();
    this.dimensionNumBytes = ref.pointNumBytes();

@@ -504,6 +507,22 @@ public class FieldType implements IndexableFieldType {
    docValuesType = type;
  }

  @Override
  public boolean hasDocValuesSkipIndex() {
    return docValuesSkipIndex;
  }

  /**
   * Set whether to enable a skip index for doc values on this field. This is typically useful on
   * fields that are part of the {@link IndexWriterConfig#setIndexSort index sort}, or that
   * correlate with fields that are part of the index sort, so that values can be expected to be
   * clustered in the doc ID space.
   */
  public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
    checkIfFrozen();
    this.docValuesSkipIndex = docValuesSkipIndex;
  }

  @Override
  public int hashCode() {
    final int prime = 31;

@@ -512,6 +531,7 @@ public class FieldType implements IndexableFieldType {
    result = prime * result + indexDimensionCount;
    result = prime * result + dimensionNumBytes;
    result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
    result = prime * result + Boolean.hashCode(docValuesSkipIndex);
    result = prime * result + indexOptions.hashCode();
    result = prime * result + (omitNorms ? 1231 : 1237);
    result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);

@@ -533,6 +553,7 @@ public class FieldType implements IndexableFieldType {
    if (indexDimensionCount != other.indexDimensionCount) return false;
    if (dimensionNumBytes != other.dimensionNumBytes) return false;
    if (docValuesType != other.docValuesType) return false;
    if (docValuesSkipIndex != other.docValuesSkipIndex) return false;
    if (indexOptions != other.indexOptions) return false;
    if (omitNorms != other.omitNorms) return false;
    if (storeTermVectorOffsets != other.storeTermVectorOffsets) return false;
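A minimal sketch of the configuration the setDocValuesSkipIndex javadoc above has in mind: the skip index pays off when the field is also part of the index sort, so that values are clustered in doc ID space. Not part of the commit; the field name "timestamp" is illustrative.

    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.index.DocValuesType;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;

    public class SkipIndexFieldTypeExample {
      public static void main(String[] args) {
        // Sort the index by the same field that carries the skip index.
        IndexWriterConfig config = new IndexWriterConfig();
        config.setIndexSort(new Sort(new SortField("timestamp", SortField.Type.LONG)));

        FieldType type = new FieldType();
        type.setDocValuesType(DocValuesType.NUMERIC);
        type.setDocValuesSkipIndex(true); // the new flag added by this commit
        type.freeze();
        System.out.println(type.hasDocValuesSkipIndex()); // true
      }
    }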
@@ -35,9 +35,27 @@ public class NumericDocValuesField extends Field {
  /** Type for numeric DocValues. */
  public static final FieldType TYPE = new FieldType();

  private static final FieldType INDEXED_TYPE;

  static {
    TYPE.setDocValuesType(DocValuesType.NUMERIC);
    TYPE.freeze();

    INDEXED_TYPE = new FieldType(TYPE);
    INDEXED_TYPE.setDocValuesSkipIndex(true);
    INDEXED_TYPE.freeze();
  }

  /**
   * Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
   * creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
   *
   * @param name field name
   * @param value 64-bit long value
   * @throws IllegalArgumentException if the field name is null
   */
  public static NumericDocValuesField indexedField(String name, long value) {
    return new NumericDocValuesField(name, value, INDEXED_TYPE);
  }

  /**

@@ -60,7 +78,11 @@ public class NumericDocValuesField extends Field {
   * @throws IllegalArgumentException if the field name is null
   */
  public NumericDocValuesField(String name, Long value) {
    super(name, TYPE);
    this(name, value, TYPE);
  }

  private NumericDocValuesField(String name, Long value, FieldType fieldType) {
    super(name, fieldType);
    fieldsData = value;
  }

@@ -41,9 +41,27 @@ public class SortedDocValuesField extends Field {
  /** Type for sorted bytes DocValues */
  public static final FieldType TYPE = new FieldType();

  private static final FieldType INDEXED_TYPE;

  static {
    TYPE.setDocValuesType(DocValuesType.SORTED);
    TYPE.freeze();

    INDEXED_TYPE = new FieldType(TYPE);
    INDEXED_TYPE.setDocValuesSkipIndex(true);
    INDEXED_TYPE.freeze();
  }

  /**
   * Creates a new {@link SortedDocValuesField} with the specified 64-bit long value that also
   * creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
   *
   * @param name field name
   * @param bytes binary content
   * @throws IllegalArgumentException if the field name is null
   */
  public static SortedDocValuesField indexedField(String name, BytesRef bytes) {
    return new SortedDocValuesField(name, bytes, INDEXED_TYPE);
  }

  /**

@@ -54,7 +72,11 @@ public class SortedDocValuesField extends Field {
   * @throws IllegalArgumentException if the field name is null
   */
  public SortedDocValuesField(String name, BytesRef bytes) {
    super(name, TYPE);
    this(name, bytes, TYPE);
  }

  private SortedDocValuesField(String name, BytesRef bytes, FieldType fieldType) {
    super(name, fieldType);
    fieldsData = bytes;
  }

@@ -43,9 +43,27 @@ public class SortedNumericDocValuesField extends Field {
  /** Type for sorted numeric DocValues. */
  public static final FieldType TYPE = new FieldType();

  private static final FieldType INDEXED_TYPE;

  static {
    TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
    TYPE.freeze();

    INDEXED_TYPE = new FieldType(TYPE);
    INDEXED_TYPE.setDocValuesSkipIndex(true);
    INDEXED_TYPE.freeze();
  }

  /**
   * Creates a new {@link SortedNumericDocValuesField} with the specified 64-bit long value that
   * also creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
   *
   * @param name field name
   * @param value 64-bit long value
   * @throws IllegalArgumentException if the field name is null
   */
  public static SortedNumericDocValuesField indexedField(String name, long value) {
    return new SortedNumericDocValuesField(name, value, INDEXED_TYPE);
  }

  /**

@@ -56,8 +74,12 @@ public class SortedNumericDocValuesField extends Field {
   * @throws IllegalArgumentException if the field name is null
   */
  public SortedNumericDocValuesField(String name, long value) {
    super(name, TYPE);
    fieldsData = Long.valueOf(value);
    this(name, Long.valueOf(value), TYPE);
  }

  private SortedNumericDocValuesField(String name, Long value, FieldType fieldType) {
    super(name, fieldType);
    fieldsData = value;
  }

  /**
@@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;

@@ -109,9 +110,17 @@ final class SortedNumericDocValuesRangeQuery extends Query {
        if (context.reader().getFieldInfos().fieldInfo(field) == null) {
          return null;
        }

        DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
        if (skipper != null) {
          if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
            return null;
          }
        }

        SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field);
        final NumericDocValues singleton = DocValues.unwrapSingleton(values);
        final TwoPhaseIterator iterator;
        TwoPhaseIterator iterator;
        if (singleton != null) {
          iterator =
              new TwoPhaseIterator(singleton) {

@@ -149,6 +158,9 @@ final class SortedNumericDocValuesRangeQuery extends Query {
            }
          };
        }
        if (skipper != null) {
          iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue);
        }
        final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
        return new DefaultScorerSupplier(scorer);
      }
@@ -42,9 +42,27 @@ public class SortedSetDocValuesField extends Field {
  /** Type for sorted bytes DocValues */
  public static final FieldType TYPE = new FieldType();

  private static final FieldType INDEXED_TYPE;

  static {
    TYPE.setDocValuesType(DocValuesType.SORTED_SET);
    TYPE.freeze();

    INDEXED_TYPE = new FieldType(TYPE);
    INDEXED_TYPE.setDocValuesSkipIndex(true);
    INDEXED_TYPE.freeze();
  }

  /**
   * Creates a new {@link SortedSetDocValuesField} with the specified 64-bit long value that also
   * creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
   *
   * @param name field name
   * @param bytes binary content
   * @throws IllegalArgumentException if the field name is null
   */
  public static SortedSetDocValuesField indexedField(String name, BytesRef bytes) {
    return new SortedSetDocValuesField(name, bytes, INDEXED_TYPE);
  }

  /**

@@ -55,7 +73,11 @@ public class SortedSetDocValuesField extends Field {
   * @throws IllegalArgumentException if the field name is null
   */
  public SortedSetDocValuesField(String name, BytesRef bytes) {
    super(name, TYPE);
    this(name, bytes, TYPE);
  }

  private SortedSetDocValuesField(String name, BytesRef bytes, FieldType fieldType) {
    super(name, fieldType);
    fieldsData = bytes;
  }

@@ -19,6 +19,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;

@@ -113,6 +114,7 @@ final class SortedSetDocValuesRangeQuery extends Query {
    if (context.reader().getFieldInfos().fieldInfo(field) == null) {
      return null;
    }
    DocValuesSkipper skipper = context.reader().getDocValuesSkipper(field);
    SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);

    // implement ScorerSupplier, since we do some expensive stuff to make a scorer

@@ -149,12 +151,15 @@ final class SortedSetDocValuesRangeQuery extends Query {
        }

        // no terms matched in this segment
        if (minOrd > maxOrd) {
        // no terms matched in this segment
        if (minOrd > maxOrd
            || (skipper != null
                && (minOrd > skipper.maxValue() || maxOrd < skipper.minValue()))) {
          return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
        }

        final SortedDocValues singleton = DocValues.unwrapSingleton(values);
        final TwoPhaseIterator iterator;
        TwoPhaseIterator iterator;
        if (singleton != null) {
          iterator =
              new TwoPhaseIterator(singleton) {

@@ -192,6 +197,9 @@ final class SortedSetDocValuesRangeQuery extends Query {
            }
          };
        }
        if (skipper != null) {
          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd);
        }
        return new ConstantScoreScorer(score(), scoreMode, iterator);
      }

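For completeness, a hedged sketch of the query-side entry point that ends up in the code path above when the field was indexed with a skip index. Not part of the commit; the field name and term bounds are illustrative.

    import org.apache.lucene.document.SortedSetDocValuesField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.BytesRef;

    final class SortedSetRangeExample {
      static long countRange(DirectoryReader reader) throws Exception {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Ordinal bounds are resolved per segment; the skipper then prunes blocks whose
        // ordinal range cannot intersect [minOrd, maxOrd].
        Query query =
            SortedSetDocValuesField.newSlowRangeQuery(
                "category", new BytesRef("a"), new BytesRef("m"), true, true);
        return searcher.count(query);
      }
    }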
@ -365,6 +365,9 @@ public final class CheckIndex implements Closeable {
|
|||
/** Total number of sortedset fields */
|
||||
public long totalSortedSetFields;
|
||||
|
||||
/** Total number of skipping index tested. */
|
||||
public long totalSkippingIndex;
|
||||
|
||||
/** Exception thrown during doc values test (null on success) */
|
||||
public Throwable error;
|
||||
}
|
||||
|
@ -3228,13 +3231,14 @@ public final class CheckIndex implements Closeable {
|
|||
infoStream,
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"OK [%d docvalues fields; %d BINARY; %d NUMERIC; %d SORTED; %d SORTED_NUMERIC; %d SORTED_SET] [took %.3f sec]",
|
||||
"OK [%d docvalues fields; %d BINARY; %d NUMERIC; %d SORTED; %d SORTED_NUMERIC; %d SORTED_SET; %d SKIPPING INDEX] [took %.3f sec]",
|
||||
status.totalValueFields,
|
||||
status.totalBinaryFields,
|
||||
status.totalNumericFields,
|
||||
status.totalSortedFields,
|
||||
status.totalSortedNumericFields,
|
||||
status.totalSortedSetFields,
|
||||
status.totalSkippingIndex,
|
||||
nsToSec(System.nanoTime() - startNS)));
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
|
@ -3254,6 +3258,94 @@ public final class CheckIndex implements Closeable {
|
|||
DocValuesIterator get(FieldInfo fi) throws IOException;
|
||||
}
|
||||
|
||||
private static void checkDocValueSkipper(FieldInfo fi, DocValuesSkipper skipper)
|
||||
throws IOException {
|
||||
String fieldName = fi.name;
|
||||
if (skipper.maxDocID(0) != -1) {
|
||||
throw new CheckIndexException(
|
||||
"binary dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " should start at docID=-1, but got "
|
||||
+ skipper.maxDocID(0));
|
||||
}
|
||||
if (skipper.docCount() > 0 && skipper.minValue() > skipper.maxValue()) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " reports wrong global value range, got "
|
||||
+ skipper.minValue()
|
||||
+ " > "
|
||||
+ skipper.maxValue());
|
||||
}
|
||||
int docCount = 0;
|
||||
int doc;
|
||||
while (true) {
|
||||
doc = skipper.maxDocID(0) + 1;
|
||||
skipper.advance(doc);
|
||||
if (skipper.maxDocID(0) == NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
int levels = skipper.numLevels();
|
||||
for (int level = 0; level < levels; level++) {
|
||||
if (skipper.minDocID(level) < doc) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " reports wrong minDocID, got "
|
||||
+ skipper.minDocID(level)
|
||||
+ " < "
|
||||
+ doc);
|
||||
}
|
||||
if (skipper.minDocID(level) > skipper.maxDocID(level)) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " reports wrong doc range, got "
|
||||
+ skipper.minDocID(level)
|
||||
+ " > "
|
||||
+ skipper.maxDocID(level));
|
||||
}
|
||||
if (skipper.minValue() > skipper.minValue(level)) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " : global minValue "
|
||||
+ skipper.minValue()
|
||||
+ " , got "
|
||||
+ skipper.minValue(level));
|
||||
}
|
||||
if (skipper.maxValue() < skipper.maxValue(level)) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " : global maxValue "
|
||||
+ skipper.maxValue()
|
||||
+ " , got "
|
||||
+ skipper.maxValue(level));
|
||||
}
|
||||
if (skipper.minValue(level) > skipper.maxValue(level)) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " reports wrong value range, got "
|
||||
+ skipper.minValue(level)
|
||||
+ " > "
|
||||
+ skipper.maxValue(level));
|
||||
}
|
||||
}
|
||||
docCount += skipper.docCount(0);
|
||||
}
|
||||
if (skipper.docCount() != docCount) {
|
||||
throw new CheckIndexException(
|
||||
"skipper dv iterator for field: "
|
||||
+ fieldName
|
||||
+ " inconsistent docCount, got "
|
||||
+ skipper.docCount()
|
||||
+ " != "
|
||||
+ docCount);
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkDVIterator(FieldInfo fi, DocValuesIteratorSupplier producer)
|
||||
throws IOException {
|
||||
String field = fi.name;
|
||||
|
@ -3627,6 +3719,10 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
private static void checkDocValues(
|
||||
FieldInfo fi, DocValuesProducer dvReader, DocValuesStatus status) throws Exception {
|
||||
if (fi.hasDocValuesSkipIndex()) {
|
||||
status.totalSkippingIndex++;
|
||||
checkDocValueSkipper(fi, dvReader.getSkipper(fi));
|
||||
}
|
||||
switch (fi.getDocValuesType()) {
|
||||
case SORTED:
|
||||
status.totalSortedFields++;
|
||||
|
|
|
@ -196,6 +196,16 @@ public abstract class CodecReader extends LeafReader {
|
|||
return getDocValuesReader().getSortedSet(fi);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
ensureOpen();
|
||||
FieldInfo fi = getFieldInfos().fieldInfo(field);
|
||||
if (fi == null || fi.hasDocValuesSkipIndex() == false) {
|
||||
return null;
|
||||
}
|
||||
return getDocValuesReader().getSkipper(fi);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final NumericDocValues getNormValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -108,4 +108,9 @@ abstract class DocValuesLeafReader extends LeafReader {
|
|||
public final CacheHelper getReaderCacheHelper() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
||||
/**
|
||||
* Skipper for {@link DocValues}.
|
||||
*
|
||||
* <p>A skipper has a position that can only be advanced via {@link #advance(int)}. The next advance
|
||||
* position must be greater than {@link #maxDocID(int)} at level 0. A skipper's position, along with
|
||||
* a {@code level}, determines the interval at which the skipper is currently situated.
|
||||
*/
|
||||
public abstract class DocValuesSkipper {
|
||||
|
||||
/**
|
||||
* Advance this skipper so that all levels contain the next document on or after {@code target}.
|
||||
*
|
||||
* <p><b>NOTE</b>: The behavior is undefined if {@code target} is less than or equal to {@code
|
||||
* maxDocID(0)}.
|
||||
*
|
||||
* <p><b>NOTE</b>: {@code minDocID(0)} may return a doc ID that is greater than {@code target} if
|
||||
* the target document doesn't have a value.
|
||||
*/
|
||||
public abstract void advance(int target) throws IOException;
|
||||
|
||||
/** Return the number of levels. This number may change when moving to a different interval. */
|
||||
public abstract int numLevels();
|
||||
|
||||
/**
|
||||
* Return the minimum doc ID of the interval on the given level, inclusive. This returns {@code
|
||||
* -1} if {@link #advance(int)} has not been called yet and {@link DocIdSetIterator#NO_MORE_DOCS}
|
||||
* if the iterator is exhausted. This method is non-increasing when {@code level} increases. Said
* otherwise, {@code minDocID(level+1) <= minDocID(level)}.
|
||||
*/
|
||||
public abstract int minDocID(int level);
|
||||
|
||||
/**
|
||||
* Return the maximum doc ID of the interval on the given level, inclusive. This returns {@code
|
||||
* -1} if {@link #advance(int)} has not been called yet and {@link DocIdSetIterator#NO_MORE_DOCS}
|
||||
* if the iterator is exhausted. This method is non-decreasing when {@code level} increases. Said
* otherwise, {@code maxDocID(level+1) >= maxDocID(level)}.
|
||||
*/
|
||||
public abstract int maxDocID(int level);
|
||||
|
||||
/**
|
||||
* Return the minimum value of the interval at the given level, inclusive.
|
||||
*
|
||||
* <p><b>NOTE</b>: It is only guaranteed that values in this interval are greater than or equal to
* the returned value. There is no guarantee that one document actually has this value.
|
||||
*/
|
||||
public abstract long minValue(int level);
|
||||
|
||||
/**
|
||||
* Return the maximum value of the interval at the given level, inclusive.
|
||||
*
|
||||
* <p><b>NOTE</b>: It is only guaranteed that values in this interval are less than or equal to the
* returned value. There is no guarantee that one document actually has this value.
|
||||
*/
|
||||
public abstract long maxValue(int level);
|
||||
|
||||
/**
|
||||
* Return the number of documents that have a value in the interval associated with the given
|
||||
* level.
|
||||
*/
|
||||
public abstract int docCount(int level);
|
||||
|
||||
/**
|
||||
* Return the global minimum value.
|
||||
*
|
||||
* <p><b>NOTE</b>: It is only guaranteed that values are greater than or equal to the returned value.
|
||||
* There is no guarantee that one document actually has this value.
|
||||
*/
|
||||
public abstract long minValue();
|
||||
|
||||
/**
|
||||
* Return the global maximum value.
|
||||
*
|
||||
* <p><b>NOTE</b>: It is only guaranteed that values are less than or equal to the returned value.
|
||||
* There is no guarantee that one document actually has this value.
|
||||
*/
|
||||
public abstract long maxValue();
|
||||
|
||||
/** Return the global number of documents with a value for the field. */
|
||||
public abstract int docCount();
|
||||
}
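
A minimal consumption sketch (assumptions: "reader" is a LeafReader and "field" was indexed with a skip index): walking the level-0 intervals of a skipper, mirroring what the CheckIndex changes above do when validating one.

    DocValuesSkipper skipper = reader.getDocValuesSkipper("field");
    int docsWithValue = 0;
    while (true) {
      skipper.advance(skipper.maxDocID(0) + 1); // move past the current level-0 interval
      if (skipper.maxDocID(0) == DocIdSetIterator.NO_MORE_DOCS) {
        break; // exhausted
      }
      docsWithValue += skipper.docCount(0); // docs with a value in this interval
    }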
@ -22,31 +22,37 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
public enum DocValuesType {
|
||||
/** No doc values for this field. */
|
||||
NONE,
|
||||
NONE(false),
|
||||
/** A per-document Number */
|
||||
NUMERIC,
|
||||
NUMERIC(true),
|
||||
/**
|
||||
* A per-document byte[]. Values may be larger than 32766 bytes, but different codecs may enforce
|
||||
* their own limits.
|
||||
*/
|
||||
BINARY,
|
||||
BINARY(false),
|
||||
/**
|
||||
* A pre-sorted byte[]. Fields with this type only store distinct byte values and store an
|
||||
* additional offset pointer per document to dereference the shared byte[]. The stored byte[] is
|
||||
* presorted and allows access via document id, ordinal and by-value. Values must be {@code <=
|
||||
* 32766} bytes.
|
||||
*/
|
||||
SORTED,
|
||||
SORTED(true),
|
||||
/**
|
||||
* A pre-sorted Number[]. Fields with this type store numeric values in sorted order according to
|
||||
* {@link Long#compare(long, long)}.
|
||||
*/
|
||||
SORTED_NUMERIC,
|
||||
SORTED_NUMERIC(true),
|
||||
/**
|
||||
* A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values and store
|
||||
* additional offset pointers per document to dereference the shared byte[]s. The stored byte[] is
|
||||
* presorted and allows access via document id, ordinal and by-value. Values must be {@code <=
|
||||
* 32766} bytes.
|
||||
*/
|
||||
SORTED_SET,
|
||||
SORTED_SET(true);
|
||||
|
||||
final boolean supportsSkipIndex; // pkg-private for use in FieldInfo
|
||||
|
||||
DocValuesType(boolean supportsSkipIndex) {
|
||||
this.supportsSkipIndex = supportsSkipIndex;
|
||||
}
|
||||
}
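
A hedged sketch of the configuration side (the SORTED_NUMERIC choice is an example, not from this hunk): the new FieldType flag may only be combined with doc values types whose supportsSkipIndex flag is true; otherwise indexing rejects the field, as the FieldInfo validation below shows.

    FieldType ft = new FieldType();
    ft.setDocValuesType(DocValuesType.SORTED_NUMERIC);
    ft.setDocValuesSkipIndex(true); // legal only for types that support a skip index
    ft.freeze();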
|
||||
|
|
|
@ -51,6 +51,11 @@ public abstract class EmptyDocValuesProducer extends DocValuesProducer {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getSkipper(FieldInfo field) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -28,13 +28,16 @@ import java.util.Objects;
|
|||
* threads accessing this object.
|
||||
*/
|
||||
public final class FieldInfo {
|
||||
|
||||
/** Field's name */
|
||||
public final String name;
|
||||
|
||||
/** Internal field number */
|
||||
public final int number;
|
||||
|
||||
private DocValuesType docValuesType;
|
||||
private DocValuesType docValuesType = DocValuesType.NONE;
|
||||
|
||||
private final boolean docValuesSkipIndex;
|
||||
|
||||
// True if any document indexed term vectors
|
||||
private boolean storeTermVector;
|
||||
|
@ -80,6 +83,7 @@ public final class FieldInfo {
|
|||
boolean storePayloads,
|
||||
IndexOptions indexOptions,
|
||||
DocValuesType docValues,
|
||||
boolean hasDocValuesSkipIndex,
|
||||
long dvGen,
|
||||
Map<String, String> attributes,
|
||||
int pointDimensionCount,
|
||||
|
@ -95,6 +99,7 @@ public final class FieldInfo {
|
|||
this.docValuesType =
|
||||
Objects.requireNonNull(
|
||||
docValues, "DocValuesType must not be null (field: \"" + name + "\")");
|
||||
this.docValuesSkipIndex = hasDocValuesSkipIndex;
|
||||
this.indexOptions =
|
||||
Objects.requireNonNull(
|
||||
indexOptions, "IndexOptions must not be null (field: \"" + name + "\")");
|
||||
|
@ -152,6 +157,13 @@ public final class FieldInfo {
|
|||
if (docValuesType == null) {
|
||||
throw new IllegalArgumentException("DocValuesType must not be null (field: '" + name + "')");
|
||||
}
|
||||
if (docValuesType.supportsSkipIndex == false && docValuesSkipIndex) {
|
||||
throw new IllegalArgumentException(
|
||||
"field '"
|
||||
+ name
|
||||
+ "' cannot have docValuesSkipIndex set to true with doc values type "
|
||||
+ docValuesType);
|
||||
}
|
||||
if (dvGen != -1 && docValuesType == DocValuesType.NONE) {
|
||||
throw new IllegalArgumentException(
|
||||
"field '"
|
||||
|
@ -235,6 +247,7 @@ public final class FieldInfo {
|
|||
verifySameStoreTermVectors(fieldName, this.storeTermVector, o.storeTermVector);
|
||||
}
|
||||
verifySameDocValuesType(fieldName, this.docValuesType, o.docValuesType);
|
||||
verifySameDocValuesSkipIndex(fieldName, this.docValuesSkipIndex, o.docValuesSkipIndex);
|
||||
verifySamePointsOptions(
|
||||
fieldName,
|
||||
this.pointDimensionCount,
|
||||
|
@ -289,6 +302,24 @@ public final class FieldInfo {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that the provided docValuesSkipIndex options are the same
|
||||
*
|
||||
* @throws IllegalArgumentException if they are not the same
|
||||
*/
|
||||
static void verifySameDocValuesSkipIndex(
|
||||
String fieldName, boolean hasDocValuesSkipIndex1, boolean hasDocValuesSkipIndex2) {
|
||||
if (hasDocValuesSkipIndex1 != hasDocValuesSkipIndex2) {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot change field \""
|
||||
+ fieldName
|
||||
+ "\" from docValuesSkipIndex="
|
||||
+ hasDocValuesSkipIndex1
|
||||
+ " to inconsistent docValuesSkipIndex="
|
||||
+ hasDocValuesSkipIndex2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that the provided store term vectors options are the same
|
||||
*
|
||||
|
@ -557,6 +588,11 @@ public final class FieldInfo {
|
|||
return docValuesType;
|
||||
}
|
||||
|
||||
/** Returns true if, and only if, this field has a skip index. */
|
||||
public boolean hasDocValuesSkipIndex() {
|
||||
return docValuesSkipIndex;
|
||||
}
|
||||
|
||||
/** Sets the docValues generation of this field. */
|
||||
void setDocValuesGen(long dvGen) {
|
||||
this.dvGen = dvGen;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import static org.apache.lucene.index.FieldInfo.verifySameDocValuesSkipIndex;
|
||||
import static org.apache.lucene.index.FieldInfo.verifySameDocValuesType;
|
||||
import static org.apache.lucene.index.FieldInfo.verifySameIndexOptions;
|
||||
import static org.apache.lucene.index.FieldInfo.verifySameOmitNorms;
|
||||
|
@ -364,6 +365,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
IndexOptions indexOptions,
|
||||
IndexOptionsProperties indexOptionsProperties,
|
||||
DocValuesType docValuesType,
|
||||
boolean docValuesSkipIndex,
|
||||
FieldDimensions fieldDimensions,
|
||||
FieldVectorProperties fieldVectorProperties) {}
|
||||
|
||||
|
@ -442,6 +444,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
? new IndexOptionsProperties(fi.hasVectors(), fi.omitsNorms())
|
||||
: null,
|
||||
fi.getDocValuesType(),
|
||||
fi.hasDocValuesSkipIndex(),
|
||||
new FieldDimensions(
|
||||
fi.getPointDimensionCount(),
|
||||
fi.getPointIndexDimensionCount(),
|
||||
|
@ -521,6 +524,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
|
||||
DocValuesType currentDVType = fieldProperties.docValuesType;
|
||||
verifySameDocValuesType(fieldName, currentDVType, fi.getDocValuesType());
|
||||
boolean currentDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
|
||||
verifySameDocValuesSkipIndex(
|
||||
fieldName, currentDocValuesSkipIndex, fi.hasDocValuesSkipIndex());
|
||||
|
||||
FieldDimensions dims = fieldProperties.fieldDimensions;
|
||||
verifySamePointsOptions(
|
||||
|
@ -576,6 +582,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
dvType,
|
||||
false,
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
@ -602,6 +609,15 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
+ fieldDvType
|
||||
+ "].");
|
||||
}
|
||||
boolean hasDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
|
||||
if (hasDocValuesSkipIndex) {
|
||||
throw new IllegalArgumentException(
|
||||
"Can't update ["
|
||||
+ dvType
|
||||
+ "] doc values; the field ["
|
||||
+ fieldName
|
||||
+ "] must be doc values only field, bit it has doc values skip index");
|
||||
}
|
||||
FieldDimensions fdimensions = fieldProperties.fieldDimensions;
|
||||
if (fdimensions != null && fdimensions.dimensionCount != 0) {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -660,6 +676,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
dvType,
|
||||
false,
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
@ -780,6 +797,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
|
|||
fi.hasPayloads(),
|
||||
fi.getIndexOptions(),
|
||||
fi.getDocValuesType(),
|
||||
fi.hasDocValuesSkipIndex(),
|
||||
dvGen,
|
||||
// original attributes is UnmodifiableMap
|
||||
new HashMap<>(fi.attributes()),
|
||||
|
|
|
@ -441,6 +441,12 @@ public abstract class FilterLeafReader extends LeafReader {
|
|||
return in.getSortedSetDocValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
ensureOpen();
|
||||
return in.getDocValuesSkipper(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -86,6 +86,9 @@ public interface IndexableFieldType {
|
|||
/** DocValues {@link DocValuesType}: how the field's value will be indexed into docValues. */
|
||||
DocValuesType docValuesType();
|
||||
|
||||
/** Whether a skip index for doc values should be created on this field. */
|
||||
boolean hasDocValuesSkipIndex();
|
||||
|
||||
/**
|
||||
* If this is positive (representing the number of point dimensions), the field is indexed as a
|
||||
* point.
|
||||
|
|
|
@ -680,6 +680,7 @@ final class IndexingChain implements Accountable {
|
|||
false,
|
||||
s.indexOptions,
|
||||
s.docValuesType,
|
||||
s.hasDocValuesSkipIndex,
|
||||
-1,
|
||||
s.attributes,
|
||||
s.pointDimensionCount,
|
||||
|
@ -831,7 +832,12 @@ final class IndexingChain implements Accountable {
|
|||
verifyUnIndexedFieldType(fieldName, fieldType);
|
||||
}
|
||||
if (fieldType.docValuesType() != DocValuesType.NONE) {
|
||||
schema.setDocValues(fieldType.docValuesType());
|
||||
schema.setDocValues(fieldType.docValuesType(), fieldType.hasDocValuesSkipIndex());
|
||||
} else if (fieldType.hasDocValuesSkipIndex()) {
|
||||
throw new IllegalArgumentException(
|
||||
"field '"
|
||||
+ schema.name
|
||||
+ "' cannot have docValuesSkipIndex set to true without doc values");
|
||||
}
|
||||
if (fieldType.pointDimensionCount() != 0) {
|
||||
schema.setPoints(
|
||||
|
@ -1432,6 +1438,7 @@ final class IndexingChain implements Accountable {
|
|||
private boolean storeTermVector = false;
|
||||
private IndexOptions indexOptions = IndexOptions.NONE;
|
||||
private DocValuesType docValuesType = DocValuesType.NONE;
|
||||
private boolean hasDocValuesSkipIndex = false;
|
||||
private int pointDimensionCount = 0;
|
||||
private int pointIndexDimensionCount = 0;
|
||||
private int pointNumBytes = 0;
|
||||
|
@ -1497,11 +1504,13 @@ final class IndexingChain implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
void setDocValues(DocValuesType newDocValuesType) {
|
||||
void setDocValues(DocValuesType newDocValuesType, boolean newHasDocValuesSkipIndex) {
|
||||
if (docValuesType == DocValuesType.NONE) {
|
||||
this.docValuesType = newDocValuesType;
|
||||
this.hasDocValuesSkipIndex = newHasDocValuesSkipIndex;
|
||||
} else {
|
||||
assertSame("doc values type", docValuesType, newDocValuesType);
|
||||
assertSame("doc values skip index", hasDocValuesSkipIndex, newHasDocValuesSkipIndex);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1549,6 +1558,7 @@ final class IndexingChain implements Accountable {
|
|||
assertSame("omit norms", fi.omitsNorms(), omitNorms);
|
||||
assertSame("store term vector", fi.hasVectors(), storeTermVector);
|
||||
assertSame("doc values type", fi.getDocValuesType(), docValuesType);
|
||||
assertSame("doc values skip index", fi.hasDocValuesSkipIndex(), hasDocValuesSkipIndex);
|
||||
assertSame(
|
||||
"vector similarity function", fi.getVectorSimilarityFunction(), vectorSimilarityFunction);
|
||||
assertSame("vector encoding", fi.getVectorEncoding(), vectorEncoding);
|
||||
|
|
|
@ -202,6 +202,13 @@ public abstract non-sealed class LeafReader extends IndexReader {
|
|||
*/
|
||||
public abstract NumericDocValues getNormValues(String field) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns a {@link DocValuesSkipper} allowing skipping ranges of doc IDs that are not of
|
||||
* interest, or {@code null} if a skip index was not indexed. The returned instance should be
|
||||
* confined to the thread that created it.
|
||||
*/
|
||||
public abstract DocValuesSkipper getDocValuesSkipper(String field) throws IOException;
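
A hedged example of the intended use (the field name "price" and the bounds are assumptions, not from this patch): a query can consult the skipper's global bounds before doing any per-document work, much like the sorted-set range query above.

    DocValuesSkipper skipper = leafReader.getDocValuesSkipper("price");
    if (skipper != null && (skipper.maxValue() < lowerBound || skipper.minValue() > upperBound)) {
      return null; // no document in this segment can match the range
    }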
/**
|
||||
* Returns {@link FloatVectorValues} for this field, or null if no {@link FloatVectorValues} were
|
||||
* indexed. The returned instance should only be used by a single thread.
|
||||
|
|
|
@ -399,6 +399,13 @@ public class ParallelLeafReader extends LeafReader {
|
|||
return reader == null ? null : reader.getSortedSetDocValues(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
ensureOpen();
|
||||
LeafReader reader = fieldToReader.get(field);
|
||||
return reader == null ? null : reader.getDocValuesSkipper(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -713,6 +713,7 @@ final class ReadersAndUpdates {
|
|||
fi.hasPayloads(),
|
||||
fi.getIndexOptions(),
|
||||
fi.getDocValuesType(),
|
||||
fi.hasDocValuesSkipIndex(),
|
||||
fi.getDocValuesGen(),
|
||||
new HashMap<>(fi.attributes()),
|
||||
fi.getPointDimensionCount(),
|
||||
|
|
|
@ -124,6 +124,13 @@ class SegmentDocValuesProducer extends DocValuesProducer {
|
|||
return dvProducer.getSortedSet(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
|
||||
DocValuesProducer dvProducer = dvProducersByField.get(field.name);
|
||||
assert dvProducer != null;
|
||||
return dvProducer.getSkipper(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
for (DocValuesProducer producer : dvProducers) {
|
||||
|
|
|
@ -240,6 +240,11 @@ public final class SlowCodecReaderWrapper {
|
|||
return reader.getSortedSetDocValues(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
|
||||
return reader.getDocValuesSkipper(field.name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
// We already checkIntegrity the entire reader up front
|
||||
|
|
|
@ -494,6 +494,11 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
|
|||
}
|
||||
return new MultiSortedSetDocValues(values, docStarts, map, totalCost);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
|
||||
throw new UnsupportedOperationException("This method is for searching not for merging");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -637,6 +637,12 @@ public final class SortingCodecReader extends FilterCodecReader {
|
|||
public void close() throws IOException {
|
||||
delegate.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
|
||||
// We can hardly return information about min/max values if doc IDs have been reordered.
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,273 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import org.apache.lucene.index.DocValuesSkipper;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
||||
public class TestDocValuesRangeIterator extends LuceneTestCase {
|
||||
|
||||
public void testSingleLevel() throws IOException {
|
||||
doTestBasics(false);
|
||||
}
|
||||
|
||||
public void testMultipleLevels() throws IOException {
|
||||
doTestBasics(true);
|
||||
}
|
||||
|
||||
private void doTestBasics(boolean doLevels) throws IOException {
|
||||
long queryMin = 10;
|
||||
long queryMax = 20;
|
||||
|
||||
// Fake numeric doc values so that:
|
||||
// docs 0-256 all match
|
||||
// docs in 256-512 are all greater than queryMax
|
||||
// docs in 512-768 are all less than queryMin
|
||||
// docs in 768-1024 have some docs that match the range, others not
|
||||
// docs in 1024-2048 follow a similar pattern as docs in 0-1024 except that not all docs have a
|
||||
// value
|
||||
NumericDocValues values =
|
||||
new NumericDocValues() {
|
||||
|
||||
int doc = -1;
|
||||
|
||||
@Override
|
||||
public boolean advanceExact(int target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target < 1024) {
|
||||
// dense up to 1024
|
||||
return doc = target;
|
||||
} else if (doc < 2047) {
|
||||
// 50% docs have a value up to 2048
|
||||
return doc = target + (target & 1);
|
||||
} else {
|
||||
return doc = DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long longValue() throws IOException {
|
||||
int d = doc % 1024;
|
||||
if (d < 128) {
|
||||
return (queryMin + queryMax) >> 1;
|
||||
} else if (d < 256) {
|
||||
return queryMax + 1;
|
||||
} else if (d < 512) {
|
||||
return queryMin - 1;
|
||||
} else {
|
||||
return switch ((d / 2) % 3) {
|
||||
case 0 -> queryMin - 1;
|
||||
case 1 -> queryMax + 1;
|
||||
case 2 -> (queryMin + queryMax) >> 1;
|
||||
default -> throw new AssertionError();
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return 42;
|
||||
}
|
||||
};
|
||||
|
||||
AtomicBoolean twoPhaseCalled = new AtomicBoolean();
|
||||
TwoPhaseIterator twoPhase =
|
||||
new TwoPhaseIterator(values) {
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
twoPhaseCalled.set(true);
|
||||
long v = values.longValue();
|
||||
return v >= queryMin && v <= queryMax;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 2f; // 2 comparisons
|
||||
}
|
||||
};
|
||||
|
||||
DocValuesSkipper skipper =
|
||||
new DocValuesSkipper() {
|
||||
|
||||
int doc = -1;
|
||||
|
||||
@Override
|
||||
public void advance(int target) throws IOException {
|
||||
doc = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numLevels() {
|
||||
return doLevels ? 3 : 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minDocID(int level) {
|
||||
int rangeLog = 9 - numLevels() + level;
|
||||
|
||||
// the level is the log2 of the interval
|
||||
if (doc < 0) {
|
||||
return -1;
|
||||
} else if (doc >= 2048) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
} else {
|
||||
int mask = (1 << rangeLog) - 1;
|
||||
// prior multiple of 2^level
|
||||
return doc & ~mask;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxDocID(int level) {
|
||||
int rangeLog = 9 - numLevels() + level;
|
||||
|
||||
int minDocID = minDocID(level);
|
||||
return switch (minDocID) {
|
||||
case -1 -> -1;
|
||||
case DocIdSetIterator.NO_MORE_DOCS -> DocIdSetIterator.NO_MORE_DOCS;
|
||||
default -> minDocID + (1 << rangeLog) - 1;
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public long minValue(int level) {
|
||||
int d = doc % 1024;
|
||||
if (d < 128) {
|
||||
return queryMin;
|
||||
} else if (d < 256) {
|
||||
return queryMax + 1;
|
||||
} else if (d < 768) {
|
||||
return queryMin - 1;
|
||||
} else {
|
||||
return queryMin - 1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long maxValue(int level) {
|
||||
int d = doc % 1024;
|
||||
if (d < 128) {
|
||||
return queryMax;
|
||||
} else if (d < 256) {
|
||||
return queryMax + 1;
|
||||
} else if (d < 768) {
|
||||
return queryMin - 1;
|
||||
} else {
|
||||
return queryMax + 1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docCount(int level) {
|
||||
int rangeLog = 9 - numLevels() + level;
|
||||
|
||||
if (doc < 1024) {
|
||||
return 1 << rangeLog;
|
||||
} else {
|
||||
// half docs have a value
|
||||
return 1 << rangeLog >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long minValue() {
|
||||
return Long.MIN_VALUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long maxValue() {
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docCount() {
|
||||
return 1024 + 1024 / 2;
|
||||
}
|
||||
};
|
||||
|
||||
DocValuesRangeIterator rangeIterator =
|
||||
new DocValuesRangeIterator(twoPhase, skipper, queryMin, queryMax);
|
||||
DocValuesRangeIterator.Approximation rangeApproximation =
|
||||
(DocValuesRangeIterator.Approximation) rangeIterator.approximation();
|
||||
|
||||
assertEquals(100, rangeApproximation.advance(100));
|
||||
assertEquals(DocValuesRangeIterator.Match.YES, rangeApproximation.match);
|
||||
assertEquals(255, rangeApproximation.upTo);
|
||||
assertTrue(rangeIterator.matches());
|
||||
assertTrue(values.docID() < rangeApproximation.docID()); // we did not advance doc values
|
||||
assertFalse(twoPhaseCalled.get());
|
||||
|
||||
assertEquals(768, rangeApproximation.advance(300));
|
||||
assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
|
||||
if (doLevels) {
|
||||
assertEquals(831, rangeApproximation.upTo);
|
||||
} else {
|
||||
assertEquals(1023, rangeApproximation.upTo);
|
||||
}
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
assertEquals(values.docID(), rangeApproximation.docID());
|
||||
assertEquals(twoPhase.matches(), rangeIterator.matches());
|
||||
assertTrue(twoPhaseCalled.get());
|
||||
twoPhaseCalled.set(false);
|
||||
rangeApproximation.nextDoc();
|
||||
}
|
||||
|
||||
assertEquals(1100, rangeApproximation.advance(1099));
|
||||
assertEquals(DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE, rangeApproximation.match);
|
||||
assertEquals(1024 + 256 - 1, rangeApproximation.upTo);
|
||||
assertEquals(values.docID(), rangeApproximation.docID());
|
||||
assertTrue(rangeIterator.matches());
|
||||
assertFalse(twoPhaseCalled.get());
|
||||
|
||||
assertEquals(1024 + 768, rangeApproximation.advance(1024 + 300));
|
||||
assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
|
||||
if (doLevels) {
|
||||
assertEquals(1024 + 831, rangeApproximation.upTo);
|
||||
} else {
|
||||
assertEquals(2047, rangeApproximation.upTo);
|
||||
}
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
assertEquals(values.docID(), rangeApproximation.docID());
|
||||
assertEquals(twoPhase.matches(), rangeIterator.matches());
|
||||
assertTrue(twoPhaseCalled.get());
|
||||
twoPhaseCalled.set(false);
|
||||
rangeApproximation.nextDoc();
|
||||
}
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximation.advance(2048));
|
||||
}
|
||||
}
|
|
@ -105,6 +105,9 @@ public class TestCheckIndex extends BaseTestCheckIndex {
|
|||
// doc value
|
||||
doc.add(new NumericDocValuesField("dv", random().nextLong()));
|
||||
|
||||
// doc value with skip index
|
||||
doc.add(NumericDocValuesField.indexedField("dv_skip", random().nextLong()));
|
||||
|
||||
// point value
|
||||
byte[] point = new byte[4];
|
||||
NumericUtils.intToSortableBytes(random().nextInt(), point, 0);
|
||||
|
@ -154,7 +157,7 @@ public class TestCheckIndex extends BaseTestCheckIndex {
|
|||
assertNull(segStatus.liveDocStatus.error);
|
||||
|
||||
// confirm field infos testing status
|
||||
assertEquals(8, segStatus.fieldInfoStatus.totFields);
|
||||
assertEquals(9, segStatus.fieldInfoStatus.totFields);
|
||||
assertTrue(output.toString(UTF_8).contains("test: field infos"));
|
||||
assertNull(segStatus.fieldInfoStatus.error);
|
||||
|
||||
|
@ -184,7 +187,8 @@ public class TestCheckIndex extends BaseTestCheckIndex {
|
|||
assertNull(segStatus.termVectorStatus.error);
|
||||
|
||||
// confirm doc values testing status
|
||||
assertEquals(2, segStatus.docValuesStatus.totalNumericFields);
|
||||
assertEquals(3, segStatus.docValuesStatus.totalNumericFields);
|
||||
assertEquals(1, segStatus.docValuesStatus.totalSkippingIndex);
|
||||
assertTrue(output.toString(UTF_8).contains("test: docvalues"));
|
||||
assertNull(segStatus.docValuesStatus.error);
|
||||
|
||||
|
|
|
@ -106,6 +106,7 @@ public class TestCodecs extends LuceneTestCase {
|
|||
storePayloads,
|
||||
indexOptions,
|
||||
DocValuesType.NONE,
|
||||
false,
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
|
|
@ -250,6 +250,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NONE,
|
||||
false,
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
@ -271,6 +272,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NONE,
|
||||
false,
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
@ -294,6 +296,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NONE,
|
||||
false,
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
|
|
@ -58,6 +58,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
false,
|
||||
ift.indexOptions(),
|
||||
ift.docValuesType(),
|
||||
ift.hasDocValuesSkipIndex(),
|
||||
-1,
|
||||
new HashMap<>(),
|
||||
0,
|
||||
|
|
|
@ -4976,4 +4976,62 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testDocValuesMixedSkippingIndex() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter writer =
|
||||
new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
|
||||
Document doc1 = new Document();
|
||||
doc1.add(SortedNumericDocValuesField.indexedField("test", random().nextLong()));
|
||||
writer.addDocument(doc1);
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(new SortedNumericDocValuesField("test", random().nextLong()));
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc2));
|
||||
assertEquals(
|
||||
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index: expected 'true', but it has 'false'.",
|
||||
ex.getMessage());
|
||||
}
|
||||
}
|
||||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter writer =
|
||||
new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
|
||||
Document doc1 = new Document();
|
||||
doc1.add(new SortedSetDocValuesField("test", TestUtil.randomBinaryTerm(random())));
|
||||
writer.addDocument(doc1);
|
||||
|
||||
Document doc2 = new Document();
|
||||
doc2.add(SortedSetDocValuesField.indexedField("test", TestUtil.randomBinaryTerm(random())));
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc2));
|
||||
assertEquals(
|
||||
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index: expected 'false', but it has 'true'.",
|
||||
ex.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testDocValuesSkippingIndexWithoutDocValues() throws Exception {
|
||||
for (DocValuesType docValuesType :
|
||||
new DocValuesType[] {DocValuesType.NONE, DocValuesType.BINARY}) {
|
||||
FieldType fieldType = new FieldType();
|
||||
fieldType.setStored(true);
|
||||
fieldType.setDocValuesType(docValuesType);
|
||||
fieldType.setDocValuesSkipIndex(true);
|
||||
fieldType.freeze();
|
||||
try (Directory dir = newMockDirectory()) {
|
||||
try (IndexWriter writer =
|
||||
new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
|
||||
Document doc1 = new Document();
|
||||
doc1.add(new Field("test", new byte[10], fieldType));
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc1));
|
||||
assertTrue(
|
||||
ex.getMessage()
|
||||
.startsWith("field 'test' cannot have docValuesSkipIndex set to true"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,6 +95,11 @@ public class TestIndexableField extends LuceneTestCase {
|
|||
return DocValuesType.NONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDocValuesSkipIndex() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int pointDimensionCount() {
|
||||
return 0;
|
||||
|
|
|
@ -191,6 +191,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NUMERIC,
|
||||
false,
|
||||
0,
|
||||
Collections.emptyMap(),
|
||||
0,
|
||||
|
@ -230,6 +231,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NUMERIC,
|
||||
false,
|
||||
1,
|
||||
Collections.emptyMap(),
|
||||
0,
|
||||
|
@ -295,6 +297,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NUMERIC,
|
||||
false,
|
||||
segmentInfo.getNextDocValuesGen(),
|
||||
Collections.emptyMap(),
|
||||
0,
|
||||
|
@ -365,6 +368,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NUMERIC,
|
||||
false,
|
||||
segmentInfo.getNextDocValuesGen(),
|
||||
Collections.emptyMap(),
|
||||
0,
|
||||
|
@ -403,6 +407,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
DocValuesType.NUMERIC,
|
||||
false,
|
||||
segmentInfo.getNextDocValuesGen(),
|
||||
Collections.emptyMap(),
|
||||
0,
|
||||
|
|
|
@ -101,6 +101,11 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
|
|||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues getPointValues(String field) {
|
||||
return null;
|
||||
|
|
|
@ -1289,6 +1289,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
false,
|
||||
IndexOptions.NONE,
|
||||
fi.getDocValuesType(),
|
||||
fi.hasDocValuesSkipIndex(),
|
||||
fi.getDocValuesGen(),
|
||||
fi.attributes(),
|
||||
0,
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Collections;
|
|||
import java.util.Iterator;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.ByteVectorValues;
|
||||
import org.apache.lucene.index.DocValuesSkipper;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
|
@ -95,6 +96,7 @@ public class TermVectorLeafReader extends LeafReader {
|
|||
terms.hasPayloads(),
|
||||
indexOptions,
|
||||
DocValuesType.NONE,
|
||||
false,
|
||||
-1,
|
||||
Collections.emptyMap(),
|
||||
0,
|
||||
|
@ -141,6 +143,11 @@ public class TermVectorLeafReader extends LeafReader {
|
|||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) throws IOException {
|
||||
return null; // Is this needed? See MemoryIndex for a way to do it.
|
||||
|
|
|
@ -728,6 +728,7 @@ public class MemoryIndex {
|
|||
storePayloads,
|
||||
indexOptions,
|
||||
fieldType.docValuesType(),
|
||||
false,
|
||||
-1,
|
||||
Collections.emptyMap(),
|
||||
fieldType.pointDimensionCount(),
|
||||
|
@ -782,6 +783,7 @@ public class MemoryIndex {
|
|||
info.fieldInfo.hasPayloads(),
|
||||
info.fieldInfo.getIndexOptions(),
|
||||
docValuesType,
|
||||
false,
|
||||
-1,
|
||||
info.fieldInfo.attributes(),
|
||||
info.fieldInfo.getPointDimensionCount(),
|
||||
|
@ -1622,6 +1624,12 @@ public class MemoryIndex {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
// Skipping isn't needed on a 1-doc index.
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PointValues getPointValues(String fieldName) {
|
||||
Info info = fields.get(fieldName);
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
|
|||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValuesSkipper;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
|
@ -280,6 +281,14 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
|
|||
return AssertingLeafReader.AssertingSortedSetDocValues.create(values, maxDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
|
||||
assert field.hasDocValuesSkipIndex();
|
||||
DocValuesSkipper skipper = in.getSkipper(field);
|
||||
assert skipper != null;
|
||||
return new AssertingLeafReader.AssertingDocValuesSkipper(skipper);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
in.close();
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.List;
|
|||
import java.util.Objects;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesSkipper;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.Fields;
|
||||
|
@ -1155,6 +1156,109 @@ public class AssertingLeafReader extends FilterLeafReader {
|
|||
}
|
||||
}
|
||||
|
||||
/** Wraps a DocValuesSkipper but with additional asserts */
|
||||
public static class AssertingDocValuesSkipper extends DocValuesSkipper {
|
||||
|
||||
private final Thread creationThread = Thread.currentThread();
|
||||
private final DocValuesSkipper in;
|
||||
|
||||
/** Sole constructor */
|
||||
public AssertingDocValuesSkipper(DocValuesSkipper in) {
|
||||
this.in = in;
|
||||
assert minDocID(0) == -1;
|
||||
assert maxDocID(0) == -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void advance(int target) throws IOException {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
assert target > maxDocID(0)
|
||||
: "Illegal to call advance() on a target that is not beyond the current interval";
|
||||
in.advance(target);
|
||||
assert in.minDocID(0) <= in.maxDocID(0);
|
||||
}
|
||||
|
||||
private boolean iterating() {
|
||||
return maxDocID(0) != -1
|
||||
&& minDocID(0) != -1
|
||||
&& maxDocID(0) != DocIdSetIterator.NO_MORE_DOCS
|
||||
&& minDocID(0) != DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numLevels() {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
return in.numLevels();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minDocID(int level) {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
Objects.checkIndex(level, numLevels());
|
||||
int minDocID = in.minDocID(level);
|
||||
assert minDocID <= in.maxDocID(level);
|
||||
if (level > 0) {
|
||||
assert minDocID <= in.minDocID(level - 1);
|
||||
}
|
||||
return minDocID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxDocID(int level) {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
Objects.checkIndex(level, numLevels());
|
||||
int maxDocID = in.maxDocID(level);
|
||||
|
||||
assert maxDocID >= in.minDocID(level);
|
||||
if (level > 0) {
|
||||
assert maxDocID >= in.maxDocID(level - 1);
|
||||
}
|
||||
return maxDocID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long minValue(int level) {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
assert iterating() : "Unpositioned iterator";
|
||||
Objects.checkIndex(level, numLevels());
|
||||
return in.minValue(level);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long maxValue(int level) {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
assert iterating() : "Unpositioned iterator";
|
||||
Objects.checkIndex(level, numLevels());
|
||||
return in.maxValue(level);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docCount(int level) {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
assert iterating() : "Unpositioned iterator";
|
||||
Objects.checkIndex(level, numLevels());
|
||||
return in.docCount(level);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long minValue() {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
return in.minValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long maxValue() {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
return in.maxValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docCount() {
|
||||
assertThread("Doc values skipper", creationThread);
|
||||
return in.docCount();
|
||||
}
|
||||
}
|
||||
|
||||
/** Wraps a PointValues but with additional asserts */
|
||||
public static class AssertingPointValues extends PointValues {
|
||||
private final Thread creationThread = Thread.currentThread();
|
||||
|
@ -1483,6 +1587,19 @@ public class AssertingLeafReader extends FilterLeafReader {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
|
||||
DocValuesSkipper skipper = super.getDocValuesSkipper(field);
|
||||
FieldInfo fi = getFieldInfos().fieldInfo(field);
|
||||
if (skipper != null) {
|
||||
assert fi.hasDocValuesSkipIndex();
|
||||
return new AssertingDocValuesSkipper(skipper);
|
||||
} else {
|
||||
assert fi == null || fi.hasDocValuesSkipIndex() == false;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) throws IOException {
|
||||
NumericDocValues dv = super.getNormValues(field);
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
|
|||
import org.apache.lucene.index.CodecReader;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesSkipper;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
|
@ -101,6 +102,24 @@ import org.apache.lucene.util.automaton.RegExp;
|
|||
*/
|
||||
public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTestCase {
|
||||
|
||||
/**
|
||||
* Override and return {@code false} if the {@link DocValuesSkipper} produced by this format
|
||||
* sometimes returns documents in {@link DocValuesSkipper#minDocID(int)} or {@link
|
||||
* DocValuesSkipper#maxDocID(int)} that may not have a value.
|
||||
*/
|
||||
protected boolean skipperHasAccurateDocBounds() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Override and return {@code false} if the {@link DocValuesSkipper} produced by this format
|
||||
* sometimes returns values in {@link DocValuesSkipper#minValue(int)} or {@link
|
||||
* DocValuesSkipper#maxValue(int)} that none of the documents in the range have.
|
||||
*/
|
||||
protected boolean skipperHasAccurateValueBounds() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addRandomFields(Document doc) {
|
||||
if (usually()) {
|
||||
|
@ -889,7 +908,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
iwriter.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("id", "1", Field.Store.NO));
|
||||
doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
|
||||
doc.add(SortedDocValuesField.indexedField("field", newBytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.commit();
|
||||
iwriter.deleteDocuments(new Term("id", "1"));
|
||||
|
@ -901,6 +920,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
|
||||
assertEquals(NO_MORE_DOCS, dv.nextDoc());
|
||||
|
||||
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
|
||||
assertEquals(0, skipper.docCount());
|
||||
skipper.advance(0);
|
||||
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
|
||||
|
||||
TermsEnum termsEnum = dv.termsEnum();
|
||||
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
|
||||
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
|
||||
|
@ -2234,7 +2258,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
iwriter.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("id", "1", Field.Store.NO));
|
||||
doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
|
||||
doc.add(SortedSetDocValuesField.indexedField("field", newBytesRef("hello")));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.commit();
|
||||
iwriter.deleteDocuments(new Term("id", "1"));
|
||||
|
@ -2246,6 +2270,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
|
||||
assertEquals(0, dv.getValueCount());
|
||||
|
||||
DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
|
||||
assertEquals(0, skipper.docCount());
|
||||
skipper.advance(0);
|
||||
assertEquals(NO_MORE_DOCS, skipper.minDocID(0));
|
||||
|
||||
TermsEnum termsEnum = dv.termsEnum();
|
||||
assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
|
||||
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));
|
||||
|
@ -3211,7 +3240,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
iwriter.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("id", "1", Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("field", 5));
|
||||
doc.add(NumericDocValuesField.indexedField("field", 5));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.commit();
|
||||
iwriter.deleteDocuments(new Term("id", "1"));
|
||||
|
@ -3223,6 +3252,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
    assertEquals(NO_MORE_DOCS, dv.nextDoc());

    DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
    assertEquals(0, skipper.docCount());
    skipper.advance(0);
    assertEquals(NO_MORE_DOCS, skipper.minDocID(0));

    ireader.close();
    directory.close();
  }

@ -3337,7 +3371,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    iwriter.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.NO));
    doc.add(new SortedNumericDocValuesField("field", 5));
    doc.add(SortedNumericDocValuesField.indexedField("field", 5));
    iwriter.addDocument(doc);
    iwriter.commit();
    iwriter.deleteDocuments(new Term("id", "1"));

@ -3349,6 +3383,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
    assertEquals(NO_MORE_DOCS, dv.nextDoc());

    DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
    assertEquals(0, skipper.docCount());
    skipper.advance(0);
    assertEquals(NO_MORE_DOCS, skipper.minDocID(0));

    ireader.close();
    directory.close();
  }

@ -3499,7 +3538,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes

    Document doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.NO));
    doc.add(new SortedDocValuesField("field", newBytesRef("hello")));
    doc.add(SortedDocValuesField.indexedField("field", newBytesRef("hello")));
    iwriter.addDocument(doc);
    final int numEmptyDocs = atLeast(1024);
    for (int i = 0; i < numEmptyDocs; ++i) {

@ -3515,6 +3554,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
    assertEquals(NO_MORE_DOCS, dv.nextDoc());

    DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
    assertEquals(0, skipper.docCount());
    skipper.advance(0);
    assertEquals(NO_MORE_DOCS, skipper.minDocID(0));

    TermsEnum termsEnum = dv.termsEnum();
    assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));

@ -3534,7 +3578,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes

    Document doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.NO));
    doc.add(new SortedSetDocValuesField("field", newBytesRef("hello")));
    doc.add(SortedSetDocValuesField.indexedField("field", newBytesRef("hello")));
    iwriter.addDocument(doc);
    final int numEmptyDocs = atLeast(1024);
    for (int i = 0; i < numEmptyDocs; ++i) {

@ -3550,6 +3594,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
    assertEquals(NO_MORE_DOCS, dv.nextDoc());

    DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
    assertEquals(0, skipper.docCount());
    skipper.advance(0);
    assertEquals(NO_MORE_DOCS, skipper.minDocID(0));

    TermsEnum termsEnum = dv.termsEnum();
    assertFalse(termsEnum.seekExact(new BytesRef("lucene")));
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("lucene")));

@ -3569,7 +3618,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes

    Document doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.NO));
    doc.add(new NumericDocValuesField("field", 42L));
    doc.add(NumericDocValuesField.indexedField("field", 42L));
    iwriter.addDocument(doc);
    final int numEmptyDocs = atLeast(1024);
    for (int i = 0; i < numEmptyDocs; ++i) {

@ -3585,6 +3634,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
    assertEquals(NO_MORE_DOCS, dv.nextDoc());

    DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
    assertEquals(0, skipper.docCount());
    skipper.advance(0);
    assertEquals(NO_MORE_DOCS, skipper.minDocID(0));

    ireader.close();
    directory.close();
  }

@ -3600,7 +3654,7 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes

    Document doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.NO));
    doc.add(new SortedNumericDocValuesField("field", 42L));
    doc.add(SortedNumericDocValuesField.indexedField("field", 42L));
    iwriter.addDocument(doc);
    final int numEmptyDocs = atLeast(1024);
    for (int i = 0; i < numEmptyDocs; ++i) {

@ -3616,6 +3670,11 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
    SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
    assertEquals(NO_MORE_DOCS, dv.nextDoc());

    DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field");
    assertEquals(0, skipper.docCount());
    skipper.advance(0);
    assertEquals(NO_MORE_DOCS, skipper.minDocID(0));

    ireader.close();
    directory.close();
  }
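Editorial aside, not part of the patch: the hunks above switch the tests from the plain doc-values fields to the new indexedField(...) factories and then assert that a field whose only value was deleted exposes an empty skipper (docCount() == 0 and an immediately exhausted advance). Below is a minimal sketch of that round trip, assuming an in-memory directory and an illustrative field name "price"; the API calls mirror the ones used in the tests above.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SkipperReadbackSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
        Document doc = new Document();
        // indexedField(...) enables the per-field skip index introduced by this change
        doc.add(NumericDocValuesField.indexedField("price", 42L));
        writer.addDocument(doc);
        writer.commit();
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        LeafReader leaf = reader.leaves().get(0).reader();
        DocValuesSkipper skipper = leaf.getDocValuesSkipper("price");
        if (skipper != null) { // null when the field carries no skip index
          System.out.println("docs with a value: " + skipper.docCount());
        }
      }
    }
  }
}

Because getDocValuesSkipper may return null for fields indexed without a skip index, callers should treat the skipper as an optional optimization rather than a guaranteed structure.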
@ -3807,4 +3866,457 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
  protected boolean codecAcceptsHugeBinaryValues(String field) {
    return true;
  }

  public void testNumericDocValuesWithSkipperSmall() throws Exception {
    doTestNumericDocValuesWithSkipper(random().nextInt(1, 1000));
  }

  public void testNumericDocValuesWithSkipperMedium() throws Exception {
    doTestNumericDocValuesWithSkipper(random().nextInt(1000, 20000));
  }

  @Nightly
  public void testNumericDocValuesWithSkipperBig() throws Exception {
    doTestNumericDocValuesWithSkipper(random().nextInt(50000, 100000));
  }

  private void doTestNumericDocValuesWithSkipper(int totalDocs) throws Exception {
    assertDocValuesWithSkipper(
        totalDocs,
        new TestDocValueSkipper() {
          @Override
          public void populateDoc(Document doc) {
            doc.add(NumericDocValuesField.indexedField("test", random().nextLong()));
          }

          @Override
          public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
            NumericDocValues numericDocValues = leafReader.getNumericDocValues("test");
            return new DocValuesWrapper() {

              @Override
              public int advance(int target) throws IOException {
                return numericDocValues.advance(target);
              }

              @Override
              public boolean advanceExact(int target) throws IOException {
                return numericDocValues.advanceExact(target);
              }

              @Override
              public long maxValue() throws IOException {
                return numericDocValues.longValue();
              }

              @Override
              public long minValue() throws IOException {
                return numericDocValues.longValue();
              }

              @Override
              public int docID() {
                return numericDocValues.docID();
              }
            };
          }

          @Override
          public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
            return leafReader.getDocValuesSkipper("test");
          }
        });
  }

  public void testSortedNumericDocValuesWithSkipperSmall() throws Exception {
    doTestSortedNumericDocValuesWithSkipper(random().nextInt(1, 1000));
  }

  public void testSortedNumericDocValuesWithSkipperMedium() throws Exception {
    doTestSortedNumericDocValuesWithSkipper(random().nextInt(1000, 20000));
  }

  @Nightly
  public void testSortedNumericDocValuesWithSkipperBig() throws Exception {
    doTestSortedNumericDocValuesWithSkipper(random().nextInt(50000, 100000));
  }

  private void doTestSortedNumericDocValuesWithSkipper(int totalDocs) throws Exception {
    assertDocValuesWithSkipper(
        totalDocs,
        new TestDocValueSkipper() {
          @Override
          public void populateDoc(Document doc) {
            for (int j = 0; j < random().nextInt(1, 5); j++) {
              doc.add(SortedNumericDocValuesField.indexedField("test", random().nextLong()));
            }
          }

          @Override
          public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
            SortedNumericDocValues sortedNumericDocValues =
                leafReader.getSortedNumericDocValues("test");
            return new DocValuesWrapper() {
              long max;
              long min;

              @Override
              public int advance(int target) throws IOException {
                int doc = sortedNumericDocValues.advance(target);
                if (doc != NO_MORE_DOCS) {
                  readValues();
                }
                return doc;
              }

              @Override
              public boolean advanceExact(int target) throws IOException {
                if (sortedNumericDocValues.advanceExact(target)) {
                  readValues();
                  return true;
                }
                return false;
              }

              private void readValues() throws IOException {
                max = Long.MIN_VALUE;
                min = Long.MAX_VALUE;
                for (int i = 0; i < sortedNumericDocValues.docValueCount(); i++) {
                  long value = sortedNumericDocValues.nextValue();
                  max = Math.max(max, value);
                  min = Math.min(min, value);
                }
              }

              @Override
              public long maxValue() {
                return max;
              }

              @Override
              public long minValue() {
                return min;
              }

              @Override
              public int docID() {
                return sortedNumericDocValues.docID();
              }
            };
          }

          @Override
          public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
            return leafReader.getDocValuesSkipper("test");
          }
        });
  }

  public void testSortedDocValuesWithSkipperSmall() throws Exception {
    doTestSortedDocValuesWithSkipper(random().nextInt(1, 1000));
  }

  public void testSortedDocValuesWithSkipperMedium() throws Exception {
    doTestSortedDocValuesWithSkipper(random().nextInt(1000, 20000));
  }

  @Nightly
  public void testSortedDocValuesWithSkipperBig() throws Exception {
    doTestSortedDocValuesWithSkipper(random().nextInt(50000, 100000));
  }

  private void doTestSortedDocValuesWithSkipper(int totalDocs) throws Exception {
    assertDocValuesWithSkipper(
        totalDocs,
        new TestDocValueSkipper() {
          @Override
          public void populateDoc(Document doc) {
            doc.add(SortedDocValuesField.indexedField("test", TestUtil.randomBinaryTerm(random())));
          }

          @Override
          public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
            SortedDocValues sortedDocValues = leafReader.getSortedDocValues("test");
            return new DocValuesWrapper() {

              @Override
              public int advance(int target) throws IOException {
                return sortedDocValues.advance(target);
              }

              @Override
              public boolean advanceExact(int target) throws IOException {
                return sortedDocValues.advanceExact(target);
              }

              @Override
              public long maxValue() throws IOException {
                return sortedDocValues.ordValue();
              }

              @Override
              public long minValue() throws IOException {
                return sortedDocValues.ordValue();
              }

              @Override
              public int docID() {
                return sortedDocValues.docID();
              }
            };
          }

          @Override
          public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
            return leafReader.getDocValuesSkipper("test");
          }
        });
  }

  public void testSortedSetDocValuesWithSkipperSmall() throws Exception {
    doTestSortedSetDocValuesWithSkipper(random().nextInt(1, 1000));
  }

  public void testSortedSetDocValuesWithSkipperMedium() throws Exception {
    doTestSortedSetDocValuesWithSkipper(random().nextInt(10000, 20000));
  }

  @Nightly
  public void testSortedSetDocValuesWithSkipperBig() throws Exception {
    doTestSortedSetDocValuesWithSkipper(random().nextInt(50000, 100000));
  }

  private void doTestSortedSetDocValuesWithSkipper(int totalDocs) throws Exception {
    assertDocValuesWithSkipper(
        totalDocs,
        new TestDocValueSkipper() {
          @Override
          public void populateDoc(Document doc) {
            for (int j = 0; j < random().nextInt(1, 5); j++) {
              doc.add(
                  SortedSetDocValuesField.indexedField(
                      "test", TestUtil.randomBinaryTerm(random())));
            }
          }

          @Override
          public DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException {
            SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("test");
            return new DocValuesWrapper() {
              long max;
              long min;

              @Override
              public int advance(int target) throws IOException {
                int doc = sortedSetDocValues.advance(target);
                if (doc != NO_MORE_DOCS) {
                  readValues();
                }
                return doc;
              }

              @Override
              public boolean advanceExact(int target) throws IOException {
                if (sortedSetDocValues.advanceExact(target)) {
                  readValues();
                  return true;
                }
                return false;
              }

              private void readValues() throws IOException {
                max = Long.MIN_VALUE;
                min = Long.MAX_VALUE;
                for (int i = 0; i < sortedSetDocValues.docValueCount(); i++) {
                  long value = sortedSetDocValues.nextOrd();
                  max = Math.max(max, value);
                  min = Math.min(min, value);
                }
              }

              @Override
              public long maxValue() {
                return max;
              }

              @Override
              public long minValue() {
                return min;
              }

              @Override
              public int docID() {
                return sortedSetDocValues.docID();
              }
            };
          }

          @Override
          public DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException {
            return leafReader.getDocValuesSkipper("test");
          }
        });
  }

  private void assertDocValuesWithSkipper(int totalDocs, TestDocValueSkipper testDocValueSkipper)
      throws Exception {
    Supplier<Boolean> booleanSupplier;
    switch (random().nextInt(3)) {
      case 0 -> booleanSupplier = () -> true;
      case 1 -> booleanSupplier = () -> random().nextBoolean();
      case 2 -> booleanSupplier = () -> random().nextBoolean() && random().nextBoolean();
      default -> throw new AssertionError();
    }
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    int numDocs = 0;
    for (int i = 0; i < totalDocs; i++) {
      Document doc = new Document();
      if (booleanSupplier.get()) {
        testDocValueSkipper.populateDoc(doc);
        numDocs++;
      }
      writer.addDocument(doc);
      if (rarely()) {
        writer.commit();
      }
    }
    writer.flush();

    if (random().nextBoolean()) {
      writer.forceMerge(1);
    }

    IndexReader r = writer.getReader();
    int readDocs = 0;
    for (LeafReaderContext readerContext : r.leaves()) {
      LeafReader reader = readerContext.reader();
      ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
      PrintStream infoStream = new PrintStream(bos, false, UTF_8);
      DocValuesStatus status = CheckIndex.testDocValues((CodecReader) reader, infoStream, true);
      if (status.error != null) {
        throw new Exception(status.error);
      }
      readDocs +=
          assertDocValuesSkipSequential(
              testDocValueSkipper.docValuesWrapper(reader),
              testDocValueSkipper.docValuesSkipper(reader));
      for (int i = 0; i < 10; i++) {
        assertDocValuesSkipRandom(
            testDocValueSkipper.docValuesWrapper(reader),
            testDocValueSkipper.docValuesSkipper(reader),
            reader.maxDoc());
      }
    }
    assertEquals(numDocs, readDocs);
    IOUtils.close(r, writer, directory);
  }

  private int assertDocValuesSkipSequential(DocValuesWrapper iterator, DocValuesSkipper skipper)
      throws IOException {
    if (skipper == null) {
      return 0;
    }

    assertEquals(-1, iterator.docID());
    assertEquals(-1, skipper.minDocID(0));
    assertEquals(-1, skipper.maxDocID(0));

    iterator.advance(0);
    int docCount = 0;
    while (true) {
      int previousMaxDoc = skipper.maxDocID(0);
      skipper.advance(previousMaxDoc + 1);
      assertTrue(skipper.minDocID(0) > previousMaxDoc);
      if (skipperHasAccurateDocBounds()) {
        assertEquals(iterator.docID(), skipper.minDocID(0));
      } else {
        assertTrue(
            "Expected: " + iterator.docID() + " but got " + skipper.minDocID(0),
            skipper.minDocID(0) <= iterator.docID());
      }

      if (skipper.minDocID(0) == NO_MORE_DOCS) {
        assertEquals(NO_MORE_DOCS, skipper.maxDocID(0));
        break;
      }
      assertTrue(skipper.docCount(0) > 0);

      int maxDoc = -1;
      long minVal = Long.MAX_VALUE;
      long maxVal = Long.MIN_VALUE;
      for (int i = 0; i < skipper.docCount(0); ++i) {
        assertNotEquals(NO_MORE_DOCS, iterator.docID());
        maxDoc = Math.max(maxDoc, iterator.docID());
        minVal = Math.min(minVal, iterator.minValue());
        maxVal = Math.max(maxVal, iterator.maxValue());
        iterator.advance(iterator.docID() + 1);
      }
      if (skipperHasAccurateDocBounds()) {
        assertEquals(maxDoc, skipper.maxDocID(0));
      } else {
        assertTrue(
            "Expected: " + maxDoc + " but got " + skipper.maxDocID(0),
            skipper.maxDocID(0) >= maxDoc);
      }
      if (skipperHasAccurateValueBounds()) {
        assertEquals(minVal, skipper.minValue(0));
        assertEquals(maxVal, skipper.maxValue(0));
      } else {
        assertTrue(
            "Expected: " + minVal + " but got " + skipper.minValue(0),
            minVal >= skipper.minValue(0));
        assertTrue(
            "Expected: " + maxVal + " but got " + skipper.maxValue(0),
            maxVal <= skipper.maxValue(0));
      }
      docCount += skipper.docCount(0);
    }

    assertEquals(docCount, skipper.docCount());
    return docCount;
  }

  private static void assertDocValuesSkipRandom(
      DocValuesWrapper iterator, DocValuesSkipper skipper, int maxDoc) throws IOException {
    if (skipper == null) {
      return;
    }
    while (true) {
      int doc = random().nextInt(skipper.maxDocID(0), maxDoc + 1) + 1;
      skipper.advance(doc);
      if (skipper.minDocID(0) == NO_MORE_DOCS) {
        assertEquals(NO_MORE_DOCS, skipper.maxDocID(0));
        return;
      }
      if (iterator.advanceExact(doc)) {
        assertTrue(iterator.docID() >= skipper.minDocID(0));
        assertTrue(iterator.docID() <= skipper.maxDocID(0));
        assertTrue(iterator.minValue() >= skipper.minValue(0));
        assertTrue(iterator.maxValue() <= skipper.maxValue(0));
      }
    }
  }

  private interface TestDocValueSkipper {

    void populateDoc(Document doc);

    DocValuesWrapper docValuesWrapper(LeafReader leafReader) throws IOException;

    DocValuesSkipper docValuesSkipper(LeafReader leafReader) throws IOException;
  }

  private interface DocValuesWrapper {

    int advance(int target) throws IOException;

    boolean advanceExact(int target) throws IOException;

    long maxValue() throws IOException;

    long minValue() throws IOException;

    int docID();
  }
}
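Editorial aside, not part of the patch: the new assertDocValuesSkipSequential and assertDocValuesSkipRandom helpers above walk the skipper at level 0 and check that its per-block doc and value bounds contain everything the doc-values iterator reports. Those same level-0 bounds are what a query-time consumer can use to jump over whole blocks. The sketch below illustrates that pattern under assumptions of a numeric field named "price" and a single-level traversal; it is illustrative, not code from the patch.

import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

final class SkipperRangeScanSketch {
  // Print docs whose "price" value falls in [lo, hi], using the skipper's level-0 bounds
  // to jump over blocks whose value range cannot intersect the query range.
  static void collectMatches(LeafReader leaf, long lo, long hi) throws IOException {
    NumericDocValues values = leaf.getNumericDocValues("price");
    DocValuesSkipper skipper = leaf.getDocValuesSkipper("price");
    if (values == null || skipper == null) {
      return; // field has no doc values, or it was indexed without a skip index
    }
    int doc = values.nextDoc();
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
      if (doc > skipper.maxDocID(0)) {
        skipper.advance(doc); // position the skipper on the block containing doc
      }
      if (skipper.maxValue(0) < lo || skipper.minValue(0) > hi) {
        // nothing in [minDocID(0), maxDocID(0)] can match: skip the whole block
        doc = values.advance(skipper.maxDocID(0) + 1);
        continue;
      }
      if (values.longValue() >= lo && values.longValue() <= hi) {
        System.out.println("match: doc=" + doc);
      }
      doc = values.nextDoc();
    }
  }
}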
@ -19,6 +19,7 @@ package org.apache.lucene.tests.index;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

@ -295,6 +296,15 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
        storePayloads = random().nextBoolean();
      }
    }
    boolean hasDocValuesSkipIndex = false;
    if (EnumSet.of(
            DocValuesType.NUMERIC,
            DocValuesType.SORTED,
            DocValuesType.SORTED_NUMERIC,
            DocValuesType.SORTED_SET)
        .contains(fieldType.docValuesType())) {
      hasDocValuesSkipIndex = fieldType.hasDocValuesSkipIndex();
    }
    FieldInfo fi =
        new FieldInfo(
            field,

@ -304,6 +314,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
            storePayloads,
            fieldType.indexOptions(),
            fieldType.docValuesType(),
            hasDocValuesSkipIndex,
            -1,
            new HashMap<>(),
            fieldType.pointDimensionCount(),

@ -349,8 +360,15 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
    }

    if (r.nextBoolean()) {
      DocValuesType values[] = DocValuesType.values();
      DocValuesType[] values = DocValuesType.values();
      DocValuesType current = values[r.nextInt(values.length)];
      type.setDocValuesType(values[r.nextInt(values.length)]);
      if (current == DocValuesType.NUMERIC
          || current == DocValuesType.SORTED_NUMERIC
          || current == DocValuesType.SORTED
          || current == DocValuesType.SORTED_SET) {
        type.setDocValuesSkipIndex(random().nextBoolean());
      }
    }

    if (r.nextBoolean()) {

@ -389,6 +407,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
    assertEquals(expected.number, actual.number);
    assertEquals(expected.name, actual.name);
    assertEquals(expected.getDocValuesType(), actual.getDocValuesType());
    assertEquals(expected.hasDocValuesSkipIndex(), actual.hasDocValuesSkipIndex());
    assertEquals(expected.getIndexOptions(), actual.getIndexOptions());
    assertEquals(expected.hasNorms(), actual.hasNorms());
    assertEquals(expected.hasPayloads(), actual.hasPayloads());

@ -429,6 +448,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
            false,
            TextField.TYPE_STORED.indexOptions(),
            DocValuesType.NONE,
            false,
            -1,
            new HashMap<>(),
            0,
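Editorial aside, not part of the patch: the field-infos test above threads the new hasDocValuesSkipIndex flag through FieldInfo and only randomizes it for the doc-values types that support a skip index (NUMERIC, SORTED, SORTED_NUMERIC, SORTED_SET). A sketch of the corresponding FieldType toggle as it appears at this point in the patch, where the setter takes a boolean; the helper name and the NUMERIC choice are illustrative.

import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;

final class SkipIndexFieldTypeSketch {
  // Build a frozen FieldType for a numeric doc-values field with the skip index enabled.
  static FieldType numericWithSkipIndex() {
    FieldType ft = new FieldType();
    ft.setDocValuesType(DocValuesType.NUMERIC);
    // Only NUMERIC, SORTED, SORTED_NUMERIC and SORTED_SET accept a skip index,
    // mirroring the EnumSet check in the test above.
    ft.setDocValuesSkipIndex(true);
    ft.freeze();
    return ft;
  }
}

In practice the convenience factories used elsewhere in this commit, such as NumericDocValuesField.indexedField(...), build an equivalent field type for you.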
@ -357,6 +357,7 @@ public abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
            proto.hasPayloads(),
            proto.getIndexOptions(),
            proto.getDocValuesType(),
            proto.hasDocValuesSkipIndex(),
            proto.getDocValuesGen(),
            new HashMap<>(),
            proto.getPointDimensionCount(),
@ -26,6 +26,7 @@ import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;

@ -187,6 +188,17 @@ class MergeReaderWrapper extends LeafReader {
    return norms.getNorms(fi);
  }

  @Override
  public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
    ensureOpen();
    FieldInfo fi = getFieldInfos().fieldInfo(field);
    if (fi == null) {
      // Field does not exist
      return null;
    }
    return docValues.getSkipper(fi);
  }

  @Override
  public FieldInfos getFieldInfos() {
    return in.getFieldInfos();
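Editorial aside, not part of the patch: MergeReaderWrapper above implements the new LeafReader#getDocValuesSkipper by resolving the FieldInfo and asking the doc-values producer for a skipper, returning null when the field is unknown. Other wrapping readers need the same plumbing; a hypothetical FilterLeafReader subclass that simply forwards the call might look like the sketch below. The class name and the cache-helper choices are assumptions, not code from the patch.

import java.io.IOException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;

// Hypothetical delegating reader: forwards the new skipper accessor to the wrapped reader,
// preserving the "null means no skip index" contract used by MergeReaderWrapper above.
final class ForwardingLeafReader extends FilterLeafReader {
  ForwardingLeafReader(LeafReader in) {
    super(in);
  }

  @Override
  public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
    return in.getDocValuesSkipper(field);
  }

  @Override
  public CacheHelper getCoreCacheHelper() {
    // Content is unchanged, so core caching can delegate to the wrapped reader.
    return in.getCoreCacheHelper();
  }

  @Override
  public CacheHelper getReaderCacheHelper() {
    // This sketch does not expose a reader-level cache key of its own.
    return null;
  }
}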
@ -103,6 +103,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
        oldInfo.hasPayloads(), // storePayloads
        oldInfo.getIndexOptions(), // indexOptions
        oldInfo.getDocValuesType(), // docValuesType
        oldInfo.hasDocValuesSkipIndex(), // hasDocValuesSkipIndex
        oldInfo.getDocValuesGen(), // dvGen
        oldInfo.attributes(), // attributes
        oldInfo.getPointDimensionCount(), // data dimension count
@ -157,6 +157,7 @@ public class RandomPostingsTester {
            true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
            DocValuesType.NONE,
            false,
            -1,
            new HashMap<>(),
            0,

@ -731,6 +732,7 @@ public class RandomPostingsTester {
            doPayloads,
            indexOptions,
            DocValuesType.NONE,
            false,
            -1,
            new HashMap<>(),
            0,
@ -25,6 +25,7 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;

@ -227,6 +228,11 @@ public class QueryUtils {
      return null;
    }

    @Override
    public DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
      return null;
    }

    @Override
    public FloatVectorValues getFloatVectorValues(String field) throws IOException {
      return null;