Change docValuesSkipIndex from a boolean to an enum. (#13784)

At the moment, our skip indexes record min/max ordinal/value per range
of doc IDs. It would be natural to extend them to other pre-aggregated
data such as a sum and value count, which facets could take advantage
of. This change switches `docValuesSkipIndex` from a boolean to an enum
so that we could release such changes in the future in an additive
fashion, by adding constants to this enum and new methods to
`DocValuesSkipper`.
This commit is contained in:
Adrien Grand 2024-09-17 14:35:30 +02:00 committed by GitHub
parent 644feeb02a
commit b59a357e58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
39 changed files with 214 additions and 108 deletions

View File

@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -209,7 +210,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
DocValuesSkipIndexType.NONE,
dvGen,
attributes,
pointDataDimensionCount,

View File

@ -23,6 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -186,7 +187,7 @@ public final class Lucene90FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
false,
DocValuesSkipIndexType.NONE,
dvGen,
attributes,
pointDataDimensionCount,

View File

@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -125,8 +126,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
boolean docValueSkipper =
Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
DocValuesSkipIndexType docValueSkipper =
docValuesSkipIndexType(readString(DOCVALUES_SKIP_INDEX.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
@ -221,6 +222,10 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
return DocValuesType.valueOf(dvType);
}
/**
 * Resolves the textual form of a skip index type to its enum constant.
 *
 * @param dvSkipIndexType the constant name, as accepted by {@code DocValuesSkipIndexType.valueOf}
 * @return the matching {@link DocValuesSkipIndexType}
 */
public DocValuesSkipIndexType docValuesSkipIndexType(String dvSkipIndexType) {
  final DocValuesSkipIndexType parsed = DocValuesSkipIndexType.valueOf(dvSkipIndexType);
  return parsed;
}
/**
 * Resolves the textual form of a vector encoding to its enum constant.
 *
 * @param vectorEncoding the constant name, as accepted by {@code VectorEncoding.valueOf}
 * @return the matching {@link VectorEncoding}
 */
public VectorEncoding vectorEncoding(String vectorEncoding) {
  final VectorEncoding parsed = VectorEncoding.valueOf(vectorEncoding);
  return parsed;
}
@ -284,7 +289,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
SimpleTextUtil.write(out, getDocValuesSkipIndexType(fi.docValuesSkipIndexType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN);
@ -355,4 +360,8 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
/** Returns the string form of the given doc values type, as produced by its {@code toString()}. */
private static String getDocValuesType(DocValuesType type) {
  final String text = type.toString();
  return text;
}
/** Returns the string form of the given skip index type, as produced by its {@code toString()}. */
private static String getDocValuesSkipIndexType(DocValuesSkipIndexType type) {
  final String text = type.toString();
  return text;
}
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.codecs.uniformsplit;
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.codecs.lucene90.tests.MockTermStateFactory;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
@ -111,7 +112,7 @@ public class TestBlockWriter extends LuceneTestCase {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
Collections.emptyMap(),
0,

View File

@ -34,6 +34,7 @@ import org.apache.lucene.codecs.uniformsplit.FSTDictionary;
import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
import org.apache.lucene.codecs.uniformsplit.TermBytes;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -198,7 +199,7 @@ public class TestSTBlockReader extends LuceneTestCase {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
Collections.emptyMap(),
0,

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -77,7 +78,7 @@ public abstract class DocValuesProducer implements Closeable {
/**
* Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread. The return value is undefined if {@link
* FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
* FieldInfo#docValuesSkipIndexType()} returns {@link DocValuesSkipIndexType#NONE}.
*/
public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;

View File

@ -31,6 +31,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@ -143,7 +144,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
return DocValues.singleton(valuesProducer.getNumeric(field));
}
};
if (field.hasDocValuesSkipIndex()) {
if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
writeSkipIndex(field, producer);
}
writeValues(field, producer, false);
@ -248,7 +249,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
assert field.hasDocValuesSkipIndex();
assert field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE;
final long start = data.getFilePointer();
final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
long globalMaxValue = Long.MIN_VALUE;
@ -700,7 +701,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
return DocValues.singleton(sortedOrds);
}
};
if (field.hasDocValuesSkipIndex()) {
if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
writeSkipIndex(field, producer);
}
if (addTypeByte) {
@ -873,7 +874,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void doAddSortedNumericField(
FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
if (field.hasDocValuesSkipIndex()) {
if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
writeSkipIndex(field, valuesProducer);
}
if (ords) {

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -191,7 +192,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
byte type = meta.readByte();
if (info.hasDocValuesSkipIndex()) {
if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
skippers.put(info.number, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) {

View File

@ -24,6 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -163,8 +164,6 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
boolean isParentField =
format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
boolean hasDocValuesSkipIndex =
format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;
if ((bits & 0xC0) != 0) {
throw new CorruptIndexException(
@ -187,6 +186,12 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
// DV Types are packed in one byte
final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
final DocValuesSkipIndexType docValuesSkipIndex;
if (format >= FORMAT_DOCVALUE_SKIPPER) {
docValuesSkipIndex = getDocValuesSkipIndexType(input, input.readByte());
} else {
docValuesSkipIndex = DocValuesSkipIndexType.NONE;
}
final long dvGen = input.readLong();
Map<String, String> attributes = input.readMapOfStrings();
// just use the last field's map if its the same
@ -217,7 +222,7 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
storePayloads,
indexOptions,
docValuesType,
hasDocValuesSkipIndex,
docValuesSkipIndex,
dvGen,
attributes,
pointDataDimensionCount,
@ -270,6 +275,18 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
}
}
/**
 * Maps a skip index type to its one-byte code: {@code 0} for {@code NONE}, {@code 1} for {@code
 * RANGE}.
 *
 * @param type the skip index type to encode
 * @return the byte code for {@code type}
 * @throws AssertionError if {@code type} is a constant this method does not know about
 */
private static byte docValuesSkipIndexByte(DocValuesSkipIndexType type) {
  final byte code;
  switch (type) {
    case NONE:
      code = 0;
      break;
    case RANGE:
      code = 1;
      break;
    default:
      // BUG
      throw new AssertionError("unhandled DocValuesSkipIndexType: " + type);
  }
  return code;
}
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
switch (b) {
case 0:
@ -289,6 +306,18 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
}
}
/**
 * Maps a one-byte code back to a skip index type: {@code 0} is {@code NONE}, {@code 1} is {@code
 * RANGE}.
 *
 * @param input the input the byte was read from; only used to build the exception
 * @param b the byte code to decode
 * @return the skip index type that {@code b} stands for
 * @throws CorruptIndexException if {@code b} is neither {@code 0} nor {@code 1}
 */
private static DocValuesSkipIndexType getDocValuesSkipIndexType(IndexInput input, byte b)
    throws IOException {
  if (b == 0) {
    return DocValuesSkipIndexType.NONE;
  }
  if (b == 1) {
    return DocValuesSkipIndexType.RANGE;
  }
  throw new CorruptIndexException("invalid docvaluesskipindex byte: " + b, input);
}
private static VectorEncoding getVectorEncoding(IndexInput input, byte b) throws IOException {
if (b < 0 || b >= VectorEncoding.values().length) {
throw new CorruptIndexException("invalid vector encoding: " + b, input);
@ -404,13 +433,13 @@ public final class Lucene94FieldInfosFormat extends FieldInfosFormat {
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
output.writeByte(bits);
output.writeByte(indexOptionsByte(fi.getIndexOptions()));
// pack the DV type and hasNorms in one byte
output.writeByte(docValuesByte(fi.getDocValuesType()));
output.writeByte(docValuesSkipIndexByte(fi.docValuesSkipIndexType()));
output.writeLong(fi.getDocValuesGen());
output.writeMapOfStrings(fi.attributes());
output.writeVInt(fi.getPointDimensionCount());

View File

@ -20,6 +20,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
@ -41,7 +42,7 @@ public class FieldType implements IndexableFieldType {
private IndexOptions indexOptions = IndexOptions.NONE;
private boolean frozen;
private DocValuesType docValuesType = DocValuesType.NONE;
private boolean docValuesSkipIndex;
private DocValuesSkipIndexType docValuesSkipIndex = DocValuesSkipIndexType.NONE;
private int dimensionCount;
private int indexDimensionCount;
private int dimensionNumBytes;
@ -61,7 +62,7 @@ public class FieldType implements IndexableFieldType {
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValuesType = ref.docValuesType();
this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
this.docValuesSkipIndex = ref.docValuesSkipIndexType();
this.dimensionCount = ref.pointDimensionCount();
this.indexDimensionCount = ref.pointIndexDimensionCount();
this.dimensionNumBytes = ref.pointNumBytes();
@ -508,7 +509,7 @@ public class FieldType implements IndexableFieldType {
}
@Override
public boolean hasDocValuesSkipIndex() {
public DocValuesSkipIndexType docValuesSkipIndexType() {
return docValuesSkipIndex;
}
@ -518,7 +519,7 @@ public class FieldType implements IndexableFieldType {
* correlate with fields that are part of the index sort, so that values can be expected to be
* clustered in the doc ID space.
*/
public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
public void setDocValuesSkipIndexType(DocValuesSkipIndexType docValuesSkipIndex) {
checkIfFrozen();
this.docValuesSkipIndex = docValuesSkipIndex;
}
@ -531,7 +532,7 @@ public class FieldType implements IndexableFieldType {
result = prime * result + indexDimensionCount;
result = prime * result + dimensionNumBytes;
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + Boolean.hashCode(docValuesSkipIndex);
result = prime * result + (docValuesSkipIndex == null ? 0 : docValuesSkipIndex.hashCode());
result = prime * result + indexOptions.hashCode();
result = prime * result + (omitNorms ? 1231 : 1237);
result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.document;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
@ -42,13 +43,13 @@ public class NumericDocValuesField extends Field {
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
* creates a {@link FieldType#docValuesSkipIndexType() skip index}.
*
* @param name field name
* @param value 64-bit long value

View File

@ -17,6 +17,7 @@
package org.apache.lucene.document;
import java.util.Collection;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MultiTermQuery;
@ -48,13 +49,13 @@ public class SortedDocValuesField extends Field {
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
* creates a {@link FieldType#docValuesSkipIndexType() skip index}.
*
* @param name field name
* @param bytes binary content

View File

@ -16,6 +16,7 @@
*/
package org.apache.lucene.document;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
@ -50,13 +51,13 @@ public class SortedNumericDocValuesField extends Field {
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedNumericDocValuesField} with the specified 64-bit long value that
* also creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
* also creates a {@link FieldType#docValuesSkipIndexType() skip index}.
*
* @param name field name
* @param value 64-bit long value

View File

@ -17,6 +17,7 @@
package org.apache.lucene.document;
import java.util.Collection;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MultiTermQuery;
@ -49,13 +50,13 @@ public class SortedSetDocValuesField extends Field {
TYPE.freeze();
INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
INDEXED_TYPE.freeze();
}
/**
* Creates a new {@link SortedSetDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
* creates a {@link FieldType#docValuesSkipIndexType() skip index}.
*
* @param name field name
* @param bytes binary content

View File

@ -3731,7 +3731,7 @@ public final class CheckIndex implements Closeable {
private static void checkDocValues(
FieldInfo fi, DocValuesProducer dvReader, DocValuesStatus status) throws Exception {
if (fi.hasDocValuesSkipIndex()) {
if (fi.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
status.totalSkippingIndex++;
checkDocValueSkipper(fi, dvReader.getSkipper(fi));
}

View File

@ -200,7 +200,7 @@ public abstract class CodecReader extends LeafReader {
public final DocValuesSkipper getDocValuesSkipper(String field) throws IOException {
ensureOpen();
FieldInfo fi = getFieldInfos().fieldInfo(field);
if (fi == null || fi.hasDocValuesSkipIndex() == false) {
if (fi == null || fi.docValuesSkipIndexType() == DocValuesSkipIndexType.NONE) {
return null;
}
return getDocValuesReader().getSkipper(fi);

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
/**
 * Options for skip indexes on doc values.
 *
 * <p>Each constant states, via {@link #isCompatibleWith(DocValuesType)}, which doc values types it
 * may be combined with.
 */
public enum DocValuesSkipIndexType {
  /** No skip index should be created. */
  NONE {
    @Override
    boolean isCompatibleWith(DocValuesType dvType) {
      // Having no skip index is acceptable whatever the doc values type is.
      return true;
    }
  },
  /**
   * Record range of values. This is suitable for {@link DocValuesType#NUMERIC}, {@link
   * DocValuesType#SORTED_NUMERIC}, {@link DocValuesType#SORTED} and {@link
   * DocValuesType#SORTED_SET} doc values, and will record the min/max values per range of doc IDs.
   */
  RANGE {
    @Override
    boolean isCompatibleWith(DocValuesType dvType) {
      final boolean numericKind =
          dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC;
      final boolean sortedKind =
          dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET;
      return numericKind || sortedKind;
    }
  };

  // TODO: add support for pre-aggregated integer/float/double

  /**
   * Tells whether this skip index type can be used together with the given doc values type.
   *
   * @param dvType the doc values type to check against
   * @return {@code true} if the combination is allowed, {@code false} otherwise
   */
  abstract boolean isCompatibleWith(DocValuesType dvType);
}

View File

@ -22,37 +22,31 @@ package org.apache.lucene.index;
*/
public enum DocValuesType {
/** No doc values for this field. */
NONE(false),
NONE,
/** A per-document Number */
NUMERIC(true),
NUMERIC,
/**
* A per-document byte[]. Values may be larger than 32766 bytes, but different codecs may enforce
* their own limits.
*/
BINARY(false),
BINARY,
/**
* A pre-sorted byte[]. Fields with this type only store distinct byte values and store an
* additional offset pointer per document to dereference the shared byte[]. The stored byte[] is
* presorted and allows access via document id, ordinal and by-value. Values must be {@code <=
* 32766} bytes.
*/
SORTED(true),
SORTED,
/**
* A pre-sorted Number[]. Fields with this type store numeric values in sorted order according to
* {@link Long#compare(long, long)}.
*/
SORTED_NUMERIC(true),
SORTED_NUMERIC,
/**
* A pre-sorted Set&lt;byte[]&gt;. Fields with this type only store distinct byte values and store
* additional offset pointers per document to dereference the shared byte[]s. The stored byte[] is
* presorted and allows access via document id, ordinal and by-value. Values must be {@code <=
* 32766} bytes.
*/
SORTED_SET(true);
final boolean supportsSkipIndex; // pkg-private for use in FieldInfo
DocValuesType(boolean supportsSkipIndex) {
this.supportsSkipIndex = supportsSkipIndex;
}
SORTED_SET;
}

View File

@ -37,7 +37,7 @@ public final class FieldInfo {
private DocValuesType docValuesType = DocValuesType.NONE;
private final boolean docValuesSkipIndex;
private final DocValuesSkipIndexType docValuesSkipIndex;
// True if any document indexed term vectors
private boolean storeTermVector;
@ -83,7 +83,7 @@ public final class FieldInfo {
boolean storePayloads,
IndexOptions indexOptions,
DocValuesType docValues,
boolean hasDocValuesSkipIndex,
DocValuesSkipIndexType docValuesSkipIndex,
long dvGen,
Map<String, String> attributes,
int pointDimensionCount,
@ -99,7 +99,7 @@ public final class FieldInfo {
this.docValuesType =
Objects.requireNonNull(
docValues, "DocValuesType must not be null (field: \"" + name + "\")");
this.docValuesSkipIndex = hasDocValuesSkipIndex;
this.docValuesSkipIndex = docValuesSkipIndex;
this.indexOptions =
Objects.requireNonNull(
indexOptions, "IndexOptions must not be null (field: \"" + name + "\")");
@ -157,11 +157,13 @@ public final class FieldInfo {
if (docValuesType == null) {
throw new IllegalArgumentException("DocValuesType must not be null (field: '" + name + "')");
}
if (docValuesType.supportsSkipIndex == false && docValuesSkipIndex) {
if (docValuesSkipIndex.isCompatibleWith(docValuesType) == false) {
throw new IllegalArgumentException(
"field '"
+ name
+ "' cannot have docValuesSkipIndex set to true with doc values type "
+ "' cannot have docValuesSkipIndexType="
+ docValuesSkipIndex
+ " with doc values type "
+ docValuesType);
}
if (dvGen != -1 && docValuesType == DocValuesType.NONE) {
@ -308,14 +310,16 @@ public final class FieldInfo {
* @throws IllegalArgumentException if they are not the same
*/
static void verifySameDocValuesSkipIndex(
String fieldName, boolean hasDocValuesSkipIndex1, boolean hasDocValuesSkipIndex2) {
String fieldName,
DocValuesSkipIndexType hasDocValuesSkipIndex1,
DocValuesSkipIndexType hasDocValuesSkipIndex2) {
if (hasDocValuesSkipIndex1 != hasDocValuesSkipIndex2) {
throw new IllegalArgumentException(
"cannot change field \""
+ fieldName
+ "\" from docValuesSkipIndex="
+ "\" from docValuesSkipIndexType="
+ hasDocValuesSkipIndex1
+ " to inconsistent docValuesSkipIndex="
+ " to inconsistent docValuesSkipIndexType="
+ hasDocValuesSkipIndex2);
}
}
@ -589,7 +593,7 @@ public final class FieldInfo {
}
/**
 * Returns the type of doc values skip index of this field, which is {@link
 * DocValuesSkipIndexType#NONE} if this field has no skip index.
 */
public boolean hasDocValuesSkipIndex() {
public DocValuesSkipIndexType docValuesSkipIndexType() {
return docValuesSkipIndex;
}

View File

@ -365,7 +365,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
IndexOptions indexOptions,
IndexOptionsProperties indexOptionsProperties,
DocValuesType docValuesType,
boolean docValuesSkipIndex,
DocValuesSkipIndexType docValuesSkipIndex,
FieldDimensions fieldDimensions,
FieldVectorProperties fieldVectorProperties) {}
@ -444,7 +444,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
? new IndexOptionsProperties(fi.hasTermVectors(), fi.omitsNorms())
: null,
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
fi.docValuesSkipIndexType(),
new FieldDimensions(
fi.getPointDimensionCount(),
fi.getPointIndexDimensionCount(),
@ -524,9 +524,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
DocValuesType currentDVType = fieldProperties.docValuesType;
verifySameDocValuesType(fieldName, currentDVType, fi.getDocValuesType());
boolean currentDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
DocValuesSkipIndexType currentDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
verifySameDocValuesSkipIndex(
fieldName, currentDocValuesSkipIndex, fi.hasDocValuesSkipIndex());
fieldName, currentDocValuesSkipIndex, fi.docValuesSkipIndexType());
FieldDimensions dims = fieldProperties.fieldDimensions;
verifySamePointsOptions(
@ -582,7 +582,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
false,
IndexOptions.NONE,
dvType,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,
@ -609,8 +609,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
+ fieldDvType
+ "].");
}
boolean hasDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
if (hasDocValuesSkipIndex) {
DocValuesSkipIndexType hasDocValuesSkipIndex = fieldProperties.docValuesSkipIndex;
if (hasDocValuesSkipIndex != DocValuesSkipIndexType.NONE) {
throw new IllegalArgumentException(
"Can't update ["
+ dvType
@ -676,7 +676,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
false,
IndexOptions.NONE,
dvType,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,
@ -797,7 +797,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
fi.docValuesSkipIndexType(),
dvGen,
// original attributes is UnmodifiableMap
new HashMap<>(fi.attributes()),

View File

@ -87,7 +87,7 @@ public interface IndexableFieldType {
DocValuesType docValuesType();
/**
 * The type of skip index for doc values that should be created on this field, or {@link
 * DocValuesSkipIndexType#NONE} if no skip index should be created.
 */
boolean hasDocValuesSkipIndex();
DocValuesSkipIndexType docValuesSkipIndexType();
/**
* If this is positive (representing the number of point dimensions), the field is indexed as a

View File

@ -680,7 +680,7 @@ final class IndexingChain implements Accountable {
false,
s.indexOptions,
s.docValuesType,
s.hasDocValuesSkipIndex,
s.docValuesSkipIndex,
-1,
s.attributes,
s.pointDimensionCount,
@ -832,12 +832,14 @@ final class IndexingChain implements Accountable {
verifyUnIndexedFieldType(fieldName, fieldType);
}
if (fieldType.docValuesType() != DocValuesType.NONE) {
schema.setDocValues(fieldType.docValuesType(), fieldType.hasDocValuesSkipIndex());
} else if (fieldType.hasDocValuesSkipIndex()) {
schema.setDocValues(fieldType.docValuesType(), fieldType.docValuesSkipIndexType());
} else if (fieldType.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
throw new IllegalArgumentException(
"field '"
+ schema.name
+ "' cannot have docValuesSkipIndex set to true without doc values");
+ "' cannot have docValuesSkipIndexType="
+ fieldType.docValuesSkipIndexType()
+ " without doc values");
}
if (fieldType.pointDimensionCount() != 0) {
schema.setPoints(
@ -1440,7 +1442,7 @@ final class IndexingChain implements Accountable {
private boolean storeTermVector = false;
private IndexOptions indexOptions = IndexOptions.NONE;
private DocValuesType docValuesType = DocValuesType.NONE;
private boolean hasDocValuesSkipIndex = false;
private DocValuesSkipIndexType docValuesSkipIndex = DocValuesSkipIndexType.NONE;
private int pointDimensionCount = 0;
private int pointIndexDimensionCount = 0;
private int pointNumBytes = 0;
@ -1506,13 +1508,14 @@ final class IndexingChain implements Accountable {
}
}
void setDocValues(DocValuesType newDocValuesType, boolean newHasDocValuesSkipIndex) {
void setDocValues(
DocValuesType newDocValuesType, DocValuesSkipIndexType newDocValuesSkipIndex) {
if (docValuesType == DocValuesType.NONE) {
this.docValuesType = newDocValuesType;
this.hasDocValuesSkipIndex = newHasDocValuesSkipIndex;
this.docValuesSkipIndex = newDocValuesSkipIndex;
} else {
assertSame("doc values type", docValuesType, newDocValuesType);
assertSame("doc values skip index", hasDocValuesSkipIndex, newHasDocValuesSkipIndex);
assertSame("doc values skip index type", docValuesSkipIndex, newDocValuesSkipIndex);
}
}
@ -1560,7 +1563,7 @@ final class IndexingChain implements Accountable {
assertSame("omit norms", fi.omitsNorms(), omitNorms);
assertSame("store term vector", fi.hasTermVectors(), storeTermVector);
assertSame("doc values type", fi.getDocValuesType(), docValuesType);
assertSame("doc values skip index", fi.hasDocValuesSkipIndex(), hasDocValuesSkipIndex);
assertSame("doc values skip index type", fi.docValuesSkipIndexType(), docValuesSkipIndex);
assertSame(
"vector similarity function", fi.getVectorSimilarityFunction(), vectorSimilarityFunction);
assertSame("vector encoding", fi.getVectorEncoding(), vectorEncoding);

View File

@ -713,7 +713,7 @@ final class ReadersAndUpdates {
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
fi.docValuesSkipIndexType(),
fi.getDocValuesGen(),
new HashMap<>(fi.attributes()),
fi.getPointDimensionCount(),

View File

@ -106,7 +106,7 @@ public class TestCodecs extends LuceneTestCase {
storePayloads,
indexOptions,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,

View File

@ -250,7 +250,7 @@ public class TestFieldInfos extends LuceneTestCase {
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,
@ -272,7 +272,7 @@ public class TestFieldInfos extends LuceneTestCase {
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,
@ -296,7 +296,7 @@ public class TestFieldInfos extends LuceneTestCase {
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,

View File

@ -58,7 +58,7 @@ public class TestFieldsReader extends LuceneTestCase {
false,
ift.indexOptions(),
ift.docValuesType(),
ift.hasDocValuesSkipIndex(),
ift.docValuesSkipIndexType(),
-1,
new HashMap<>(),
0,

View File

@ -4992,8 +4992,9 @@ public class TestIndexWriter extends LuceneTestCase {
doc2.add(new SortedNumericDocValuesField("test", random().nextLong()));
IllegalArgumentException ex =
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc2));
ex.printStackTrace();
assertEquals(
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index: expected 'true', but it has 'false'.",
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index type: expected 'RANGE', but it has 'NONE'.",
ex.getMessage());
}
}
@ -5009,7 +5010,7 @@ public class TestIndexWriter extends LuceneTestCase {
IllegalArgumentException ex =
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc2));
assertEquals(
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index: expected 'false', but it has 'true'.",
"Inconsistency of field data structures across documents for field [test] of doc [1]. doc values skip index type: expected 'NONE', but it has 'RANGE'.",
ex.getMessage());
}
}
@ -5021,7 +5022,7 @@ public class TestIndexWriter extends LuceneTestCase {
FieldType fieldType = new FieldType();
fieldType.setStored(true);
fieldType.setDocValuesType(docValuesType);
fieldType.setDocValuesSkipIndex(true);
fieldType.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
fieldType.freeze();
try (Directory dir = newMockDirectory()) {
try (IndexWriter writer =
@ -5031,8 +5032,7 @@ public class TestIndexWriter extends LuceneTestCase {
IllegalArgumentException ex =
expectThrows(IllegalArgumentException.class, () -> writer.addDocument(doc1));
assertTrue(
ex.getMessage()
.startsWith("field 'test' cannot have docValuesSkipIndex set to true"));
ex.getMessage().startsWith("field 'test' cannot have docValuesSkipIndexType=RANGE"));
}
}
}

View File

@ -96,8 +96,8 @@ public class TestIndexableField extends LuceneTestCase {
}
@Override
public boolean hasDocValuesSkipIndex() {
return false;
public DocValuesSkipIndexType docValuesSkipIndexType() {
return DocValuesSkipIndexType.NONE;
}
@Override

View File

@ -191,7 +191,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
DocValuesSkipIndexType.NONE,
0,
Collections.emptyMap(),
0,
@ -231,7 +231,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
DocValuesSkipIndexType.NONE,
1,
Collections.emptyMap(),
0,
@ -297,7 +297,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
DocValuesSkipIndexType.NONE,
segmentInfo.getNextDocValuesGen(),
Collections.emptyMap(),
0,
@ -368,7 +368,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
DocValuesSkipIndexType.NONE,
segmentInfo.getNextDocValuesGen(),
Collections.emptyMap(),
0,
@ -407,7 +407,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
false,
IndexOptions.NONE,
DocValuesType.NUMERIC,
false,
DocValuesSkipIndexType.NONE,
segmentInfo.getNextDocValuesGen(),
Collections.emptyMap(),
0,

View File

@ -1313,7 +1313,7 @@ public class TestSortOptimization extends LuceneTestCase {
false,
IndexOptions.NONE,
fi.getDocValuesType(),
fi.hasDocValuesSkipIndex(),
fi.docValuesSkipIndexType(),
fi.getDocValuesGen(),
fi.attributes(),
0,

View File

@ -21,6 +21,7 @@ import java.util.Collections;
import java.util.Iterator;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -96,7 +97,7 @@ public class TermVectorLeafReader extends LeafReader {
terms.hasPayloads(),
indexOptions,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
Collections.emptyMap(),
0,

View File

@ -736,7 +736,7 @@ public class MemoryIndex {
storePayloads,
indexOptions,
fieldType.docValuesType(),
false,
fieldType.docValuesSkipIndexType(),
-1,
Collections.emptyMap(),
fieldType.pointDimensionCount(),
@ -841,7 +841,7 @@ public class MemoryIndex {
info.fieldInfo.hasPayloads(),
info.fieldInfo.getIndexOptions(),
docValuesType,
false,
DocValuesSkipIndexType.NONE,
-1,
info.fieldInfo.attributes(),
info.fieldInfo.getPointDimensionCount(),

View File

@ -23,6 +23,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -283,7 +284,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
assert field.hasDocValuesSkipIndex();
assert field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE;
DocValuesSkipper skipper = in.getSkipper(field);
assert skipper != null;
return new AssertingLeafReader.AssertingDocValuesSkipper(skipper);

View File

@ -23,6 +23,7 @@ import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@ -1625,10 +1626,10 @@ public class AssertingLeafReader extends FilterLeafReader {
DocValuesSkipper skipper = super.getDocValuesSkipper(field);
FieldInfo fi = getFieldInfos().fieldInfo(field);
if (skipper != null) {
assert fi.hasDocValuesSkipIndex();
assert fi.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE;
return new AssertingDocValuesSkipper(skipper);
} else {
assert fi == null || fi.hasDocValuesSkipIndex() == false;
assert fi == null || fi.docValuesSkipIndexType() == DocValuesSkipIndexType.NONE;
return null;
}
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -303,14 +304,14 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
storePayloads = random().nextBoolean();
}
}
boolean hasDocValuesSkipIndex = false;
DocValuesSkipIndexType docValuesSkipIndexType = DocValuesSkipIndexType.NONE;
if (EnumSet.of(
DocValuesType.NUMERIC,
DocValuesType.SORTED,
DocValuesType.SORTED_NUMERIC,
DocValuesType.SORTED_SET)
.contains(fieldType.docValuesType())) {
hasDocValuesSkipIndex = fieldType.hasDocValuesSkipIndex();
docValuesSkipIndexType = fieldType.docValuesSkipIndexType();
}
FieldInfo fi =
new FieldInfo(
@ -321,7 +322,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
storePayloads,
fieldType.indexOptions(),
fieldType.docValuesType(),
hasDocValuesSkipIndex,
docValuesSkipIndexType,
-1,
new HashMap<>(),
fieldType.pointDimensionCount(),
@ -374,7 +375,10 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
|| current == DocValuesType.SORTED_NUMERIC
|| current == DocValuesType.SORTED
|| current == DocValuesType.SORTED_SET) {
type.setDocValuesSkipIndex(supportDocValuesSkipIndex() && random().nextBoolean());
type.setDocValuesSkipIndexType(
supportDocValuesSkipIndex()
? DocValuesSkipIndexType.RANGE
: DocValuesSkipIndexType.NONE);
}
}
@ -414,7 +418,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
assertEquals(expected.number, actual.number);
assertEquals(expected.name, actual.name);
assertEquals(expected.getDocValuesType(), actual.getDocValuesType());
assertEquals(expected.hasDocValuesSkipIndex(), actual.hasDocValuesSkipIndex());
assertEquals(expected.docValuesSkipIndexType(), actual.docValuesSkipIndexType());
assertEquals(expected.getIndexOptions(), actual.getIndexOptions());
assertEquals(expected.hasNorms(), actual.hasNorms());
assertEquals(expected.hasPayloads(), actual.hasPayloads());
@ -455,7 +459,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
false,
TextField.TYPE_STORED.indexOptions(),
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,

View File

@ -357,7 +357,7 @@ public abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
proto.hasPayloads(),
proto.getIndexOptions(),
proto.getDocValuesType(),
proto.hasDocValuesSkipIndex(),
proto.docValuesSkipIndexType(),
proto.getDocValuesGen(),
new HashMap<>(),
proto.getPointDimensionCount(),

View File

@ -46,6 +46,7 @@ import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -377,7 +378,7 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
false,
IndexOptions.NONE,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
Map.of(),
0,

View File

@ -103,7 +103,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
oldInfo.hasPayloads(), // storePayloads
oldInfo.getIndexOptions(), // indexOptions
oldInfo.getDocValuesType(), // docValuesType
oldInfo.hasDocValuesSkipIndex(), // hasDocValuesSkipIndex
oldInfo.docValuesSkipIndexType(), // docValuesSkipIndexType
oldInfo.getDocValuesGen(), // dvGen
oldInfo.attributes(), // attributes
oldInfo.getPointDimensionCount(), // data dimension count

View File

@ -45,6 +45,7 @@ import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
@ -157,7 +158,7 @@ public class RandomPostingsTester {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,
@ -732,7 +733,7 @@ public class RandomPostingsTester {
doPayloads,
indexOptions,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
new HashMap<>(),
0,