LUCENE-9675: Binary doc values fields now expose their configured compression mode in the attributes of the field info.

This commit is contained in:
jimczi 2021-01-19 10:03:13 +01:00
parent 227256d951
commit 5af12b5f14
4 changed files with 31 additions and 21 deletions

View File

@ -265,6 +265,9 @@ Improvements
* LUCENE-9023: GlobalOrdinalsWithScore should not compute occurrences when the
provided min is 1. (Jim Ferenczi)
* LUCENE-9675: Binary doc values fields now expose their configured compression mode
in the attributes of the field info. (Jim Ferenczi)
Optimizations
---------------------

View File

@ -539,16 +539,15 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Close
@Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
field.putAttribute(Lucene80DocValuesFormat.MODE_KEY, mode.name());
meta.writeInt(field.number);
meta.writeByte(Lucene80DocValuesFormat.BINARY);
switch (mode) {
case BEST_SPEED:
meta.writeByte((byte) 0);
doAddUncompressedBinaryField(field, valuesProducer);
break;
case BEST_COMPRESSION:
meta.writeByte((byte) 1);
doAddCompressedBinaryField(field, valuesProducer);
break;
default:

View File

@ -147,6 +147,9 @@ public final class Lucene80DocValuesFormat extends DocValuesFormat {
BEST_COMPRESSION
}
/**
 * Attribute key for compression mode.
 *
 * <p>The key is the simple name of this class followed by {@code ".mode"}. The doc values
 * consumer stores the name of the configured {@link Mode} under this key in the attributes of
 * each binary field's {@code FieldInfo}; the doc values producer reads it back to decide whether
 * the binary values of that field are compressed.
 */
public static final String MODE_KEY = Lucene80DocValuesFormat.class.getSimpleName() + ".mode";
private final Mode mode;
/** Default constructor. */

View File

@ -88,7 +88,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
state.segmentInfo.getId(),
state.segmentSuffix);
readFields(in, state.fieldInfos);
readFields(state.segmentInfo.name, in, state.fieldInfos);
} catch (Throwable exception) {
priorE = exception;
@ -129,7 +129,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
}
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
private void readFields(String segmentName, IndexInput meta, FieldInfos infos)
throws IOException {
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
FieldInfo info = infos.fieldInfo(fieldNumber);
if (info == null) {
@ -139,7 +140,24 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
if (type == Lucene80DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene80DocValuesFormat.BINARY) {
binaries.put(info.name, readBinary(meta));
final boolean compressed;
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
String value = info.getAttribute(Lucene80DocValuesFormat.MODE_KEY);
if (value == null) {
throw new IllegalStateException(
"missing value for "
+ Lucene80DocValuesFormat.MODE_KEY
+ " for field: "
+ info.name
+ " in segment: "
+ segmentName);
}
Lucene80DocValuesFormat.Mode mode = Lucene80DocValuesFormat.Mode.valueOf(value);
compressed = mode == Lucene80DocValuesFormat.Mode.BEST_COMPRESSION;
} else {
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
binaries.put(info.name, readBinary(meta, compressed));
} else if (type == Lucene80DocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_SET) {
@ -188,22 +206,9 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
entry.valueJumpTableOffset = meta.readLong();
}
private BinaryEntry readBinary(IndexInput meta) throws IOException {
BinaryEntry entry = new BinaryEntry();
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
int b = meta.readByte();
switch (b) {
case 0:
case 1:
// valid
break;
default:
throw new CorruptIndexException("Unexpected byte: " + b + ", expected 0 or 1", meta);
}
entry.compressed = b != 0;
} else {
entry.compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
private BinaryEntry readBinary(IndexInput meta, boolean compressed) throws IOException {
final BinaryEntry entry = new BinaryEntry();
entry.compressed = compressed;
entry.dataOffset = meta.readLong();
entry.dataLength = meta.readLong();
entry.docsWithFieldOffset = meta.readLong();