LUCENE-9675: Binary doc values fields now expose their configured compression mode in the attributes of the field info.

This commit is contained in:
jimczi 2021-01-19 10:03:13 +01:00
parent 227256d951
commit 5af12b5f14
4 changed files with 31 additions and 21 deletions

View File

@ -265,6 +265,9 @@ Improvements
* LUCENE-9023: GlobalOrdinalsWithScore should not compute occurrences when the
provided min is 1. (Jim Ferenczi)
* LUCENE-9675: Binary doc values fields now expose their configured compression mode
in the attributes of the field info. (Jim Ferenczi)
Optimizations
---------------------

View File

@ -539,16 +539,15 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Close
@Override @Override
public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
field.putAttribute(Lucene80DocValuesFormat.MODE_KEY, mode.name());
meta.writeInt(field.number); meta.writeInt(field.number);
meta.writeByte(Lucene80DocValuesFormat.BINARY); meta.writeByte(Lucene80DocValuesFormat.BINARY);
switch (mode) { switch (mode) {
case BEST_SPEED: case BEST_SPEED:
meta.writeByte((byte) 0);
doAddUncompressedBinaryField(field, valuesProducer); doAddUncompressedBinaryField(field, valuesProducer);
break; break;
case BEST_COMPRESSION: case BEST_COMPRESSION:
meta.writeByte((byte) 1);
doAddCompressedBinaryField(field, valuesProducer); doAddCompressedBinaryField(field, valuesProducer);
break; break;
default: default:

View File

@ -147,6 +147,9 @@ public final class Lucene80DocValuesFormat extends DocValuesFormat {
BEST_COMPRESSION BEST_COMPRESSION
} }
/** Attribute key for compression mode. */
public static final String MODE_KEY = Lucene80DocValuesFormat.class.getSimpleName() + ".mode";
private final Mode mode; private final Mode mode;
/** Default constructor. */ /** Default constructor. */

View File

@ -88,7 +88,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
state.segmentInfo.getId(), state.segmentInfo.getId(),
state.segmentSuffix); state.segmentSuffix);
readFields(in, state.fieldInfos); readFields(state.segmentInfo.name, in, state.fieldInfos);
} catch (Throwable exception) { } catch (Throwable exception) {
priorE = exception; priorE = exception;
@ -129,7 +129,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
} }
} }
private void readFields(IndexInput meta, FieldInfos infos) throws IOException { private void readFields(String segmentName, IndexInput meta, FieldInfos infos)
throws IOException {
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
FieldInfo info = infos.fieldInfo(fieldNumber); FieldInfo info = infos.fieldInfo(fieldNumber);
if (info == null) { if (info == null) {
@ -139,7 +140,24 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
if (type == Lucene80DocValuesFormat.NUMERIC) { if (type == Lucene80DocValuesFormat.NUMERIC) {
numerics.put(info.name, readNumeric(meta)); numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene80DocValuesFormat.BINARY) { } else if (type == Lucene80DocValuesFormat.BINARY) {
binaries.put(info.name, readBinary(meta)); final boolean compressed;
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
String value = info.getAttribute(Lucene80DocValuesFormat.MODE_KEY);
if (value == null) {
throw new IllegalStateException(
"missing value for "
+ Lucene80DocValuesFormat.MODE_KEY
+ " for field: "
+ info.name
+ " in segment: "
+ segmentName);
}
Lucene80DocValuesFormat.Mode mode = Lucene80DocValuesFormat.Mode.valueOf(value);
compressed = mode == Lucene80DocValuesFormat.Mode.BEST_COMPRESSION;
} else {
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
binaries.put(info.name, readBinary(meta, compressed));
} else if (type == Lucene80DocValuesFormat.SORTED) { } else if (type == Lucene80DocValuesFormat.SORTED) {
sorted.put(info.name, readSorted(meta)); sorted.put(info.name, readSorted(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_SET) { } else if (type == Lucene80DocValuesFormat.SORTED_SET) {
@ -188,22 +206,9 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
entry.valueJumpTableOffset = meta.readLong(); entry.valueJumpTableOffset = meta.readLong();
} }
private BinaryEntry readBinary(IndexInput meta) throws IOException { private BinaryEntry readBinary(IndexInput meta, boolean compressed) throws IOException {
BinaryEntry entry = new BinaryEntry(); final BinaryEntry entry = new BinaryEntry();
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) { entry.compressed = compressed;
int b = meta.readByte();
switch (b) {
case 0:
case 1:
// valid
break;
default:
throw new CorruptIndexException("Unexpected byte: " + b + ", expected 0 or 1", meta);
}
entry.compressed = b != 0;
} else {
entry.compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
entry.dataOffset = meta.readLong(); entry.dataOffset = meta.readLong();
entry.dataLength = meta.readLong(); entry.dataLength = meta.readLong();
entry.docsWithFieldOffset = meta.readLong(); entry.docsWithFieldOffset = meta.readLong();