From 5af12b5f149920a627f7bc125d67622c52942447 Mon Sep 17 00:00:00 2001 From: jimczi Date: Tue, 19 Jan 2021 10:03:13 +0100 Subject: [PATCH] LUCENE-9675: Binary doc values fields now expose their configured compression mode in the attributes of the field info. --- lucene/CHANGES.txt | 3 ++ .../lucene80/Lucene80DocValuesConsumer.java | 3 +- .../lucene80/Lucene80DocValuesFormat.java | 3 ++ .../lucene80/Lucene80DocValuesProducer.java | 43 +++++++++++-------- 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 71630e7002b..2bdd5d45e25 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -265,6 +265,9 @@ Improvements * LUCENE-9023: GlobalOrdinalsWithScore should not compute occurrences when the provided min is 1. (Jim Ferenczi) +* LUCENE-9675: Binary doc values fields now expose their configured compression mode + in the attributes of the field info. (Jim Ferenczi) + Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java index 7cd8e88436b..761e2d9afb4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java @@ -539,16 +539,15 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Close @Override public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + field.putAttribute(Lucene80DocValuesFormat.MODE_KEY, mode.name()); meta.writeInt(field.number); meta.writeByte(Lucene80DocValuesFormat.BINARY); switch (mode) { case BEST_SPEED: - meta.writeByte((byte) 0); doAddUncompressedBinaryField(field, valuesProducer); break; case BEST_COMPRESSION: - meta.writeByte((byte) 1); doAddCompressedBinaryField(field, valuesProducer); break; default: diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java index b120a72d329..daa41c547e3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesFormat.java @@ -147,6 +147,9 @@ public final class Lucene80DocValuesFormat extends DocValuesFormat { BEST_COMPRESSION } + /** Attribute key for compression mode. */ + public static final String MODE_KEY = Lucene80DocValuesFormat.class.getSimpleName() + ".mode"; + private final Mode mode; /** Default constructor. */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java index 6929deeadde..db579a0dc21 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java @@ -88,7 +88,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close state.segmentInfo.getId(), state.segmentSuffix); - readFields(in, state.fieldInfos); + readFields(state.segmentInfo.name, in, state.fieldInfos); } catch (Throwable exception) { priorE = exception; @@ -129,7 +129,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close } } - private void readFields(IndexInput meta, FieldInfos infos) throws IOException { + private void readFields(String segmentName, IndexInput meta, FieldInfos infos) + throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); if (info == null) { @@ -139,7 +140,24 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close if (type == Lucene80DocValuesFormat.NUMERIC) { numerics.put(info.name, readNumeric(meta)); } else if (type == Lucene80DocValuesFormat.BINARY) { - binaries.put(info.name, readBinary(meta)); + final boolean compressed; + if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) { + String value = info.getAttribute(Lucene80DocValuesFormat.MODE_KEY); + if (value == null) { + throw new IllegalStateException( + "missing value for " + + Lucene80DocValuesFormat.MODE_KEY + + " for field: " + + info.name + + " in segment: " + + segmentName); + } + Lucene80DocValuesFormat.Mode mode = Lucene80DocValuesFormat.Mode.valueOf(value); + compressed = mode == Lucene80DocValuesFormat.Mode.BEST_COMPRESSION; + } else { + compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED; + } + binaries.put(info.name, readBinary(meta, compressed)); } else if (type == Lucene80DocValuesFormat.SORTED) { sorted.put(info.name, readSorted(meta)); } else if (type == Lucene80DocValuesFormat.SORTED_SET) { @@ -188,22 +206,9 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close entry.valueJumpTableOffset = meta.readLong(); } - private BinaryEntry readBinary(IndexInput meta) throws IOException { - BinaryEntry entry = new BinaryEntry(); - if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) { - int b = meta.readByte(); - switch (b) { - case 0: - case 1: - // valid - break; - default: - throw new CorruptIndexException("Unexpected byte: " + b + ", expected 0 or 1", meta); - } - entry.compressed = b != 0; - } else { - entry.compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED; - } + private BinaryEntry readBinary(IndexInput meta, boolean compressed) throws IOException { + final BinaryEntry entry = new BinaryEntry(); + entry.compressed = compressed; entry.dataOffset = meta.readLong(); entry.dataLength = meta.readLong(); entry.docsWithFieldOffset = meta.readLong();