From d48913a957392e2746b489fe5aef77a21250e4b4 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Mon, 25 Sep 2023 11:09:32 +0200 Subject: [PATCH] Allow reading / writing binary stored fields as DataInput (#12581) This commit adds the possibility to read / write binary stored values using a DataInput and the number of bytes. By default the implementations will allocate those bytes in a newly created byte array and call the already existing method. --- lucene/CHANGES.txt | 5 +++++ .../Lucene50CompressingStoredFieldsReader.java | 4 +--- .../apache/lucene/codecs/StoredFieldsWriter.java | 13 +++++++++++++ .../Lucene90CompressingStoredFieldsReader.java | 4 +--- .../Lucene90CompressingStoredFieldsWriter.java | 10 ++++++++++ .../lucene/index/SortingStoredFieldsConsumer.java | 5 +++++ .../apache/lucene/index/StoredFieldVisitor.java | 14 ++++++++++++++ .../asserting/AssertingStoredFieldsFormat.java | 7 +++++++ .../codecs/cranky/CrankyStoredFieldsFormat.java | 9 +++++++++ 9 files changed, 65 insertions(+), 6 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f09161a19d6..a779f285d92 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -137,6 +137,11 @@ API Changes * GITHUB#12578: Deprecate IndexSearcher#getExecutor in favour of executing concurrent tasks using the TaskExecutor that the searcher holds, retrieved via IndexSearcher#getTaskExecutor (Luca Cavanna) +* GITHUB#12556: StoredFieldVisitor has a new expert method StoredFieldVisitor#binaryField(FieldInfo, DataInput, int) + that allows implementors to read binary values directly from the DataInput without having to allocate a byte[]. + The default implementation allocates a new byte array and calls StoredFieldVisitor#binaryField(FieldInfo, byte[]).
+ (Ignacio Vera) + New Features --------------------- (No changes) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java index a76b699cc72..a451874f907 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java @@ -285,9 +285,7 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea switch (bits & TYPE_MASK) { case BYTE_ARR: int length = in.readVInt(); - byte[] data = new byte[length]; - in.readBytes(data, 0, length); - visitor.binaryField(info, data); + visitor.binaryField(info, in, length); break; case STRING: visitor.stringField(info, in.readString()); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java index 21fbf0c7002..6824b227f68 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.DataInput; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; @@ -72,6 +73,13 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable { /** Writes a stored double value. */ public abstract void writeField(FieldInfo info, double value) throws IOException; + /** Writes a stored binary value from a {@link DataInput} and a {@code length}. 
*/ + public void writeField(FieldInfo info, DataInput value, int length) throws IOException { + final byte[] bytes = new byte[length]; + value.readBytes(bytes, 0, length); + writeField(info, new BytesRef(bytes, 0, length)); + } + /** Writes a stored binary value. */ public abstract void writeField(FieldInfo info, BytesRef value) throws IOException; @@ -182,6 +190,11 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable { } } + @Override + public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException { + writeField(remap(fieldInfo), value, length); + } + @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { // TODO: can we avoid new BR here? diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java index 9bf70a6a227..4999af53196 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java @@ -240,9 +240,7 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea switch (bits & TYPE_MASK) { case BYTE_ARR: int length = in.readVInt(); - byte[] data = new byte[length]; - in.readBytes(data, 0, length); - visitor.binaryField(info, data); + visitor.binaryField(info, in, length); break; case STRING: visitor.stringField(info, in.readString()); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java index c797c6326f2..1b518868169 100644 --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -306,6 +307,15 @@ public final class Lucene90CompressingStoredFieldsWriter extends StoredFieldsWri bufferedDocs.writeBytes(value.bytes, value.offset, value.length); } + @Override + public void writeField(FieldInfo info, DataInput value, int length) throws IOException { + ++numStoredFieldsInDoc; + final long infoAndBits = (((long) info.number) << TYPE_BITS) | BYTE_ARR; + bufferedDocs.writeVLong(infoAndBits); + bufferedDocs.writeVInt(length); + bufferedDocs.copyBytes(value, length); + } + @Override public void writeField(FieldInfo info, String value) throws IOException { ++numStoredFieldsInDoc; diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java index 55a87641e86..61bd680f2bd 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java @@ -139,6 +139,11 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer { this.writer = writer; } + @Override + public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException { + writer.writeField(fieldInfo, value, length); + } + @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { // TODO: can we avoid new BR here? 
diff --git a/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java b/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java index 27637f3af0f..2457f392d11 100644 --- a/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java +++ b/lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.DocumentStoredFieldVisitor; +import org.apache.lucene.store.DataInput; /** * Expert: provides a low-level means of accessing the stored field values in an index. See {@link @@ -39,6 +40,19 @@ public abstract class StoredFieldVisitor { /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */ protected StoredFieldVisitor() {} + /** + * Expert: Process a binary field directly from the {@link DataInput}. Implementors of this method + * must read {@code length} bytes from the given {@link DataInput}. The default implementation + * reads all bytes into a newly created byte array and calls {@link #binaryField(FieldInfo, byte[])}. + * + * @param value the {@link DataInput} to read the binary contents from. + */ + public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException { + final byte[] data = new byte[length]; + value.readBytes(data, 0, length); + binaryField(fieldInfo, data); + } + /** * Process a binary field.
* diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java index 1b2f50f6730..d8d37718c11 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.DataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.tests.util.TestUtil; @@ -159,6 +160,12 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat { in.writeField(info, value); } + @Override + public void writeField(FieldInfo info, DataInput value, int length) throws IOException { + assert docStatus == Status.STARTED; + in.writeField(info, value, length); + } + @Override public void writeField(FieldInfo info, String value) throws IOException { assert docStatus == Status.STARTED; diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/cranky/CrankyStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/cranky/CrankyStoredFieldsFormat.java index 0bb54e2b899..25d1695f534 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/cranky/CrankyStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/cranky/CrankyStoredFieldsFormat.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.DataInput; import 
org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Accountable; @@ -147,6 +148,14 @@ class CrankyStoredFieldsFormat extends StoredFieldsFormat { delegate.writeField(info, value); } + @Override + public void writeField(FieldInfo info, DataInput value, int length) throws IOException { + if (random.nextInt(10000) == 0) { + throw new IOException("Fake IOException from StoredFieldsWriter.writeField()"); + } + delegate.writeField(info, value, length); + } + @Override public void writeField(FieldInfo info, String value) throws IOException { if (random.nextInt(10000) == 0) {