Allow reading / writing binary stored fields as DataInput (#12581)

This commit adds the ability to read / write binary stored values through a DataInput together with the number of bytes. By default, the implementations allocate those bytes into a newly created byte array and call the already existing byte[]-based method; a usage sketch follows the file summary below.
Ignacio Vera 2023-09-25 11:09:32 +02:00 committed by GitHub
parent 8b84f6c096
commit d48913a957
9 changed files with 65 additions and 6 deletions
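As a usage sketch (not part of this commit): a StoredFieldVisitor that overrides the new expert callback so binary values land in a reusable scratch buffer instead of a fresh byte[] per field. The class names DataInputVisitorDemo and ScratchVisitor are hypothetical, and the sketch assumes a Lucene build that already contains this change.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;

public class DataInputVisitorDemo {

  // Hypothetical visitor: reads binary values into a reusable scratch buffer instead of
  // letting the default implementation allocate a fresh byte[] for every field.
  static final class ScratchVisitor extends StoredFieldVisitor {
    byte[] scratch = new byte[0];
    int length;

    @Override
    public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
      scratch = ArrayUtil.grow(scratch, length);
      value.readBytes(scratch, 0, length); // contract: read exactly `length` bytes
      this.length = length;
    }

    @Override
    public Status needsField(FieldInfo fieldInfo) {
      return Status.YES;
    }
  }

  public static void main(String[] args) throws IOException {
    try (Directory dir = new ByteBuffersDirectory()) {
      try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
        Document doc = new Document();
        doc.add(new StoredField("blob", new byte[] {1, 2, 3, 4}));
        writer.addDocument(doc);
      }
      try (IndexReader reader = DirectoryReader.open(dir)) {
        ScratchVisitor visitor = new ScratchVisitor();
        reader.storedFields().document(0, visitor); // invokes binaryField(FieldInfo, DataInput, int)
        System.out.println(visitor.length + " bytes read"); // prints: 4 bytes read
      }
    }
  }
}

The index setup in main only makes the sketch runnable; the relevant piece is the binaryField(FieldInfo, DataInput, int) override.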


@@ -137,6 +137,11 @@ API Changes
* GITHUB#12578: Deprecate IndexSearcher#getExecutor in favour of executing concurrent tasks using
the TaskExecutor that the searcher holds, retrieved via IndexSearcher#getTaskExecutor (Luca Cavanna)
* GITHUB#12556: StoredFieldVisitor has a new expert method StoredFieldVisitor#binaryField(FieldInfo, DataInput, int)
that allows implementors to read binary values directly from the DataInput without having to allocate a byte[].
The default implementation allocates a new byte array and calls StoredFieldVisitor#binaryField(FieldInfo, byte[]).
(Ignacio Vera)
New Features
---------------------
(No changes)


@@ -285,9 +285,7 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsReader
switch (bits & TYPE_MASK) {
case BYTE_ARR:
int length = in.readVInt();
byte[] data = new byte[length];
in.readBytes(data, 0, length);
visitor.binaryField(info, data);
visitor.binaryField(info, in, length);
break;
case STRING:
visitor.stringField(info, in.readString());


@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
@@ -72,6 +73,13 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
/** Writes a stored double value. */
public abstract void writeField(FieldInfo info, double value) throws IOException;
/** Writes a stored binary value from a {@link DataInput} and a {@code length}. */
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
final byte[] bytes = new byte[length];
value.readBytes(bytes, 0, length);
writeField(info, new BytesRef(bytes, 0, length));
}
/** Writes a stored binary value. */
public abstract void writeField(FieldInfo info, BytesRef value) throws IOException;
@@ -182,6 +190,11 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
}
}
@Override
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
writeField(remap(fieldInfo), value, length);
}
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
// TODO: can we avoid new BR here?

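The default implementation above still materializes a byte[]; writers that can stream the value avoid that by copying straight from the DataInput, as the Lucene90 compressing writer below does via copyBytes. A small self-contained sketch of that copy primitive (the class name CopyBytesDemo is hypothetical):

import java.io.IOException;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;

public class CopyBytesDemo {
  public static void main(String[] args) throws IOException {
    ByteBuffersDataOutput source = new ByteBuffersDataOutput();
    source.writeBytes(new byte[] {1, 2, 3, 4, 5}, 0, 5);

    // DataOutput#copyBytes drains exactly `length` bytes from the DataInput,
    // which is what writeField(FieldInfo, DataInput, int) relies on.
    ByteBuffersDataInput in = source.toDataInput();
    ByteBuffersDataOutput dest = new ByteBuffersDataOutput();
    dest.copyBytes(in, 5);

    System.out.println(dest.size()); // prints: 5
  }
}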

@@ -240,9 +240,7 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsReader
switch (bits & TYPE_MASK) {
case BYTE_ARR:
int length = in.readVInt();
byte[] data = new byte[length];
in.readBytes(data, 0, length);
visitor.binaryField(info, data);
visitor.binaryField(info, in, length);
break;
case STRING:
visitor.stringField(info, in.readString());


@ -36,6 +36,7 @@ import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -306,6 +307,15 @@ public final class Lucene90CompressingStoredFieldsWriter extends StoredFieldsWriter
bufferedDocs.writeBytes(value.bytes, value.offset, value.length);
}
@Override
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
++numStoredFieldsInDoc;
final long infoAndBits = (((long) info.number) << TYPE_BITS) | BYTE_ARR;
bufferedDocs.writeVLong(infoAndBits);
bufferedDocs.writeVInt(length);
bufferedDocs.copyBytes(value, length);
}
@Override
public void writeField(FieldInfo info, String value) throws IOException {
++numStoredFieldsInDoc;


@@ -139,6 +139,11 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
this.writer = writer;
}
@Override
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
writer.writeField(fieldInfo, value, length);
}
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
// TODO: can we avoid new BR here?


@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.store.DataInput;
/**
* Expert: provides a low-level means of accessing the stored field values in an index. See {@link
@@ -39,6 +40,19 @@ public abstract class StoredFieldVisitor {
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
protected StoredFieldVisitor() {}
/**
* Expert: Process a binary field directly from the {@link DataInput}. Implementors of this method
* must read {@code length} bytes from the given {@link DataInput}. The default implementation
* reads all bytes into a newly created byte array and calls {@link #binaryField(FieldInfo, byte[])}.
*
* @param value the {@link DataInput} positioned at the start of the binary contents
* @param length the number of bytes to read from {@code value}
*/
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
final byte[] data = new byte[length];
value.readBytes(data, 0, length);
binaryField(fieldInfo, data);
}
/**
* Process a binary field.
*

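Because the default implementation above reads the bytes into a new array and delegates to binaryField(FieldInfo, byte[]), visitors that only implement the byte[] callback (such as DocumentStoredFieldVisitor) keep working unchanged. A minimal sketch, assuming an open IndexReader over an index with at least one stored document (the helper name LoadFirstDoc is hypothetical):

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.IndexReader;

final class LoadFirstDoc {
  // DocumentStoredFieldVisitor only implements the byte[] callback; it still receives
  // binary values because the default DataInput implementation allocates the array
  // and delegates.
  static Document load(IndexReader reader) throws IOException {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    reader.storedFields().document(0, visitor);
    return visitor.getDocument();
  }
}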

@@ -25,6 +25,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.tests.util.TestUtil;
@@ -159,6 +160,12 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat {
in.writeField(info, value);
}
@Override
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
assert docStatus == Status.STARTED;
in.writeField(info, value, length);
}
@Override
public void writeField(FieldInfo info, String value) throws IOException {
assert docStatus == Status.STARTED;


@@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Accountable;
@@ -147,6 +148,14 @@ class CrankyStoredFieldsFormat extends StoredFieldsFormat {
delegate.writeField(info, value);
}
@Override
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
if (random.nextInt(10000) == 0) {
throw new IOException("Fake IOException from StoredFieldsWriter.writeField()");
}
delegate.writeField(info, value, length);
}
@Override
public void writeField(FieldInfo info, String value) throws IOException {
if (random.nextInt(10000) == 0) {