mirror of https://github.com/apache/lucene.git
Allow reading / writing binary stored fields as DataInput (#12581)
This commit adds the possibility to read / write binary stored values using a DataInput and the number of bytes. By default the implementations will allocate those bytes in a newly created byte array and call the already existing method.
This commit is contained in:
parent
8b84f6c096
commit
d48913a957
|
@ -137,6 +137,11 @@ API Changes
|
||||||
* GITHUB#12578: Deprecate IndexSearcher#getExecutor in favour of executing concurrent tasks using
|
* GITHUB#12578: Deprecate IndexSearcher#getExecutor in favour of executing concurrent tasks using
|
||||||
the TaskExecutor that the searcher holds, retrieved via IndexSearcher#getTaskExecutor (Luca Cavanna)
|
the TaskExecutor that the searcher holds, retrieved via IndexSearcher#getTaskExecutor (Luca Cavanna)
|
||||||
|
|
||||||
|
* GITHUB#12556: StoredFieldVisitor has a new expert method StoredFieldVisitor#binaryField(FieldInfo, DataInput, int)
|
||||||
|
that allows implementors to read binary values directly from the DataInput without having to allocate a byte[].
|
||||||
|
The default implementation allocates a new byte array and calls StoredFieldVisitor#binaryField(FieldInfo, byte[]).
|
||||||
|
(Ignacio Vera)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
(No changes)
|
||||||
|
|
|
@ -285,9 +285,7 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea
|
||||||
switch (bits & TYPE_MASK) {
|
switch (bits & TYPE_MASK) {
|
||||||
case BYTE_ARR:
|
case BYTE_ARR:
|
||||||
int length = in.readVInt();
|
int length = in.readVInt();
|
||||||
byte[] data = new byte[length];
|
visitor.binaryField(info, in, length);
|
||||||
in.readBytes(data, 0, length);
|
|
||||||
visitor.binaryField(info, data);
|
|
||||||
break;
|
break;
|
||||||
case STRING:
|
case STRING:
|
||||||
visitor.stringField(info, in.readString());
|
visitor.stringField(info, in.readString());
|
||||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.MergeState;
|
import org.apache.lucene.index.MergeState;
|
||||||
import org.apache.lucene.index.StoredFieldVisitor;
|
import org.apache.lucene.index.StoredFieldVisitor;
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
@ -72,6 +73,13 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
|
||||||
/** Writes a stored double value. */
|
/** Writes a stored double value. */
|
||||||
public abstract void writeField(FieldInfo info, double value) throws IOException;
|
public abstract void writeField(FieldInfo info, double value) throws IOException;
|
||||||
|
|
||||||
|
/** Writes a stored binary value from a {@link DataInput} and a {@code length}. */
|
||||||
|
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
|
||||||
|
final byte[] bytes = new byte[length];
|
||||||
|
value.readBytes(bytes, 0, length);
|
||||||
|
writeField(info, new BytesRef(bytes, 0, length));
|
||||||
|
}
|
||||||
|
|
||||||
/** Writes a stored binary value. */
|
/** Writes a stored binary value. */
|
||||||
public abstract void writeField(FieldInfo info, BytesRef value) throws IOException;
|
public abstract void writeField(FieldInfo info, BytesRef value) throws IOException;
|
||||||
|
|
||||||
|
@ -182,6 +190,11 @@ public abstract class StoredFieldsWriter implements Closeable, Accountable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
|
||||||
|
writeField(remap(fieldInfo), value, length);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||||
// TODO: can we avoid new BR here?
|
// TODO: can we avoid new BR here?
|
||||||
|
|
|
@ -240,9 +240,7 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea
|
||||||
switch (bits & TYPE_MASK) {
|
switch (bits & TYPE_MASK) {
|
||||||
case BYTE_ARR:
|
case BYTE_ARR:
|
||||||
int length = in.readVInt();
|
int length = in.readVInt();
|
||||||
byte[] data = new byte[length];
|
visitor.binaryField(info, in, length);
|
||||||
in.readBytes(data, 0, length);
|
|
||||||
visitor.binaryField(info, data);
|
|
||||||
break;
|
break;
|
||||||
case STRING:
|
case STRING:
|
||||||
visitor.stringField(info, in.readString());
|
visitor.stringField(info, in.readString());
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.lucene.index.MergeState;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.store.ByteBuffersDataInput;
|
import org.apache.lucene.store.ByteBuffersDataInput;
|
||||||
import org.apache.lucene.store.ByteBuffersDataOutput;
|
import org.apache.lucene.store.ByteBuffersDataOutput;
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
@ -306,6 +307,15 @@ public final class Lucene90CompressingStoredFieldsWriter extends StoredFieldsWri
|
||||||
bufferedDocs.writeBytes(value.bytes, value.offset, value.length);
|
bufferedDocs.writeBytes(value.bytes, value.offset, value.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
|
||||||
|
++numStoredFieldsInDoc;
|
||||||
|
final long infoAndBits = (((long) info.number) << TYPE_BITS) | BYTE_ARR;
|
||||||
|
bufferedDocs.writeVLong(infoAndBits);
|
||||||
|
bufferedDocs.writeVInt(length);
|
||||||
|
bufferedDocs.copyBytes(value, length);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeField(FieldInfo info, String value) throws IOException {
|
public void writeField(FieldInfo info, String value) throws IOException {
|
||||||
++numStoredFieldsInDoc;
|
++numStoredFieldsInDoc;
|
||||||
|
|
|
@ -139,6 +139,11 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
|
||||||
|
writer.writeField(fieldInfo, value, length);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||||
// TODO: can we avoid new BR here?
|
// TODO: can we avoid new BR here?
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
import org.apache.lucene.document.DocumentStoredFieldVisitor;
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert: provides a low-level means of accessing the stored field values in an index. See {@link
|
* Expert: provides a low-level means of accessing the stored field values in an index. See {@link
|
||||||
|
@ -39,6 +40,19 @@ public abstract class StoredFieldVisitor {
|
||||||
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
|
/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
|
||||||
protected StoredFieldVisitor() {}
|
protected StoredFieldVisitor() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: Process a binary field directly from the {@link DataInput}. Implementors of this method
|
||||||
|
* must read {@code length} bytes from the given {@link DataInput}. The default implementation
|
||||||
|
* reads all bytes into a newly created byte array and calls {@link #binaryField(FieldInfo, byte[])}.
|
||||||
|
*
|
||||||
|
* @param value the {@link DataInput} from which {@code length} bytes of binary content are read.
|
||||||
|
*/
|
||||||
|
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
|
||||||
|
final byte[] data = new byte[length];
|
||||||
|
value.readBytes(data, 0, length);
|
||||||
|
binaryField(fieldInfo, data);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process a binary field.
|
* Process a binary field.
|
||||||
*
|
*
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.StoredFieldVisitor;
|
import org.apache.lucene.index.StoredFieldVisitor;
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.tests.util.TestUtil;
|
import org.apache.lucene.tests.util.TestUtil;
|
||||||
|
@ -159,6 +160,12 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
in.writeField(info, value);
|
in.writeField(info, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
|
||||||
|
assert docStatus == Status.STARTED;
|
||||||
|
in.writeField(info, value, length);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeField(FieldInfo info, String value) throws IOException {
|
public void writeField(FieldInfo info, String value) throws IOException {
|
||||||
assert docStatus == Status.STARTED;
|
assert docStatus == Status.STARTED;
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.MergeState;
|
import org.apache.lucene.index.MergeState;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
|
@ -147,6 +148,14 @@ class CrankyStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
delegate.writeField(info, value);
|
delegate.writeField(info, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
|
||||||
|
if (random.nextInt(10000) == 0) {
|
||||||
|
throw new IOException("Fake IOException from StoredFieldsWriter.writeField()");
|
||||||
|
}
|
||||||
|
delegate.writeField(info, value, length);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void writeField(FieldInfo info, String value) throws IOException {
|
public void writeField(FieldInfo info, String value) throws IOException {
|
||||||
if (random.nextInt(10000) == 0) {
|
if (random.nextInt(10000) == 0) {
|
||||||
|
|
Loading…
Reference in New Issue