mirror of https://github.com/apache/lucene.git
LUCENE-5583: Add DataInput.skipBytes, ChecksumIndexInput can now seek forward.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1586231 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f954405782
commit
60be1d46bb
|
@ -163,6 +163,9 @@ New Features
|
||||||
* LUCENE-5580: Checksums are automatically verified on the default stored
|
* LUCENE-5580: Checksums are automatically verified on the default stored
|
||||||
fields format when performing a bulk merge. (Adrien Grand)
|
fields format when performing a bulk merge. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-5583: Added DataInput.skipBytes. ChecksumIndexInput can now seek, but
|
||||||
|
only forward. (Adrien Grand, Mike McCandless, Simon Willnauer, Uwe Schindler)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
|
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
|
||||||
|
|
|
@ -264,13 +264,7 @@ public final class CodecUtil {
|
||||||
clone.seek(0);
|
clone.seek(0);
|
||||||
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
|
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
|
||||||
assert in.getFilePointer() == 0;
|
assert in.getFilePointer() == 0;
|
||||||
final byte[] buffer = new byte[1024];
|
in.seek(in.length() - footerLength());
|
||||||
long bytesToRead = in.length() - footerLength();
|
|
||||||
for (long skipped = 0; skipped < bytesToRead; ) {
|
|
||||||
final int toRead = (int) Math.min(bytesToRead - skipped, buffer.length);
|
|
||||||
in.readBytes(buffer, 0, toRead);
|
|
||||||
skipped += toRead;
|
|
||||||
}
|
|
||||||
return checkFooter(in);
|
return checkFooter(in);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,18 +70,6 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
// Do not reuse the decompression buffer when there is more than 32kb to decompress
|
// Do not reuse the decompression buffer when there is more than 32kb to decompress
|
||||||
private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;
|
private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;
|
||||||
|
|
||||||
private static final byte[] SKIP_BUFFER = new byte[1024];
|
|
||||||
|
|
||||||
// TODO: should this be a method on DataInput?
|
|
||||||
private static void skipBytes(DataInput in, long numBytes) throws IOException {
|
|
||||||
assert numBytes >= 0;
|
|
||||||
for (long skipped = 0; skipped < numBytes; ) {
|
|
||||||
final int toRead = (int) Math.min(numBytes - skipped, SKIP_BUFFER.length);
|
|
||||||
in.readBytes(SKIP_BUFFER, 0, toRead);
|
|
||||||
skipped += toRead;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final int version;
|
private final int version;
|
||||||
private final FieldInfos fieldInfos;
|
private final FieldInfos fieldInfos;
|
||||||
private final CompressingStoredFieldsIndexReader indexReader;
|
private final CompressingStoredFieldsIndexReader indexReader;
|
||||||
|
@ -223,7 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
case BYTE_ARR:
|
case BYTE_ARR:
|
||||||
case STRING:
|
case STRING:
|
||||||
final int length = in.readVInt();
|
final int length = in.readVInt();
|
||||||
skipBytes(in, length);
|
in.skipBytes(length);
|
||||||
break;
|
break;
|
||||||
case NUMERIC_INT:
|
case NUMERIC_INT:
|
||||||
case NUMERIC_FLOAT:
|
case NUMERIC_FLOAT:
|
||||||
|
@ -416,24 +404,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
|
|
||||||
IndexInput in = CompressingStoredFieldsReader.this.fieldsStream;
|
IndexInput in = CompressingStoredFieldsReader.this.fieldsStream;
|
||||||
in.seek(0);
|
in.seek(0);
|
||||||
fieldsStream = new BufferedChecksumIndexInput(in) {
|
fieldsStream = new BufferedChecksumIndexInput(in);
|
||||||
|
|
||||||
final byte[] skipBuffer = new byte[256];
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void seek(long target) throws IOException {
|
|
||||||
final long skip = target - getFilePointer();
|
|
||||||
if (skip < 0) {
|
|
||||||
throw new IllegalStateException("Seeking backward on merge: " + skip);
|
|
||||||
}
|
|
||||||
for (long skipped = 0; skipped < skip; ) {
|
|
||||||
final int step = (int) Math.min(skipBuffer.length, skip - skipped);
|
|
||||||
readBytes(skipBuffer, 0, step);
|
|
||||||
skipped += step;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
fieldsStream.seek(indexReader.getStartPointer(startDocId));
|
fieldsStream.seek(indexReader.getStartPointer(startDocId));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -75,7 +75,8 @@ public final class ByteArrayDataInput extends DataInput {
|
||||||
return pos == limit;
|
return pos == limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void skipBytes(int count) {
|
@Override
|
||||||
|
public void skipBytes(long count) {
|
||||||
pos += count;
|
pos += count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,8 +35,19 @@ public abstract class ChecksumIndexInput extends IndexInput {
|
||||||
/** Returns the current checksum value */
|
/** Returns the current checksum value */
|
||||||
public abstract long getChecksum() throws IOException;
|
public abstract long getChecksum() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*
|
||||||
|
* {@link ChecksumIndexInput} can only seek forward and seeks are expensive
|
||||||
|
* since they imply to read bytes in-between the current position and the
|
||||||
|
* target position in order to update the checksum.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void seek(long pos) throws IOException {
|
public void seek(long pos) throws IOException {
|
||||||
throw new UnsupportedOperationException();
|
final long skip = pos - getFilePointer();
|
||||||
|
if (skip < 0) {
|
||||||
|
throw new IllegalStateException(ChecksumIndexInput.class + " cannot seed backward");
|
||||||
|
}
|
||||||
|
skipBytes(skip);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,19 @@ import java.util.Set;
|
||||||
* resource, but positioned independently.
|
* resource, but positioned independently.
|
||||||
*/
|
*/
|
||||||
public abstract class DataInput implements Cloneable {
|
public abstract class DataInput implements Cloneable {
|
||||||
|
|
||||||
|
private static final int SKIP_BUFFER_SIZE = 1024;
|
||||||
|
|
||||||
|
/* This buffer is used to skip over bytes with the default implementation of
|
||||||
|
* skipBytes. The reason why we need to use an instance member instead of
|
||||||
|
* sharing a single instance across threads is that some delegating
|
||||||
|
* implementations of DataInput might want to reuse the provided buffer in
|
||||||
|
* order to eg. update the checksum. If we shared the same buffer across
|
||||||
|
* threads, then another thread might update the buffer while the checksum is
|
||||||
|
* being computed, making it invalid. See LUCENE-5583 for more information.
|
||||||
|
*/
|
||||||
|
private byte[] skipBuffer;
|
||||||
|
|
||||||
/** Reads and returns a single byte.
|
/** Reads and returns a single byte.
|
||||||
* @see DataOutput#writeByte(byte)
|
* @see DataOutput#writeByte(byte)
|
||||||
*/
|
*/
|
||||||
|
@ -233,4 +246,26 @@ public abstract class DataInput implements Cloneable {
|
||||||
|
|
||||||
return set;
|
return set;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skip over <code>numBytes</code> bytes. The contract on this method is that it
|
||||||
|
* should have the same behavior as reading the same number of bytes into a
|
||||||
|
* buffer and discarding its content. Negative values of <code>numBytes</code>
|
||||||
|
* are not supported.
|
||||||
|
*/
|
||||||
|
public void skipBytes(final long numBytes) throws IOException {
|
||||||
|
if (numBytes < 0) {
|
||||||
|
throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
|
||||||
|
}
|
||||||
|
if (skipBuffer == null) {
|
||||||
|
skipBuffer = new byte[SKIP_BUFFER_SIZE];
|
||||||
|
}
|
||||||
|
assert skipBuffer.length == SKIP_BUFFER_SIZE;
|
||||||
|
for (long skipped = 0; skipped < numBytes; ) {
|
||||||
|
final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
|
||||||
|
readBytes(skipBuffer, 0, step, false);
|
||||||
|
skipped += step;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue