LUCENE-9480: Make DataInput.skipBytes(long) abstract

skipBytes() is a "relative" version of seek(), but DataInput previously
implemented it via read() calls, because DataInput's API does not
include absolute positioning methods (seek, getFilePointer).

This resulted in inefficiencies: calls to skipBytes() would cause
buffers to be allocated, bytes copied, etc.

Instead, make the subclass implement skipBytes() explicitly. The old
DataInput implementation is marked deprecated and renamed to skipBytesSlowly().

Some subclasses still implement skipBytes() via skipBytesSlowly(), to be
fixed in future improvements.
This commit is contained in:
Robert Muir 2021-02-20 12:11:32 -05:00
parent 2f0d191452
commit c51fee9c1a
No known key found for this signature in database
GPG Key ID: 817AE1DD322D7ECA
11 changed files with 235 additions and 2 deletions

View File

@ -89,6 +89,9 @@ API Changes
* LUCENE-9646: Set BM25Similarity discountOverlaps via the constructor (Patrick Marty via Bruno Roustant)
* LUCENE-9480: Make DataInput's skipBytes(long) abstract as the implementation was not performant.
IndexInput's api is unaffected: skipBytes() is implemented via seek(). (Greg Miller)
Improvements
* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,

View File

@ -651,6 +651,11 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
bytes.offset += len;
bytes.length -= len;
}
@Override
public void skipBytes(long numBytes) throws IOException {
skipBytesSlowly(numBytes);
}
};
} else {
fieldsStream.seek(startPointer);

View File

@ -138,4 +138,9 @@ final class ByteSliceReader extends DataInput {
}
}
}
@Override
public void skipBytes(long numBytes) throws IOException {
skipBytesSlowly(numBytes);
}
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.store;
import java.io.EOFException;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
@ -258,6 +259,11 @@ public final class ByteBuffersDataInput extends DataInput
}
}
@Override
public void skipBytes(long numBytes) throws IOException {
skipBytesSlowly(numBytes);
}
public ByteBuffersDataInput slice(long offset, long length) {
if (offset < 0 || length < 0 || offset + length > this.size) {
throw new IllegalArgumentException(

View File

@ -50,6 +50,10 @@ public abstract class ChecksumIndexInput extends IndexInput {
throw new IllegalStateException(
getClass() + " cannot seek backwards (pos=" + pos + " getFilePointer()=" + curFP + ")");
}
skipBytes(skip);
// we must skip slowly to ensure skipped bytes are still read and used
// to update checksums
// TODO: this "slow skip" logic should be moved into this class once
// no longer needed as default logic in DataInput
skipBytesSlowly(skip);
}
}

View File

@ -348,8 +348,12 @@ public abstract class DataInput implements Cloneable {
* Skip over <code>numBytes</code> bytes. The contract on this method is that it should have the
* same behavior as reading the same number of bytes into a buffer and discarding its content.
* Negative values of <code>numBytes</code> are not supported.
*
* @deprecated Implementing subclasses should override #skipBytes with a more performant solution
* where possible.
*/
public void skipBytes(final long numBytes) throws IOException {
@Deprecated
protected void skipBytesSlowly(final long numBytes) throws IOException {
if (numBytes < 0) {
throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
}
@ -363,4 +367,11 @@ public abstract class DataInput implements Cloneable {
skipped += step;
}
}
/**
* Skip over <code>numBytes</code> bytes. This method may skip bytes in whatever way is most
* optimal, and may not have the same behavior as reading the skipped bytes. In general, negative
* <code>numBytes</code> are not supported.
*/
public abstract void skipBytes(final long numBytes) throws IOException;
}

View File

@ -72,6 +72,23 @@ public abstract class IndexInput extends DataInput implements Cloneable, Closeab
*/
public abstract void seek(long pos) throws IOException;
/**
* {@inheritDoc}
*
* <p>Behavior is functionally equivalent to seeking to <code>getFilePointer() + numBytes</code>.
*
* @see #getFilePointer()
* @see #seek(long)
*/
@Override
public void skipBytes(long numBytes) throws IOException {
if (numBytes < 0) {
throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
}
final long skipTo = getFilePointer() + numBytes;
seek(skipTo);
}
/** The number of bytes in the file. */
public abstract long length();

View File

@ -50,4 +50,9 @@ public class InputStreamDataInput extends DataInput implements Closeable {
public void close() throws IOException {
is.close();
}
@Override
public void skipBytes(long numBytes) throws IOException {
skipBytesSlowly(numBytes);
}
}

View File

@ -344,6 +344,11 @@ public final class PagedBytes implements Accountable {
}
}
@Override
public void skipBytes(long numBytes) throws IOException {
skipBytesSlowly(numBytes);
}
private void nextBlock() {
currentBlockIndex++;
currentBlockUpto = 0;

View File

@ -263,6 +263,41 @@ public class TestIndexInput extends LuceneTestCase {
}
}
private void checkSeeksAndSkips(IndexInput is, Random random) throws IOException {
long len = is.length();
int iterations = LuceneTestCase.TEST_NIGHTLY ? 1_000 : 10;
for (int i = 0; i < iterations; i++) {
is.seek(0); // make sure we're at the start
for (long curr = 0; curr < len; ) {
long maxSkipTo = len - 1;
// if we're close to the end, just skip all the way
long skipTo = (len - curr < 10) ? maxSkipTo : TestUtil.nextLong(random, curr, maxSkipTo);
long skipDelta = skipTo - curr;
// first reposition using seek
byte startByte1 = is.readByte();
is.seek(skipTo);
byte endByte1 = is.readByte();
// do the same thing but with skipBytes
is.seek(curr);
byte startByte2 = is.readByte();
is.seek(curr);
is.skipBytes(skipDelta);
byte endByte2 = is.readByte();
assertEquals(startByte1, startByte2);
assertEquals(endByte1, endByte2);
// +1 since we read the byte we seek/skip to
assertEquals(curr + skipDelta + 1, is.getFilePointer());
curr = is.getFilePointer();
}
}
}
// this test checks the IndexInput methods of any impl
public void testRawIndexInputRead() throws IOException {
for (int i = 0; i < 10; i++) {
@ -273,6 +308,7 @@ public class TestIndexInput extends LuceneTestCase {
os.close();
IndexInput is = dir.openInput("foo", newIOContext(random));
checkReads(is, IOException.class);
checkSeeksAndSkips(is, random);
is.close();
os = dir.createOutput("bar", newIOContext(random));
@ -280,6 +316,7 @@ public class TestIndexInput extends LuceneTestCase {
os.close();
is = dir.openInput("bar", newIOContext(random));
checkRandomReads(is);
checkSeeksAndSkips(is, random);
is.close();
dir.close();
}
@ -291,4 +328,66 @@ public class TestIndexInput extends LuceneTestCase {
is = new ByteArrayDataInput(RANDOM_TEST_BYTES);
checkRandomReads(is);
}
public void testNoReadOnSkipBytes() throws IOException {
long len = LuceneTestCase.TEST_NIGHTLY ? Long.MAX_VALUE : 1_000_000;
long maxSeekPos = len - 1;
InterceptingIndexInput is = new InterceptingIndexInput("foo", len);
while (is.pos < maxSeekPos) {
long seekPos = TestUtil.nextLong(random(), is.pos, maxSeekPos);
long skipDelta = seekPos - is.pos;
is.skipBytes(skipDelta);
assertEquals(seekPos, is.pos);
}
}
/**
* Mock IndexInput that just tracks a position (which responds to seek/skip) and throws if
* #readByte or #readBytes are called, ensuring seek/skip doesn't invoke reads.
*/
private static final class InterceptingIndexInput extends IndexInput {
long pos = 0;
final long len;
protected InterceptingIndexInput(String resourceDescription, long len) {
super(resourceDescription);
this.len = len;
}
@Override
public void seek(long pos) {
this.pos = pos;
}
@Override
public long getFilePointer() {
return pos;
}
@Override
public long length() {
return len;
}
@Override
public byte readByte() {
throw new UnsupportedOperationException();
}
@Override
public void readBytes(byte[] b, int offset, int len) {
throw new UnsupportedOperationException();
}
@Override
public void close() {
// no-op
}
@Override
public IndexInput slice(String sliceDescription, long offset, long length) {
throw new UnsupportedOperationException();
}
}
}

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.store;
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestChecksumIndexInput extends LuceneTestCase {
public void testSkipBytes() throws IOException {
int numTestBytes = TestUtil.nextInt(random(), 100, 1000);
byte[] testBytes = new byte[numTestBytes];
final Directory dir = newDirectory();
IndexOutput os = dir.createOutput("foo", newIOContext(random()));
os.writeBytes(testBytes, numTestBytes);
os.close();
IndexInput is = dir.openInput("foo", newIOContext(random()));
final InterceptingChecksumIndexInput checksumIndexInput =
new InterceptingChecksumIndexInput(is, numTestBytes);
// skip random chunks of bytes until everything has been skipped
for (int skipped = 0; skipped < numTestBytes; ) {
final int remaining = numTestBytes - skipped;
// when remaining gets small enough, just skip the rest
final int step = remaining < 10 ? remaining : random().nextInt(remaining);
checksumIndexInput.skipBytes(step);
skipped += step;
}
// ensure all skipped bytes are still "read" in so the checksum can be updated properly
assertArrayEquals(testBytes, checksumIndexInput.readBytes);
is.close();
dir.close();
}
/**
* Captures read bytes into a separate buffer for confirming that all #skipByte invocations
* delegate to #readBytes.
*/
private static final class InterceptingChecksumIndexInput extends BufferedChecksumIndexInput {
final byte[] readBytes;
private int off = 0;
public InterceptingChecksumIndexInput(IndexInput main, int len) {
super(main);
readBytes = new byte[len];
}
@Override
public void readBytes(final byte[] b, final int offset, final int len) throws IOException {
super.readBytes(b, offset, len);
System.arraycopy(b, offset, readBytes, off, len);
off += len;
}
}
}