LUCENE-7779: don't call BytesSequencesReader.next again after it already returned null

This commit is contained in:
Mike McCandless 2017-04-12 17:17:30 -04:00
parent 0bcd88b181
commit b954f220e0
2 changed files with 53 additions and 4 deletions

View File

@ -249,9 +249,14 @@ public class OfflineSorter {
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
boolean success = false;
boolean[] isExhausted = new boolean[1];
try (ByteSequencesReader is = getReader(dir.openChecksumInput(inputFileName, IOContext.READONCE), inputFileName)) {
int lineCount;
while ((lineCount = readPartition(is)) > 0) {
while (isExhausted[0] == false) {
int lineCount = readPartition(is, isExhausted);
if (lineCount == 0) {
assert isExhausted[0];
break;
}
segments.add(sortPartition(trackingDir));
sortInfo.tempMergeFiles++;
sortInfo.lineCount += lineCount;
@ -420,8 +425,8 @@ public class OfflineSorter {
sortInfo.tempMergeFiles++;
}
/** Read in a single partition of data */
int readPartition(ByteSequencesReader reader) throws IOException {
/** Read in a single partition of data, setting isExhausted[0] to true if there are no more items. */
int readPartition(ByteSequencesReader reader, boolean[] isExhausted) throws IOException {
long start = System.currentTimeMillis();
if (valueLength != -1) {
int limit = ramBufferSize.bytes / valueLength;
@ -433,6 +438,7 @@ public class OfflineSorter {
verifyChecksum(t, reader);
}
if (item == null) {
isExhausted[0] = true;
break;
}
buffer.append(item);
@ -446,6 +452,7 @@ public class OfflineSorter {
verifyChecksum(t, reader);
}
if (item == null) {
isExhausted[0] = true;
break;
}
buffer.append(item);

View File

@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.CorruptingIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
@ -455,6 +456,47 @@ public class TestOfflineSorter extends LuceneTestCase {
dir.close();
}
// OfflineSorter should not call my BytesSequencesReader.next() again after it already returned null:
public void testOverNexting() throws Exception {
Directory dir = newDirectory();
IndexOutput out = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
try (ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(out)) {
byte[] bytes = new byte[Integer.BYTES];
random().nextBytes(bytes);
w.write(bytes);
CodecUtil.writeFooter(out);
}
new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(4), OfflineSorter.MAX_TEMPFILES, Integer.BYTES) {
@Override
protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
ByteSequencesReader other = super.getReader(in, name);
return new ByteSequencesReader(in, name) {
private boolean alreadyEnded;
@Override
public BytesRef next() throws IOException {
// if we returned null already, OfflineSorter should not call next() again
assertFalse(alreadyEnded);
BytesRef result = other.next();
if (result == null) {
alreadyEnded = true;
}
return result;
}
@Override
public void close() throws IOException {
other.close();
}
};
}
}.sort(out.getName());
dir.close();
}
public void testInvalidFixedLength() throws Exception {
IllegalArgumentException e;
e = expectThrows(IllegalArgumentException.class,