mirror of https://github.com/apache/lucene.git
LUCENE-7779: don't call BytesSequencesReader.next again after it already returned null
This commit is contained in:
parent
0bcd88b181
commit
b954f220e0
|
@ -249,9 +249,14 @@ public class OfflineSorter {
|
|||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
|
||||
|
||||
boolean success = false;
|
||||
boolean[] isExhausted = new boolean[1];
|
||||
try (ByteSequencesReader is = getReader(dir.openChecksumInput(inputFileName, IOContext.READONCE), inputFileName)) {
|
||||
int lineCount;
|
||||
while ((lineCount = readPartition(is)) > 0) {
|
||||
while (isExhausted[0] == false) {
|
||||
int lineCount = readPartition(is, isExhausted);
|
||||
if (lineCount == 0) {
|
||||
assert isExhausted[0];
|
||||
break;
|
||||
}
|
||||
segments.add(sortPartition(trackingDir));
|
||||
sortInfo.tempMergeFiles++;
|
||||
sortInfo.lineCount += lineCount;
|
||||
|
@ -420,8 +425,8 @@ public class OfflineSorter {
|
|||
sortInfo.tempMergeFiles++;
|
||||
}
|
||||
|
||||
/** Read in a single partition of data */
|
||||
int readPartition(ByteSequencesReader reader) throws IOException {
|
||||
/** Read in a single partition of data, setting isExhausted[0] to true if there are no more items. */
|
||||
int readPartition(ByteSequencesReader reader, boolean[] isExhausted) throws IOException {
|
||||
long start = System.currentTimeMillis();
|
||||
if (valueLength != -1) {
|
||||
int limit = ramBufferSize.bytes / valueLength;
|
||||
|
@ -433,6 +438,7 @@ public class OfflineSorter {
|
|||
verifyChecksum(t, reader);
|
||||
}
|
||||
if (item == null) {
|
||||
isExhausted[0] = true;
|
||||
break;
|
||||
}
|
||||
buffer.append(item);
|
||||
|
@ -446,6 +452,7 @@ public class OfflineSorter {
|
|||
verifyChecksum(t, reader);
|
||||
}
|
||||
if (item == null) {
|
||||
isExhausted[0] = true;
|
||||
break;
|
||||
}
|
||||
buffer.append(item);
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.CorruptingIndexOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FilterDirectory;
|
||||
|
@ -455,6 +456,47 @@ public class TestOfflineSorter extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
// OfflineSorter should not call my BytesSequencesReader.next() again after it already returned null:
|
||||
public void testOverNexting() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexOutput out = dir.createTempOutput("unsorted", "tmp", IOContext.DEFAULT);
|
||||
try (ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(out)) {
|
||||
byte[] bytes = new byte[Integer.BYTES];
|
||||
random().nextBytes(bytes);
|
||||
w.write(bytes);
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
|
||||
new OfflineSorter(dir, "foo", OfflineSorter.DEFAULT_COMPARATOR, BufferSize.megabytes(4), OfflineSorter.MAX_TEMPFILES, Integer.BYTES) {
|
||||
@Override
|
||||
protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
|
||||
ByteSequencesReader other = super.getReader(in, name);
|
||||
|
||||
return new ByteSequencesReader(in, name) {
|
||||
|
||||
private boolean alreadyEnded;
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
// if we returned null already, OfflineSorter should not call next() again
|
||||
assertFalse(alreadyEnded);
|
||||
BytesRef result = other.next();
|
||||
if (result == null) {
|
||||
alreadyEnded = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
other.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
}.sort(out.getName());
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testInvalidFixedLength() throws Exception {
|
||||
IllegalArgumentException e;
|
||||
e = expectThrows(IllegalArgumentException.class,
|
||||
|
|
Loading…
Reference in New Issue