mirror of https://github.com/apache/lucene.git
LUCENE-10139: ExternalRefSorter returns a covariant with a subtype of BytesRefIterator that is Closeable. (#340)
This commit is contained in:
parent
b4fcdd9770
commit
2e57a40546
|
@ -157,6 +157,9 @@ API Changes
|
|||
|
||||
Improvements
|
||||
|
||||
* LUCENE-10139: ExternalRefSorter returns a covariant with a subtype of BytesRefIterator
|
||||
that is Closeable. (Dawid Weiss).
|
||||
|
||||
* LUCENE-10135: Correct passage selector behavior for long matching snippets (Dawid Weiss).
|
||||
|
||||
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
@ -116,7 +117,7 @@ public class TestMatchHighlighter extends LuceneTestCase {
|
|||
static SynonymMap buildSynonymMap(String[][] synonyms) throws IOException {
|
||||
SynonymMap.Builder builder = new SynonymMap.Builder();
|
||||
for (String[] pair : synonyms) {
|
||||
assertThat(pair.length, Matchers.equalTo(2));
|
||||
MatcherAssert.assertThat(pair.length, Matchers.equalTo(2));
|
||||
builder.add(new CharsRef(pair[0]), new CharsRef(pair[1]), true);
|
||||
}
|
||||
return builder.build();
|
||||
|
|
|
@ -28,7 +28,9 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.OfflineSorter;
|
||||
|
||||
/**
|
||||
* Builds and iterates over sequences stored on disk.
|
||||
* An implementation of a {@link BytesRefSorter} that allows appending {@link BytesRef}s to an
|
||||
* {@link OfflineSorter} and returns a {@link Closeable} {@link ByteSequenceIterator} that iterates
|
||||
* over sequences stored on disk.
|
||||
*
|
||||
* @lucene.experimental
|
||||
* @lucene.internal
|
||||
|
@ -36,76 +38,87 @@ import org.apache.lucene.util.OfflineSorter;
|
|||
public class ExternalRefSorter implements BytesRefSorter, Closeable {
|
||||
private final OfflineSorter sorter;
|
||||
private OfflineSorter.ByteSequencesWriter writer;
|
||||
private IndexOutput input;
|
||||
private String sortedFileName;
|
||||
private IndexOutput tempOutput;
|
||||
private String sortedOutput;
|
||||
|
||||
/** Will buffer all sequences to a temporary file and then sort (all on-disk). */
|
||||
public ExternalRefSorter(OfflineSorter sorter) throws IOException {
|
||||
this.sorter = sorter;
|
||||
this.input =
|
||||
this.tempOutput =
|
||||
sorter
|
||||
.getDirectory()
|
||||
.createTempOutput(sorter.getTempFileNamePrefix(), "RefSorterRaw", IOContext.DEFAULT);
|
||||
this.writer = new OfflineSorter.ByteSequencesWriter(this.input);
|
||||
this.writer = new OfflineSorter.ByteSequencesWriter(this.tempOutput);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(BytesRef utf8) throws IOException {
|
||||
if (writer == null) {
|
||||
throw new IllegalStateException();
|
||||
throw new IllegalStateException(
|
||||
"Can't append after iterator() has been called and all the input sorted.");
|
||||
}
|
||||
writer.write(utf8);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Returns a {@link ByteSequenceIterator} that implements {@link BytesRefIterator} but is
|
||||
* also {@link Closeable}, ensuring any temporary resources are cleaned up if the iterator is
|
||||
* either exhausted or closed.
|
||||
*/
|
||||
@Override
|
||||
public BytesRefIterator iterator() throws IOException {
|
||||
if (sortedFileName == null) {
|
||||
public ByteSequenceIterator iterator() throws IOException {
|
||||
if (sortedOutput == null) {
|
||||
if (tempOutput == null) {
|
||||
throw new IOException("The sorter has been closed.");
|
||||
}
|
||||
|
||||
closeWriter();
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
sortedFileName = sorter.sort(input.getName());
|
||||
sortedOutput = sorter.sort(tempOutput.getName());
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
sorter.getDirectory().deleteFile(input.getName());
|
||||
sorter.getDirectory().deleteFile(tempOutput.getName());
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(), input.getName());
|
||||
IOUtils.deleteFilesIgnoringExceptions(sorter.getDirectory(), tempOutput.getName());
|
||||
}
|
||||
}
|
||||
|
||||
input = null;
|
||||
tempOutput = null;
|
||||
}
|
||||
|
||||
return new ByteSequenceIterator(
|
||||
new OfflineSorter.ByteSequencesReader(
|
||||
sorter.getDirectory().openChecksumInput(sortedFileName, IOContext.READONCE),
|
||||
sortedFileName));
|
||||
sorter.getDirectory().openChecksumInput(sortedOutput, IOContext.READONCE),
|
||||
sortedOutput));
|
||||
}
|
||||
|
||||
/** Close the writer but leave any sorted output for iteration. */
|
||||
private void closeWriter() throws IOException {
|
||||
if (writer != null) {
|
||||
CodecUtil.writeFooter(input);
|
||||
CodecUtil.writeFooter(tempOutput);
|
||||
writer.close();
|
||||
writer = null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Removes any written temporary files. */
|
||||
/** Close the writer and remove any temporary files. */
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
closeWriter();
|
||||
} finally {
|
||||
IOUtils.deleteFilesIgnoringExceptions(
|
||||
sorter.getDirectory(), input == null ? null : input.getName(), sortedFileName);
|
||||
sorter.getDirectory(), tempOutput == null ? null : tempOutput.getName(), sortedOutput);
|
||||
}
|
||||
}
|
||||
|
||||
/** Iterate over byte refs in a file. */
|
||||
// TODO: this class is a bit silly ... sole purpose is to "remove" Closeable from what #iterator
|
||||
// returns:
|
||||
static class ByteSequenceIterator implements BytesRefIterator {
|
||||
/**
|
||||
* Iterates over {@link BytesRef}s in a file, closes the reader when the iterator is exhausted.
|
||||
*/
|
||||
static class ByteSequenceIterator implements BytesRefIterator, Closeable {
|
||||
private final OfflineSorter.ByteSequencesReader reader;
|
||||
private BytesRef scratch;
|
||||
|
||||
|
@ -119,7 +132,7 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
|
|||
try {
|
||||
scratch = reader.next();
|
||||
if (scratch == null) {
|
||||
reader.close();
|
||||
close();
|
||||
}
|
||||
success = true;
|
||||
return scratch;
|
||||
|
@ -129,8 +142,16 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Return the {@link Comparator} of the {@link OfflineSorter} used to sort byte sequences.
|
||||
*/
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return sorter.getComparator();
|
||||
|
|
|
@ -16,7 +16,12 @@
|
|||
*/
|
||||
package org.apache.lucene.search.suggest.fst;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.RandomizedContext;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomBytes;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.search.suggest.InMemorySorter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -35,17 +40,29 @@ public class TestBytesRefSorters extends LuceneTestCase {
|
|||
IOUtils.close(s, tempDir);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExternalRefSortersIteratorIsCloseable() throws Exception {
|
||||
try (Directory tempDir = newDirectory();
|
||||
ExternalRefSorter s = new ExternalRefSorter(new OfflineSorter(tempDir, "temp"))) {
|
||||
appendRandomSequences(s);
|
||||
|
||||
// Sometimes iterate over a subset of the entries in the sequence iterator, then
|
||||
// close it before exhausting the iterator.
|
||||
try (ExternalRefSorter.ByteSequenceIterator it = s.iterator()) {
|
||||
for (int i = 0; i < 5 && it.next() != null; i++) {
|
||||
// Empty.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInMemorySorter() throws Exception {
|
||||
check(new InMemorySorter(Comparator.naturalOrder()));
|
||||
}
|
||||
|
||||
private void check(BytesRefSorter sorter) throws Exception {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
byte[] current = new byte[random().nextInt(256)];
|
||||
random().nextBytes(current);
|
||||
sorter.add(new BytesRef(current));
|
||||
}
|
||||
appendRandomSequences(sorter);
|
||||
|
||||
// Create two iterators and check that they're aligned with each other.
|
||||
BytesRefIterator i1 = sorter.iterator();
|
||||
|
@ -67,4 +84,11 @@ public class TestBytesRefSorters extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void appendRandomSequences(BytesRefSorter sorter) throws IOException {
|
||||
Random rnd = new Random(RandomizedContext.current().getRandom().nextLong());
|
||||
for (int i = 0; i < RandomNumbers.randomIntBetween(rnd, 10, 100); i++) {
|
||||
sorter.add(new BytesRef(RandomBytes.randomBytesOfLengthBetween(rnd, 1, 256)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue