LUCENE-10027 provide leaf sorter from commit (#214)

Provide leaf sorter for directory readers opened from IndexCommit

LUCENE-9507 allowed to provide a leaf sorter for directory readers.
One API that was missed is to allow to provide a leaf sorter
for directory readers opened from an index commit.
This patch address this by adding an extra parameter: a custom
comparator for sorting leaf readers to the Directory reader open API
from indexCommit and minSupportedMajorVersion.

Relates to PR #32
This commit is contained in:
Mayya Sharipova 2021-07-30 09:15:21 -04:00 committed by GitHub
parent 56eb76dbaf
commit 1daf7e7c74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 67 additions and 54 deletions

View File

@ -360,6 +360,10 @@ API Changes
* LUCENE-10036: Replaced the ScoreCachingWrappingScorer ctor with a static factory method that
ensures unnecessary wrapping doesn't occur. (Greg Miller)
* LUCENE-10027: Directory reader open API from indexCommit and minSupportedMajorVersion has
been modified to add an extra parameter: a custom comparator for sorting leaf readers
(Mayya Sharipova)
New Features
---------------------
(No changes)

View File

@ -1071,7 +1071,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
// Query query = parser.parse("handle:1");
IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0);
IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion);
IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null);
IndexSearcher searcher = newSearcher(reader);
TestUtil.checkIndex(dir);
@ -2076,13 +2076,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
IndexFormatTooOldException ex =
expectThrows(
IndexFormatTooOldException.class,
() -> StandardDirectoryReader.open(commit, Version.LATEST.major));
() -> StandardDirectoryReader.open(commit, Version.LATEST.major, null));
assertTrue(
ex.getMessage()
.contains(
"only supports reading from version " + Version.LATEST.major + " upwards."));
// now open with allowed min version
StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR).close();
StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close();
}
}
@ -2092,7 +2092,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR).close();
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();
}
}
}

View File

@ -132,12 +132,19 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
*
* @param commit the commit point to open
* @param minSupportedMajorVersion the minimum supported major index version
* @param leafSorter a comparator for sorting leaf readers. Providing leafSorter is useful for
* indices on which it is expected to run many queries with particular sort criteria (e.g. for
* time-based indices, this is usually a descending sort on timestamp). In this case {@code
* leafSorter} should sort leaves according to this sort criteria. Providing leafSorter allows
* to speed up this particular type of sort queries by early terminating while iterating
* through segments and segments' documents
* @throws IOException if there is a low-level IO error
*/
public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion)
public static DirectoryReader open(
final IndexCommit commit, int minSupportedMajorVersion, Comparator<LeafReader> leafSorter)
throws IOException {
return StandardDirectoryReader.open(
commit.getDirectory(), minSupportedMajorVersion, commit, null);
commit.getDirectory(), minSupportedMajorVersion, commit, leafSorter);
}
/**

View File

@ -1107,8 +1107,8 @@ public class TestDirectoryReader extends LuceneTestCase {
writer.addDocument(doc);
writer.commit();
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1));
DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1)).close();
expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1, null));
DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1), null).close();
}
}
}

View File

@ -49,6 +49,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
import org.junit.Test;
@SuppressCodecs("SimpleText") // too slow here
@ -1249,14 +1250,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
// Test1: test that leafReaders are sorted according to leafSorter provided in IndexWriterConfig
{
try (DirectoryReader reader = writer.getReader()) {
List<LeafReader> lrs =
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
List<LeafReader> expectedSortedlrs =
reader.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
assertLeavesSorted(reader, leafSorter);
// add more documents that should be sorted first
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
@ -1269,28 +1263,16 @@ public class TestIndexWriterReader extends LuceneTestCase {
// and open again
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
expectedSortedlrs =
reader2.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
assertLeavesSorted(reader2, leafSorter);
}
}
}
// Test2: test that leafReaders are sorted according to leafSorter provided in DirectoryReader
// Test2: test that leafReaders are sorted according to the provided leafSorter when opened from
// directory
{
try (DirectoryReader reader = DirectoryReader.open(dir, leafSorter)) {
List<LeafReader> lrs =
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
List<LeafReader> expectedSortedlrs =
reader.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
assertLeavesSorted(reader, leafSorter);
// add more documents that should be sorted first
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
@ -1303,13 +1285,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
// and open again
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
expectedSortedlrs =
reader2.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
assertLeavesSorted(reader2, leafSorter);
}
}
}
@ -1319,14 +1295,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
{
try (DirectoryReader reader =
new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) {
List<LeafReader> lrs =
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
List<LeafReader> expectedSortedlrs =
reader.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
assertLeavesSorted(reader, leafSorter);
// add more documents that should be sorted first
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
@ -1339,13 +1308,32 @@ public class TestIndexWriterReader extends LuceneTestCase {
// and open again
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
expectedSortedlrs =
reader2.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
assertLeavesSorted(reader2, leafSorter);
}
}
}
// Test4: test that leafReaders are sorted according to the provided leafSorter when opened from
// commit
{
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
IndexCommit latestCommit = commits.get(commits.size() - 1);
try (DirectoryReader reader =
DirectoryReader.open(latestCommit, Version.MIN_SUPPORTED_MAJOR, leafSorter)) {
assertLeavesSorted(reader, leafSorter);
// add more documents that should be sorted first
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
for (int i = 0; i < 10; ++i) {
final Document doc = new Document();
doc.add(new LongPoint(FIELD_NAME, FIRST_VALUE));
writer.addDocument(doc);
}
writer.commit();
// and open again
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
assertLeavesSorted(reader2, leafSorter);
}
}
}
@ -1353,4 +1341,18 @@ public class TestIndexWriterReader extends LuceneTestCase {
writer.close();
dir.close();
}
// assert that the leaf readers of the provided directory reader are sorted according to the
// provided leafSorter
private static void assertLeavesSorted(
DirectoryReader reader, Comparator<LeafReader> leafSorter) {
List<LeafReader> lrs =
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
List<LeafReader> expectedSortedlrs =
reader.leaves().stream()
.map(LeafReaderContext::reader)
.sorted(leafSorter)
.collect(toList());
assertEquals(expectedSortedlrs, lrs);
}
}