From 1daf7e7c742cf53cb62a55bc3993a76d878e3223 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 30 Jul 2021 09:15:21 -0400 Subject: [PATCH] LUCENE-10027 provide leaf sorter from commit (#214) Provide leaf sorter for directory readers opened from IndexCommit LUCENE-9507 allowed to provide a leaf sorter for directory readers. One API that was missed is to allow to provide a leaf sorter for directory readers opened from an index commit. This patch address this by adding an extra parameter: a custom comparator for sorting leaf readers to the Directory reader open API from indexCommit and minSupportedMajorVersion. Relates to PR #32 --- lucene/CHANGES.txt | 4 + .../TestBackwardsCompatibility.java | 8 +- .../apache/lucene/index/DirectoryReader.java | 11 ++- .../lucene/index/TestDirectoryReader.java | 4 +- .../lucene/index/TestIndexWriterReader.java | 94 ++++++++++--------- 5 files changed, 67 insertions(+), 54 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3405d83a5e4..32e633961c4 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -360,6 +360,10 @@ API Changes * LUCENE-10036: Replaced the ScoreCachingWrappingScorer ctor with a static factory method that ensures unnecessary wrapping doesn't occur. (Greg Miller) +* LUCENE-10027: Directory reader open API from indexCommit and minSupportedMajorVersion has + been modified to add an extra parameter: a custom comparator for sorting leaf readers + (Mayya Sharipova) + New Features --------------------- (No changes) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index 4ee2e5be103..c560c189cff 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -1071,7 +1071,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { // QueryParser parser = new QueryParser("contents", new MockAnalyzer(random)); // Query query = parser.parse("handle:1"); IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0); - IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion); + IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null); IndexSearcher searcher = newSearcher(reader); TestUtil.checkIndex(dir); @@ -2076,13 +2076,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase { IndexFormatTooOldException ex = expectThrows( IndexFormatTooOldException.class, - () -> StandardDirectoryReader.open(commit, Version.LATEST.major)); + () -> StandardDirectoryReader.open(commit, Version.LATEST.major, null)); assertTrue( ex.getMessage() .contains( "only supports reading from version " + Version.LATEST.major + " upwards.")); // now open with allowed min version - StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR).close(); + StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close(); } } @@ -2092,7 +2092,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir); try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) { IndexCommit commit = DirectoryReader.listCommits(dir).get(0); - StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR).close(); + StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close(); } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java index f432d8c7680..58aca2d18ef 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java @@ -132,12 +132,19 @@ public abstract class DirectoryReader extends BaseCompositeReader { * * @param commit the commit point to open * @param minSupportedMajorVersion the minimum supported major index version + * @param leafSorter a comparator for sorting leaf readers. Providing leafSorter is useful for + * indices on which it is expected to run many queries with particular sort criteria (e.g. for + * time-based indices, this is usually a descending sort on timestamp). In this case {@code + * leafSorter} should sort leaves according to this sort criteria. Providing leafSorter allows + * to speed up this particular type of sort queries by early terminating while iterating + * through segments and segments' documents * @throws IOException if there is a low-level IO error */ - public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion) + public static DirectoryReader open( + final IndexCommit commit, int minSupportedMajorVersion, Comparator leafSorter) throws IOException { return StandardDirectoryReader.open( - commit.getDirectory(), minSupportedMajorVersion, commit, null); + commit.getDirectory(), minSupportedMajorVersion, commit, leafSorter); } /** diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java index 1b74790c01d..365c008d543 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java @@ -1107,8 +1107,8 @@ public class TestDirectoryReader extends LuceneTestCase { writer.addDocument(doc); writer.commit(); IndexCommit commit = DirectoryReader.listCommits(dir).get(0); - expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1)); - DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1)).close(); + expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1, null)); + DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1), null).close(); } } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java index dd97c449850..fd844106a06 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -49,6 +49,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.util.Version; import org.junit.Test; @SuppressCodecs("SimpleText") // too slow here @@ -1249,14 +1250,7 @@ public class TestIndexWriterReader extends LuceneTestCase { // Test1: test that leafReaders are sorted according to leafSorter provided in IndexWriterConfig { try (DirectoryReader reader = writer.getReader()) { - List lrs = - reader.leaves().stream().map(LeafReaderContext::reader).collect(toList()); - List expectedSortedlrs = - reader.leaves().stream() - .map(LeafReaderContext::reader) - .sorted(leafSorter) - .collect(toList()); - assertEquals(expectedSortedlrs, lrs); + assertLeavesSorted(reader, leafSorter); // add more documents that should be sorted first final long FIRST_VALUE = ASC_SORT ? 0 : 100; @@ -1269,28 +1263,16 @@ public class TestIndexWriterReader extends LuceneTestCase { // and open again try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) { - lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList()); - expectedSortedlrs = - reader2.leaves().stream() - .map(LeafReaderContext::reader) - .sorted(leafSorter) - .collect(toList()); - assertEquals(expectedSortedlrs, lrs); + assertLeavesSorted(reader2, leafSorter); } } } - // Test2: test that leafReaders are sorted according to leafSorter provided in DirectoryReader + // Test2: test that leafReaders are sorted according to the provided leafSorter when opened from + // directory { try (DirectoryReader reader = DirectoryReader.open(dir, leafSorter)) { - List lrs = - reader.leaves().stream().map(LeafReaderContext::reader).collect(toList()); - List expectedSortedlrs = - reader.leaves().stream() - .map(LeafReaderContext::reader) - .sorted(leafSorter) - .collect(toList()); - assertEquals(expectedSortedlrs, lrs); + assertLeavesSorted(reader, leafSorter); // add more documents that should be sorted first final long FIRST_VALUE = ASC_SORT ? 0 : 100; @@ -1303,13 +1285,7 @@ public class TestIndexWriterReader extends LuceneTestCase { // and open again try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) { - lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList()); - expectedSortedlrs = - reader2.leaves().stream() - .map(LeafReaderContext::reader) - .sorted(leafSorter) - .collect(toList()); - assertEquals(expectedSortedlrs, lrs); + assertLeavesSorted(reader2, leafSorter); } } } @@ -1319,14 +1295,7 @@ public class TestIndexWriterReader extends LuceneTestCase { { try (DirectoryReader reader = new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) { - List lrs = - reader.leaves().stream().map(LeafReaderContext::reader).collect(toList()); - List expectedSortedlrs = - reader.leaves().stream() - .map(LeafReaderContext::reader) - .sorted(leafSorter) - .collect(toList()); - assertEquals(expectedSortedlrs, lrs); + assertLeavesSorted(reader, leafSorter); // add more documents that should be sorted first final long FIRST_VALUE = ASC_SORT ? 0 : 100; @@ -1339,13 +1308,32 @@ public class TestIndexWriterReader extends LuceneTestCase { // and open again try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) { - lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList()); - expectedSortedlrs = - reader2.leaves().stream() - .map(LeafReaderContext::reader) - .sorted(leafSorter) - .collect(toList()); - assertEquals(expectedSortedlrs, lrs); + assertLeavesSorted(reader2, leafSorter); + } + } + } + + // Test4: test that leafReaders are sorted according to the provided leafSorter when opened from + // commit + { + List commits = DirectoryReader.listCommits(dir); + IndexCommit latestCommit = commits.get(commits.size() - 1); + try (DirectoryReader reader = + DirectoryReader.open(latestCommit, Version.MIN_SUPPORTED_MAJOR, leafSorter)) { + assertLeavesSorted(reader, leafSorter); + + // add more documents that should be sorted first + final long FIRST_VALUE = ASC_SORT ? 0 : 100; + for (int i = 0; i < 10; ++i) { + final Document doc = new Document(); + doc.add(new LongPoint(FIELD_NAME, FIRST_VALUE)); + writer.addDocument(doc); + } + writer.commit(); + + // and open again + try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) { + assertLeavesSorted(reader2, leafSorter); } } } @@ -1353,4 +1341,18 @@ public class TestIndexWriterReader extends LuceneTestCase { writer.close(); dir.close(); } + + // assert that the leaf readers of the provided directory reader are sorted according to the + // provided leafSorter + private static void assertLeavesSorted( + DirectoryReader reader, Comparator leafSorter) { + List lrs = + reader.leaves().stream().map(LeafReaderContext::reader).collect(toList()); + List expectedSortedlrs = + reader.leaves().stream() + .map(LeafReaderContext::reader) + .sorted(leafSorter) + .collect(toList()); + assertEquals(expectedSortedlrs, lrs); + } }