mirror of https://github.com/apache/lucene.git
LUCENE-10027 provide leaf sorter from commit (#214)
Provide leaf sorter for directory readers opened from IndexCommit. LUCENE-9507 made it possible to provide a leaf sorter for directory readers. One API that was missed is the one for opening a directory reader from an index commit. This patch addresses this by adding an extra parameter, a custom comparator for sorting leaf readers, to the DirectoryReader open API that takes an indexCommit and minSupportedMajorVersion. Relates to PR #32
This commit is contained in:
parent
56eb76dbaf
commit
1daf7e7c74
|
@ -360,6 +360,10 @@ API Changes
|
|||
* LUCENE-10036: Replaced the ScoreCachingWrappingScorer ctor with a static factory method that
|
||||
ensures unnecessary wrapping doesn't occur. (Greg Miller)
|
||||
|
||||
* LUCENE-10027: Directory reader open API from indexCommit and minSupportedMajorVersion has
|
||||
been modified to add an extra parameter: a custom comparator for sorting leaf readers
|
||||
(Mayya Sharipova)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
(No changes)
|
||||
|
|
|
@ -1071,7 +1071,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
// QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
|
||||
// Query query = parser.parse("handle:1");
|
||||
IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0);
|
||||
IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion);
|
||||
IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
TestUtil.checkIndex(dir);
|
||||
|
@ -2076,13 +2076,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
IndexFormatTooOldException ex =
|
||||
expectThrows(
|
||||
IndexFormatTooOldException.class,
|
||||
() -> StandardDirectoryReader.open(commit, Version.LATEST.major));
|
||||
() -> StandardDirectoryReader.open(commit, Version.LATEST.major, null));
|
||||
assertTrue(
|
||||
ex.getMessage()
|
||||
.contains(
|
||||
"only supports reading from version " + Version.LATEST.major + " upwards."));
|
||||
// now open with allowed min version
|
||||
StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR).close();
|
||||
StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2092,7 +2092,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
|
||||
try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
|
||||
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
|
||||
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR).close();
|
||||
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -132,12 +132,19 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
|
|||
*
|
||||
* @param commit the commit point to open
|
||||
* @param minSupportedMajorVersion the minimum supported major index version
|
||||
* @param leafSorter a comparator for sorting leaf readers. Providing leafSorter is useful for
|
||||
* indices on which it is expected to run many queries with particular sort criteria (e.g. for
|
||||
* time-based indices, this is usually a descending sort on timestamp). In this case {@code
|
||||
* leafSorter} should sort leaves according to this sort criteria. Providing leafSorter allows
|
||||
* to speed up this particular type of sort queries by early terminating while iterating
|
||||
* through segments and segments' documents
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion)
|
||||
public static DirectoryReader open(
|
||||
final IndexCommit commit, int minSupportedMajorVersion, Comparator<LeafReader> leafSorter)
|
||||
throws IOException {
|
||||
return StandardDirectoryReader.open(
|
||||
commit.getDirectory(), minSupportedMajorVersion, commit, null);
|
||||
commit.getDirectory(), minSupportedMajorVersion, commit, leafSorter);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1107,8 +1107,8 @@ public class TestDirectoryReader extends LuceneTestCase {
|
|||
writer.addDocument(doc);
|
||||
writer.commit();
|
||||
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
|
||||
expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1));
|
||||
DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1)).close();
|
||||
expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1, null));
|
||||
DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1), null).close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.junit.Test;
|
||||
|
||||
@SuppressCodecs("SimpleText") // too slow here
|
||||
|
@ -1249,14 +1250,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
// Test1: test that leafReaders are sorted according to leafSorter provided in IndexWriterConfig
|
||||
{
|
||||
try (DirectoryReader reader = writer.getReader()) {
|
||||
List<LeafReader> lrs =
|
||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
List<LeafReader> expectedSortedlrs =
|
||||
reader.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
assertLeavesSorted(reader, leafSorter);
|
||||
|
||||
// add more documents that should be sorted first
|
||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||
|
@ -1269,28 +1263,16 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
|
||||
// and open again
|
||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
expectedSortedlrs =
|
||||
reader2.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
assertLeavesSorted(reader2, leafSorter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test2: test that leafReaders are sorted according to leafSorter provided in DirectoryReader
|
||||
// Test2: test that leafReaders are sorted according to the provided leafSorter when opened from
|
||||
// directory
|
||||
{
|
||||
try (DirectoryReader reader = DirectoryReader.open(dir, leafSorter)) {
|
||||
List<LeafReader> lrs =
|
||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
List<LeafReader> expectedSortedlrs =
|
||||
reader.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
assertLeavesSorted(reader, leafSorter);
|
||||
|
||||
// add more documents that should be sorted first
|
||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||
|
@ -1303,13 +1285,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
|
||||
// and open again
|
||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
expectedSortedlrs =
|
||||
reader2.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
assertLeavesSorted(reader2, leafSorter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1319,14 +1295,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
{
|
||||
try (DirectoryReader reader =
|
||||
new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) {
|
||||
List<LeafReader> lrs =
|
||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
List<LeafReader> expectedSortedlrs =
|
||||
reader.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
assertLeavesSorted(reader, leafSorter);
|
||||
|
||||
// add more documents that should be sorted first
|
||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||
|
@ -1339,13 +1308,32 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
|
||||
// and open again
|
||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
expectedSortedlrs =
|
||||
reader2.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
assertLeavesSorted(reader2, leafSorter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test4: test that leafReaders are sorted according to the provided leafSorter when opened from
|
||||
// commit
|
||||
{
|
||||
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
|
||||
IndexCommit latestCommit = commits.get(commits.size() - 1);
|
||||
try (DirectoryReader reader =
|
||||
DirectoryReader.open(latestCommit, Version.MIN_SUPPORTED_MAJOR, leafSorter)) {
|
||||
assertLeavesSorted(reader, leafSorter);
|
||||
|
||||
// add more documents that should be sorted first
|
||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
final Document doc = new Document();
|
||||
doc.add(new LongPoint(FIELD_NAME, FIRST_VALUE));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.commit();
|
||||
|
||||
// and open again
|
||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||
assertLeavesSorted(reader2, leafSorter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1353,4 +1341,18 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// assert that the leaf readers of the provided directory reader are sorted according to the
|
||||
// provided leafSorter
|
||||
private static void assertLeavesSorted(
|
||||
DirectoryReader reader, Comparator<LeafReader> leafSorter) {
|
||||
List<LeafReader> lrs =
|
||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||
List<LeafReader> expectedSortedlrs =
|
||||
reader.leaves().stream()
|
||||
.map(LeafReaderContext::reader)
|
||||
.sorted(leafSorter)
|
||||
.collect(toList());
|
||||
assertEquals(expectedSortedlrs, lrs);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue