mirror of https://github.com/apache/lucene.git
LUCENE-10027 provide leaf sorter from commit (#214)
Provide leaf sorter for directory readers opened from IndexCommit LUCENE-9507 allowed to provide a leaf sorter for directory readers. One API that was missed is to allow to provide a leaf sorter for directory readers opened from an index commit. This patch address this by adding an extra parameter: a custom comparator for sorting leaf readers to the Directory reader open API from indexCommit and minSupportedMajorVersion. Relates to PR #32
This commit is contained in:
parent
56eb76dbaf
commit
1daf7e7c74
|
@ -360,6 +360,10 @@ API Changes
|
||||||
* LUCENE-10036: Replaced the ScoreCachingWrappingScorer ctor with a static factory method that
|
* LUCENE-10036: Replaced the ScoreCachingWrappingScorer ctor with a static factory method that
|
||||||
ensures unnecessary wrapping doesn't occur. (Greg Miller)
|
ensures unnecessary wrapping doesn't occur. (Greg Miller)
|
||||||
|
|
||||||
|
* LUCENE-10027: Directory reader open API from indexCommit and minSupportedMajorVersion has
|
||||||
|
been modified to add an extra parameter: a custom comparator for sorting leaf readers
|
||||||
|
(Mayya Sharipova)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
(No changes)
|
||||||
|
|
|
@ -1071,7 +1071,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
// QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
|
// QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
|
||||||
// Query query = parser.parse("handle:1");
|
// Query query = parser.parse("handle:1");
|
||||||
IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0);
|
IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0);
|
||||||
IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion);
|
IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null);
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
TestUtil.checkIndex(dir);
|
TestUtil.checkIndex(dir);
|
||||||
|
@ -2076,13 +2076,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
IndexFormatTooOldException ex =
|
IndexFormatTooOldException ex =
|
||||||
expectThrows(
|
expectThrows(
|
||||||
IndexFormatTooOldException.class,
|
IndexFormatTooOldException.class,
|
||||||
() -> StandardDirectoryReader.open(commit, Version.LATEST.major));
|
() -> StandardDirectoryReader.open(commit, Version.LATEST.major, null));
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ex.getMessage()
|
ex.getMessage()
|
||||||
.contains(
|
.contains(
|
||||||
"only supports reading from version " + Version.LATEST.major + " upwards."));
|
"only supports reading from version " + Version.LATEST.major + " upwards."));
|
||||||
// now open with allowed min version
|
// now open with allowed min version
|
||||||
StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR).close();
|
StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2092,7 +2092,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
|
TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
|
||||||
try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
|
try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
|
||||||
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
|
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
|
||||||
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR).close();
|
StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,12 +132,19 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
|
||||||
*
|
*
|
||||||
* @param commit the commit point to open
|
* @param commit the commit point to open
|
||||||
* @param minSupportedMajorVersion the minimum supported major index version
|
* @param minSupportedMajorVersion the minimum supported major index version
|
||||||
|
* @param leafSorter a comparator for sorting leaf readers. Providing leafSorter is useful for
|
||||||
|
* indices on which it is expected to run many queries with particular sort criteria (e.g. for
|
||||||
|
* time-based indices, this is usually a descending sort on timestamp). In this case {@code
|
||||||
|
* leafSorter} should sort leaves according to this sort criteria. Providing leafSorter allows
|
||||||
|
* to speed up this particular type of sort queries by early terminating while iterating
|
||||||
|
* through segments and segments' documents
|
||||||
* @throws IOException if there is a low-level IO error
|
* @throws IOException if there is a low-level IO error
|
||||||
*/
|
*/
|
||||||
public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion)
|
public static DirectoryReader open(
|
||||||
|
final IndexCommit commit, int minSupportedMajorVersion, Comparator<LeafReader> leafSorter)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return StandardDirectoryReader.open(
|
return StandardDirectoryReader.open(
|
||||||
commit.getDirectory(), minSupportedMajorVersion, commit, null);
|
commit.getDirectory(), minSupportedMajorVersion, commit, leafSorter);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1107,8 +1107,8 @@ public class TestDirectoryReader extends LuceneTestCase {
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
writer.commit();
|
writer.commit();
|
||||||
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
|
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
|
||||||
expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1));
|
expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1, null));
|
||||||
DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1)).close();
|
DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1), null).close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@SuppressCodecs("SimpleText") // too slow here
|
@SuppressCodecs("SimpleText") // too slow here
|
||||||
|
@ -1249,14 +1250,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
||||||
// Test1: test that leafReaders are sorted according to leafSorter provided in IndexWriterConfig
|
// Test1: test that leafReaders are sorted according to leafSorter provided in IndexWriterConfig
|
||||||
{
|
{
|
||||||
try (DirectoryReader reader = writer.getReader()) {
|
try (DirectoryReader reader = writer.getReader()) {
|
||||||
List<LeafReader> lrs =
|
assertLeavesSorted(reader, leafSorter);
|
||||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
|
||||||
List<LeafReader> expectedSortedlrs =
|
|
||||||
reader.leaves().stream()
|
|
||||||
.map(LeafReaderContext::reader)
|
|
||||||
.sorted(leafSorter)
|
|
||||||
.collect(toList());
|
|
||||||
assertEquals(expectedSortedlrs, lrs);
|
|
||||||
|
|
||||||
// add more documents that should be sorted first
|
// add more documents that should be sorted first
|
||||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||||
|
@ -1269,28 +1263,16 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
||||||
|
|
||||||
// and open again
|
// and open again
|
||||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||||
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
assertLeavesSorted(reader2, leafSorter);
|
||||||
expectedSortedlrs =
|
|
||||||
reader2.leaves().stream()
|
|
||||||
.map(LeafReaderContext::reader)
|
|
||||||
.sorted(leafSorter)
|
|
||||||
.collect(toList());
|
|
||||||
assertEquals(expectedSortedlrs, lrs);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test2: test that leafReaders are sorted according to leafSorter provided in DirectoryReader
|
// Test2: test that leafReaders are sorted according to the provided leafSorter when opened from
|
||||||
|
// directory
|
||||||
{
|
{
|
||||||
try (DirectoryReader reader = DirectoryReader.open(dir, leafSorter)) {
|
try (DirectoryReader reader = DirectoryReader.open(dir, leafSorter)) {
|
||||||
List<LeafReader> lrs =
|
assertLeavesSorted(reader, leafSorter);
|
||||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
|
||||||
List<LeafReader> expectedSortedlrs =
|
|
||||||
reader.leaves().stream()
|
|
||||||
.map(LeafReaderContext::reader)
|
|
||||||
.sorted(leafSorter)
|
|
||||||
.collect(toList());
|
|
||||||
assertEquals(expectedSortedlrs, lrs);
|
|
||||||
|
|
||||||
// add more documents that should be sorted first
|
// add more documents that should be sorted first
|
||||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||||
|
@ -1303,13 +1285,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
||||||
|
|
||||||
// and open again
|
// and open again
|
||||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||||
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
assertLeavesSorted(reader2, leafSorter);
|
||||||
expectedSortedlrs =
|
|
||||||
reader2.leaves().stream()
|
|
||||||
.map(LeafReaderContext::reader)
|
|
||||||
.sorted(leafSorter)
|
|
||||||
.collect(toList());
|
|
||||||
assertEquals(expectedSortedlrs, lrs);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1319,14 +1295,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
||||||
{
|
{
|
||||||
try (DirectoryReader reader =
|
try (DirectoryReader reader =
|
||||||
new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) {
|
new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) {
|
||||||
List<LeafReader> lrs =
|
assertLeavesSorted(reader, leafSorter);
|
||||||
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
|
||||||
List<LeafReader> expectedSortedlrs =
|
|
||||||
reader.leaves().stream()
|
|
||||||
.map(LeafReaderContext::reader)
|
|
||||||
.sorted(leafSorter)
|
|
||||||
.collect(toList());
|
|
||||||
assertEquals(expectedSortedlrs, lrs);
|
|
||||||
|
|
||||||
// add more documents that should be sorted first
|
// add more documents that should be sorted first
|
||||||
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||||
|
@ -1339,13 +1308,32 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
||||||
|
|
||||||
// and open again
|
// and open again
|
||||||
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||||
lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
assertLeavesSorted(reader2, leafSorter);
|
||||||
expectedSortedlrs =
|
}
|
||||||
reader2.leaves().stream()
|
}
|
||||||
.map(LeafReaderContext::reader)
|
}
|
||||||
.sorted(leafSorter)
|
|
||||||
.collect(toList());
|
// Test4: test that leafReaders are sorted according to the provided leafSorter when opened from
|
||||||
assertEquals(expectedSortedlrs, lrs);
|
// commit
|
||||||
|
{
|
||||||
|
List<IndexCommit> commits = DirectoryReader.listCommits(dir);
|
||||||
|
IndexCommit latestCommit = commits.get(commits.size() - 1);
|
||||||
|
try (DirectoryReader reader =
|
||||||
|
DirectoryReader.open(latestCommit, Version.MIN_SUPPORTED_MAJOR, leafSorter)) {
|
||||||
|
assertLeavesSorted(reader, leafSorter);
|
||||||
|
|
||||||
|
// add more documents that should be sorted first
|
||||||
|
final long FIRST_VALUE = ASC_SORT ? 0 : 100;
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
final Document doc = new Document();
|
||||||
|
doc.add(new LongPoint(FIELD_NAME, FIRST_VALUE));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
writer.commit();
|
||||||
|
|
||||||
|
// and open again
|
||||||
|
try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
|
||||||
|
assertLeavesSorted(reader2, leafSorter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1353,4 +1341,18 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// assert that the leaf readers of the provided directory reader are sorted according to the
|
||||||
|
// provided leafSorter
|
||||||
|
private static void assertLeavesSorted(
|
||||||
|
DirectoryReader reader, Comparator<LeafReader> leafSorter) {
|
||||||
|
List<LeafReader> lrs =
|
||||||
|
reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
|
||||||
|
List<LeafReader> expectedSortedlrs =
|
||||||
|
reader.leaves().stream()
|
||||||
|
.map(LeafReaderContext::reader)
|
||||||
|
.sorted(leafSorter)
|
||||||
|
.collect(toList());
|
||||||
|
assertEquals(expectedSortedlrs, lrs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue