From cd61a926fb0787823360d22c0a9accd9e4579ceb Mon Sep 17 00:00:00 2001 From: Christophe Bismuth Date: Mon, 12 Nov 2018 11:00:20 +0100 Subject: [PATCH] LUCENE-8463: TopFieldCollector can now early-terminate queries when sorting by SortField.DOC. Signed-off-by: Jim Ferenczi --- lucene/CHANGES.txt | 5 +++ .../lucene/search/TopFieldCollector.java | 28 ++++++++---- ...TestTopFieldCollectorEarlyTermination.java | 43 +++++++++++++++++-- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 611cb20226f..ca87bfe278d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -212,6 +212,11 @@ New Features IndexWriterConfig#setIndexCreatedVersionMajor. This is an expert feature. (Adrien Grand) +Improvements: + +* LUCENE-8463: TopFieldCollector can now early-terminate queries when sorting by SortField.DOC. + (Christophe Bismuth via Jim Ferenczi) + ======================= Lucene 7.6.0 ======================= Build diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index ff2307c09f5..f3a2a3bb2f3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -68,13 +68,27 @@ public abstract class TopFieldCollector extends TopDocsCollector { } static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) { + return canEarlyTerminateOnDocId(searchSort) || + canEarlyTerminateOnPrefix(searchSort, indexSort); + } + + private static boolean canEarlyTerminateOnDocId(Sort searchSort) { final SortField[] fields1 = searchSort.getSort(); - final SortField[] fields2 = indexSort.getSort(); - // early termination is possible if fields1 is a prefix of fields2 - if (fields1.length > fields2.length) { + return SortField.FIELD_DOC.equals(fields1[0]); + } + + private static boolean canEarlyTerminateOnPrefix(Sort searchSort, Sort indexSort) { 
+ if (indexSort != null) { + final SortField[] fields1 = searchSort.getSort(); + final SortField[] fields2 = indexSort.getSort(); + // early termination is possible if fields1 is a prefix of fields2 + if (fields1.length > fields2.length) { + return false; + } + return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length)); + } else { return false; } - return Arrays.asList(fields1).equals(Arrays.asList(fields2).subList(0, fields1.length)); } /* @@ -99,8 +113,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { final LeafFieldComparator[] comparators = queue.getComparators(context); final int[] reverseMul = queue.getReverseMul(); final Sort indexSort = context.reader().getMetaData().getSort(); - final boolean canEarlyTerminate = indexSort != null && - canEarlyTerminate(sort, indexSort); + final boolean canEarlyTerminate = canEarlyTerminate(sort, indexSort); return new MultiComparatorLeafCollector(comparators, reverseMul) { @@ -192,8 +205,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { docBase = context.docBase; final int afterDoc = after.doc - docBase; final Sort indexSort = context.reader().getMetaData().getSort(); - final boolean canEarlyTerminate = indexSort != null && - canEarlyTerminate(sort, indexSort); + final boolean canEarlyTerminate = canEarlyTerminate(sort, indexSort); return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) { boolean collectedAllCompetitiveHits = false; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollectorEarlyTermination.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollectorEarlyTermination.java index b6d33dad390..a92a100e058 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollectorEarlyTermination.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollectorEarlyTermination.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Random; 
import java.util.Set; +import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; @@ -39,8 +40,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; -import com.carrotsearch.randomizedtesting.generators.RandomPicks; - public class TestTopFieldCollectorEarlyTermination extends LuceneTestCase { private int numDocs; @@ -167,7 +166,41 @@ public class TestTopFieldCollectorEarlyTermination extends LuceneTestCase { } } - public void testCanEarlyTerminate() { + public void testCanEarlyTerminateOnDocId() { + assertTrue(TopFieldCollector.canEarlyTerminate( + new Sort(SortField.FIELD_DOC), + new Sort(SortField.FIELD_DOC))); + + assertTrue(TopFieldCollector.canEarlyTerminate( + new Sort(SortField.FIELD_DOC), + null)); + + assertFalse(TopFieldCollector.canEarlyTerminate( + new Sort(new SortField("a", SortField.Type.LONG)), + null)); + + assertFalse(TopFieldCollector.canEarlyTerminate( + new Sort(new SortField("a", SortField.Type.LONG)), + new Sort(new SortField("b", SortField.Type.LONG)))); + + assertTrue(TopFieldCollector.canEarlyTerminate( + new Sort(SortField.FIELD_DOC), + new Sort(new SortField("b", SortField.Type.LONG)))); + + assertTrue(TopFieldCollector.canEarlyTerminate( + new Sort(SortField.FIELD_DOC), + new Sort(new SortField("b", SortField.Type.LONG), SortField.FIELD_DOC))); + + assertFalse(TopFieldCollector.canEarlyTerminate( + new Sort(new SortField("a", SortField.Type.LONG)), + new Sort(SortField.FIELD_DOC))); + + assertFalse(TopFieldCollector.canEarlyTerminate( + new Sort(new SortField("a", SortField.Type.LONG), SortField.FIELD_DOC), + new Sort(SortField.FIELD_DOC))); + } + + public void testCanEarlyTerminateOnPrefix() { assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new 
SortField("a", SortField.Type.LONG)))); @@ -180,6 +213,10 @@ public class TestTopFieldCollectorEarlyTermination extends LuceneTestCase { new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); + assertFalse(TopFieldCollector.canEarlyTerminate( + new Sort(new SortField("a", SortField.Type.LONG, true)), + null)); + assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG, true)), new Sort(new SortField("a", SortField.Type.LONG, false))));