Bump the window size of disjunction from 2,048 to 4,096. (#13605)

It's been pointed out multiple times that a difference between Tantivy and Lucene
is the fact that Tantivy uses windows of 4,096 docs when Lucene has a 2x
smaller window size of 2,048 docs and that this might explain part of the
performance difference. luceneutil suggests that bumping the window size to
4,096 does indeed improve performance for counting queries, but not for top-k
queries. I'm still suggesting to bump the window size across the board to keep
our disjunction scorer consistent.
This commit is contained in:
Adrien Grand 2024-07-25 15:38:21 +02:00 committed by GitHub
parent b4fb425c43
commit 8d4f7a6e99
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 15 additions and 15 deletions

View File

@ -30,7 +30,7 @@ import org.apache.lucene.util.PriorityQueue;
*/
final class BooleanScorer extends BulkScorer {
static final int SHIFT = 11;
static final int SHIFT = 12;
static final int SIZE = 1 << SHIFT;
static final int MASK = SIZE - 1;
static final int SET_SIZE = 1 << (SHIFT - 6);

View File

@ -25,7 +25,7 @@ import org.apache.lucene.util.MathUtil;
final class MaxScoreBulkScorer extends BulkScorer {
static final int INNER_WINDOW_SIZE = 1 << 11;
static final int INNER_WINDOW_SIZE = 1 << 12;
private final int maxDoc;
// All scorers, sorted by increasing max score.

View File

@ -125,19 +125,19 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
assertEquals(2 + 1, scorer.score(), 0);
break;
case 1:
assertEquals(2048, doc);
assertEquals(4096, doc);
assertEquals(2, scorer.score(), 0);
break;
case 2:
assertEquals(6144, doc);
assertEquals(12288, doc);
assertEquals(2 + 1, scorer.score(), 0);
break;
case 3:
assertEquals(8192, doc);
assertEquals(16384, doc);
assertEquals(1, scorer.score(), 0);
break;
case 4:
assertEquals(10240, doc);
assertEquals(20480, doc);
assertEquals(1, scorer.score(), 0);
break;
default:
@ -195,13 +195,13 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
assertEquals(2 + 1, scorer.score(), 0);
break;
case 1:
assertEquals(2048, doc);
assertEquals(4096, doc);
assertEquals(2, scorer.score(), 0);
// simulate top-2 retrieval
scorer.setMinCompetitiveScore(Math.nextUp(2));
break;
case 2:
assertEquals(6144, doc);
assertEquals(12288, doc);
assertEquals(2 + 1, scorer.score(), 0);
scorer.setMinCompetitiveScore(Math.nextUp(2 + 1));
break;
@ -268,19 +268,19 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
assertEquals(2 + 1, scorer.score(), 0);
break;
case 1:
assertEquals(2048, doc);
assertEquals(4096, doc);
assertEquals(2, scorer.score(), 0);
break;
case 2:
assertEquals(6144, doc);
assertEquals(12288, doc);
assertEquals(2 + 1 + 3, scorer.score(), 0);
break;
case 3:
assertEquals(8192, doc);
assertEquals(16384, doc);
assertEquals(1, scorer.score(), 0);
break;
case 4:
assertEquals(10240, doc);
assertEquals(20480, doc);
assertEquals(1 + 3, scorer.score(), 0);
break;
default:
@ -346,18 +346,18 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
assertEquals(2 + 1, scorer.score(), 0);
break;
case 1:
assertEquals(2048, doc);
assertEquals(4096, doc);
assertEquals(2, scorer.score(), 0);
// simulate top-2 retrieval
scorer.setMinCompetitiveScore(Math.nextUp(2));
break;
case 2:
assertEquals(6144, doc);
assertEquals(12288, doc);
assertEquals(2 + 1 + 3, scorer.score(), 0);
scorer.setMinCompetitiveScore(Math.nextUp(2 + 1));
break;
case 3:
assertEquals(10240, doc);
assertEquals(20480, doc);
assertEquals(1 + 3, scorer.score(), 0);
scorer.setMinCompetitiveScore(Math.nextUp(1 + 3));
break;