From 4947c0f746e9b5074ab78036e935cecab1d52a9c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 4 Dec 2024 15:12:04 +0100 Subject: [PATCH] Improve search equivalence tests. (#14036) This addresses an existing TODO about giving terms a zipfian distribution, and disables query caching to make sure that two-phase iterators are properly tested. --- .../tests/search/SearchEquivalenceTestBase.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/search/SearchEquivalenceTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/search/SearchEquivalenceTestBase.java index 63a47895141..8831a3fcc7a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/search/SearchEquivalenceTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/search/SearchEquivalenceTestBase.java @@ -94,7 +94,11 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase { reader = iw.getReader(); s1 = newSearcher(reader); + // Disable the query cache, which converts two-phase iterators to normal iterators, while we + // want to make sure two-phase iterators are exercised. + s1.setQueryCache(null); s2 = newSearcher(reader); + s2.setQueryCache(null); iw.close(); } @@ -114,7 +118,6 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase { * tokenization can be assumed to be on whitespace. */ static String randomFieldContents() { - // TODO: zipf-like distribution StringBuilder sb = new StringBuilder(); int numTerms = random().nextInt(15); for (int i = 0; i < numTerms; i++) { @@ -128,7 +131,13 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase { /** returns random character (a-z) */ static char randomChar() { - return (char) TestUtil.nextInt(random(), 'a', 'z'); + char c = (char) TestUtil.nextInt(random(), 'a', 'z'); + if (random().nextBoolean()) { + // bias towards earlier chars, so that chars have a ~ zipfian distribution with earlier chars + // having a higher frequency + c = (char) TestUtil.nextInt(random(), 'a', c); + } + return c; } /** returns a term suitable for searching. terms are single characters in lowercase (a-z) */