From 89034ad8cf8019c62a0a4ed1e477cd52e1277e60 Mon Sep 17 00:00:00 2001 From: Naoto MINAMI Date: Thu, 3 Jun 2021 09:28:51 +0900 Subject: [PATCH] LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. (#160) Before, rewriting could slightly change the scoring when weights were specified. We now rewrite less aggressively to avoid changing the query's behavior. --- lucene/CHANGES.txt | 5 +++- .../apache/lucene/search/SynonymQuery.java | 8 +++--- .../lucene/search/TestSynonymQuery.java | 25 +++++++++++++++++++ .../sandbox/search/CombinedFieldQuery.java | 13 ---------- .../search/TestCombinedFieldQuery.java | 12 --------- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6c809cbb842..26863cb1f06 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -290,6 +290,9 @@ Bug fixes ConcurrentSortedSetDocValueFacetCounts now returns null / -1 instead of throwing IllegalArgumentException as per Javadoc spec in Facets. (Alexander Lukyanchikov) +* LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. Before, rewriting + could slightly change the scoring when weights were specified. (Naoto Minami via Julie Tibshirani) + Changes in Backwards Compatibility Policy * LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top @@ -348,7 +351,7 @@ Other the existing ones to the backwards codecs. (Julie Tibshirani, Ignacio Vera) * LUCENE-9907: Remove dependency on PackedInts#getReader() from the current codecs and move the - method to backwards codec. (Ignacio Vera) + method to backwards codec. (Ignacio Vera) ======================= Lucene 8.9.0 ======================= diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 8b564bd2701..2e3c9ab1e6a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -144,14 +144,12 @@ public final class SynonymQuery extends Query { @Override public Query rewrite(IndexReader reader) throws IOException { - // optimize zero and single term cases + // optimize zero and non-boosted single term cases if (terms.length == 0) { return new BooleanQuery.Builder().build(); } - if (terms.length == 1) { - return terms[0].boost == 1f - ? new TermQuery(terms[0].term) - : new BoostQuery(new TermQuery(terms[0].term), terms[0].boost); + if (terms.length == 1 && terms[0].boost == 1f) { + return new TermQuery(terms[0].term); } return this; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java index d71956efb90..1772e248cda 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.ImpactsSource; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; @@ -466,4 +467,28 @@ public class TestSynonymQuery extends LuceneTestCase { reader.close(); dir.close(); } + + public void testRewrite() throws IOException { + IndexSearcher searcher = new IndexSearcher(new MultiReader()); + + // zero length SynonymQuery is rewritten + SynonymQuery q = new SynonymQuery.Builder("f").build(); + assertTrue(q.getTerms().isEmpty()); + assertEquals(searcher.rewrite(q), new MatchNoDocsQuery()); + + // non-boosted single term SynonymQuery is rewritten + q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 1f).build(); + assertEquals(q.getTerms().size(), 1); + assertEquals(searcher.rewrite(q), new TermQuery(new Term("f"))); + + // boosted single term SynonymQuery is not rewritten + q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 0.8f).build(); + assertEquals(q.getTerms().size(), 1); + assertEquals(searcher.rewrite(q), q); + + // multiple term SynonymQuery is not rewritten + q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 1f).addTerm(new Term("f"), 1f).build(); + assertEquals(q.getTerms().size(), 2); + assertEquals(searcher.rewrite(q), q); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java index 1c79657c9b0..936fc7b07b8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java @@ -48,7 +48,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermScorer; import org.apache.lucene.search.TermStatistics; @@ -233,18 +232,6 @@ public final class CombinedFieldQuery extends Query implements Accountable { if (terms.length == 0) { return new BooleanQuery.Builder().build(); } - // single field and one term - if (fieldTerms.length == 1) { - return new TermQuery(fieldTerms[0]); - } - // single field and multiple terms - if (fieldAndWeights.size() == 1) { - SynonymQuery.Builder builder = new SynonymQuery.Builder(fieldTerms[0].field()); - for (Term term : fieldTerms) { - builder.addTerm(term); - } - return builder.build(); - } return this; } diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java index f3c0044c6b2..bb9fe6f2440 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java @@ -34,7 +34,6 @@ import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TopDocs; @@ -68,17 +67,6 @@ public class TestCombinedFieldQuery extends LuceneTestCase { actual = searcher.rewrite(builder.build()); assertEquals(actual, new MatchNoDocsQuery()); builder.addTerm(new BytesRef("foo")); - actual = searcher.rewrite(builder.build()); - assertEquals(actual, new TermQuery(new Term("field", "foo"))); - builder.addTerm(new BytesRef("bar")); - actual = searcher.rewrite(builder.build()); - assertEquals( - actual, - new SynonymQuery.Builder("field") - .addTerm(new Term("field", "foo")) - .addTerm(new Term("field", "bar")) - .build()); - builder.addField("another_field", 1f); Query query = builder.build(); actual = searcher.rewrite(query); assertEquals(actual, query);