LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. (#160)

Before, rewriting could slightly change the scoring when weights were
specified. We now rewrite less aggressively to avoid changing the query's
behavior.
This commit is contained in:
Naoto MINAMI 2021-06-03 09:28:51 +09:00 committed by GitHub
parent eecd1971fa
commit 89034ad8cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 32 additions and 31 deletions

View File

@ -290,6 +290,9 @@ Bug fixes
ConcurrentSortedSetDocValueFacetCounts now returns null / -1 instead of throwing
IllegalArgumentException as per Javadoc spec in Facets. (Alexander Lukyanchikov)
* LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. Before, rewriting
could slightly change the scoring when weights were specified. (Naoto Minami via Julie Tibshirani)
Changes in Backwards Compatibility Policy
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top

View File

@ -144,14 +144,12 @@ public final class SynonymQuery extends Query {
@Override
public Query rewrite(IndexReader reader) throws IOException {
// optimize zero and single term cases
// optimize zero and non-boosted single term cases
if (terms.length == 0) {
return new BooleanQuery.Builder().build();
}
if (terms.length == 1) {
return terms[0].boost == 1f
? new TermQuery(terms[0].term)
: new BoostQuery(new TermQuery(terms[0].term), terms[0].boost);
if (terms.length == 1 && terms[0].boost == 1f) {
return new TermQuery(terms[0].term);
}
return this;
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@ -466,4 +467,28 @@ public class TestSynonymQuery extends LuceneTestCase {
reader.close();
dir.close();
}
public void testRewrite() throws IOException {
IndexSearcher searcher = new IndexSearcher(new MultiReader());
// zero length SynonymQuery is rewritten
SynonymQuery q = new SynonymQuery.Builder("f").build();
assertTrue(q.getTerms().isEmpty());
assertEquals(searcher.rewrite(q), new MatchNoDocsQuery());
// non-boosted single term SynonymQuery is rewritten
q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 1f).build();
assertEquals(q.getTerms().size(), 1);
assertEquals(searcher.rewrite(q), new TermQuery(new Term("f")));
// boosted single term SynonymQuery is not rewritten
q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 0.8f).build();
assertEquals(q.getTerms().size(), 1);
assertEquals(searcher.rewrite(q), q);
// multiple term SynonymQuery is not rewritten
q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 1f).addTerm(new Term("f"), 1f).build();
assertEquals(q.getTerms().size(), 2);
assertEquals(searcher.rewrite(q), q);
}
}

View File

@ -48,7 +48,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermScorer;
import org.apache.lucene.search.TermStatistics;
@ -233,18 +232,6 @@ public final class CombinedFieldQuery extends Query implements Accountable {
if (terms.length == 0) {
return new BooleanQuery.Builder().build();
}
// single field and one term
if (fieldTerms.length == 1) {
return new TermQuery(fieldTerms[0]);
}
// single field and multiple terms
if (fieldAndWeights.size() == 1) {
SynonymQuery.Builder builder = new SynonymQuery.Builder(fieldTerms[0].field());
for (Term term : fieldTerms) {
builder.addTerm(term);
}
return builder.build();
}
return this;
}

View File

@ -34,7 +34,6 @@ import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs;
@ -68,17 +67,6 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
actual = searcher.rewrite(builder.build());
assertEquals(actual, new MatchNoDocsQuery());
builder.addTerm(new BytesRef("foo"));
actual = searcher.rewrite(builder.build());
assertEquals(actual, new TermQuery(new Term("field", "foo")));
builder.addTerm(new BytesRef("bar"));
actual = searcher.rewrite(builder.build());
assertEquals(
actual,
new SynonymQuery.Builder("field")
.addTerm(new Term("field", "foo"))
.addTerm(new Term("field", "bar"))
.build());
builder.addField("another_field", 1f);
Query query = builder.build();
actual = searcher.rewrite(query);
assertEquals(actual, query);