LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. (#160)

Before, rewriting could slightly change the scoring when weights were
specified. We now rewrite less aggressively to avoid changing the query's
behavior.
This commit is contained in:
Naoto MINAMI 2021-06-03 09:28:51 +09:00 committed by GitHub
parent eecd1971fa
commit 89034ad8cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 32 additions and 31 deletions

View File

@ -290,6 +290,9 @@ Bug fixes
ConcurrentSortedSetDocValueFacetCounts now returns null / -1 instead of throwing ConcurrentSortedSetDocValueFacetCounts now returns null / -1 instead of throwing
IllegalArgumentException as per Javadoc spec in Facets. (Alexander Lukyanchikov) IllegalArgumentException as per Javadoc spec in Facets. (Alexander Lukyanchikov)
* LUCENE-9823: Prevent unsafe rewrites for SynonymQuery and CombinedFieldQuery. Before, rewriting
could slightly change the scoring when weights were specified. (Naoto Minami via Julie Tibshirani)
Changes in Backwards Compatibility Policy Changes in Backwards Compatibility Policy
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top * LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top

View File

@ -144,14 +144,12 @@ public final class SynonymQuery extends Query {
@Override @Override
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
// optimize zero and single term cases // optimize zero and non-boosted single term cases
if (terms.length == 0) { if (terms.length == 0) {
return new BooleanQuery.Builder().build(); return new BooleanQuery.Builder().build();
} }
if (terms.length == 1) { if (terms.length == 1 && terms[0].boost == 1f) {
return terms[0].boost == 1f return new TermQuery(terms[0].term);
? new TermQuery(terms[0].term)
: new BoostQuery(new TermQuery(terms[0].term), terms[0].boost);
} }
return this; return this;
} }

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.ImpactsSource; import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
@ -466,4 +467,28 @@ public class TestSynonymQuery extends LuceneTestCase {
reader.close(); reader.close();
dir.close(); dir.close();
} }
public void testRewrite() throws IOException {
IndexSearcher searcher = new IndexSearcher(new MultiReader());
// zero length SynonymQuery is rewritten
SynonymQuery q = new SynonymQuery.Builder("f").build();
assertTrue(q.getTerms().isEmpty());
assertEquals(searcher.rewrite(q), new MatchNoDocsQuery());
// non-boosted single term SynonymQuery is rewritten
q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 1f).build();
assertEquals(q.getTerms().size(), 1);
assertEquals(searcher.rewrite(q), new TermQuery(new Term("f")));
// boosted single term SynonymQuery is not rewritten
q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 0.8f).build();
assertEquals(q.getTerms().size(), 1);
assertEquals(searcher.rewrite(q), q);
// multiple term SynonymQuery is not rewritten
q = new SynonymQuery.Builder("f").addTerm(new Term("f"), 1f).addTerm(new Term("f"), 1f).build();
assertEquals(q.getTerms().size(), 2);
assertEquals(searcher.rewrite(q), q);
}
} }

View File

@ -48,7 +48,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermScorer; import org.apache.lucene.search.TermScorer;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
@ -233,18 +232,6 @@ public final class CombinedFieldQuery extends Query implements Accountable {
if (terms.length == 0) { if (terms.length == 0) {
return new BooleanQuery.Builder().build(); return new BooleanQuery.Builder().build();
} }
// single field and one term
if (fieldTerms.length == 1) {
return new TermQuery(fieldTerms[0]);
}
// single field and multiple terms
if (fieldAndWeights.size() == 1) {
SynonymQuery.Builder builder = new SynonymQuery.Builder(fieldTerms[0].field());
for (Term term : fieldTerms) {
builder.addTerm(term);
}
return builder.build();
}
return this; return this;
} }

View File

@ -34,7 +34,6 @@ import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
@ -68,17 +67,6 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
actual = searcher.rewrite(builder.build()); actual = searcher.rewrite(builder.build());
assertEquals(actual, new MatchNoDocsQuery()); assertEquals(actual, new MatchNoDocsQuery());
builder.addTerm(new BytesRef("foo")); builder.addTerm(new BytesRef("foo"));
actual = searcher.rewrite(builder.build());
assertEquals(actual, new TermQuery(new Term("field", "foo")));
builder.addTerm(new BytesRef("bar"));
actual = searcher.rewrite(builder.build());
assertEquals(
actual,
new SynonymQuery.Builder("field")
.addTerm(new Term("field", "foo"))
.addTerm(new Term("field", "bar"))
.build());
builder.addField("another_field", 1f);
Query query = builder.build(); Query query = builder.build();
actual = searcher.rewrite(query); actual = searcher.rewrite(query);
assertEquals(actual, query); assertEquals(actual, query);