LUCENE-5264: CommonTermsQuery ignores minMustMatch if only high freq terms are present

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1530651 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2013-10-09 14:47:55 +00:00
parent cd6c2268da
commit f283429ee1
3 changed files with 39 additions and 9 deletions

View File

@ -120,6 +120,10 @@ Bug Fixes
descriptor exhaustion, hit at unlucky times inside IndexWriter could
lead to silently losing deletions. (Shai Erera, Mike McCandless)
* LUCENE-5264: CommonTermsQuery ignored minMustMatch if only high-frequency
terms were present in the query and the high-frequency operator was set
to SHOULD. (Simon Willnauer)
API Changes:
* LUCENE-5222: Add SortField.needsScores(). Previously it was not possible

View File

@ -214,18 +214,13 @@ public class CommonTermsQuery extends Query {
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
* prevent slow queries.
*/
if (highFreqOccur == Occur.MUST) {
highFreq.setBoost(getBoost());
return highFreq;
} else {
BooleanQuery highFreqConjunction = new BooleanQuery();
if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
for (BooleanClause booleanClause : highFreq) {
highFreqConjunction.add(booleanClause.getQuery(), Occur.MUST);
booleanClause.setOccur(Occur.MUST);
}
highFreqConjunction.setBoost(getBoost());
return highFreqConjunction;
}
highFreq.setBoost(getBoost());
return highFreq;
} else if (highFreq.clauses().isEmpty()) {
// only do low freq terms - we don't have high freq terms
lowFreq.setBoost(getBoost());

View File

@ -283,6 +283,37 @@ public class CommonTermsQueryTest extends LuceneTestCase {
r.document(search.scoreDocs[1].doc).get("id"),
r.document(search.scoreDocs[2].doc).get("id"))));
}
{
  // Only high freq terms around and both occur arguments are SHOULD:
  // check that the high-freq minimum-should-match is applied rather than
  // ignored (LUCENE-5264).
  CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
      random().nextBoolean() ? 2.0f : 0.5f);
  query.add(new Term("field", "is"));
  query.add(new Term("field", "this"));
  query.add(new Term("field", "the"));
  query.setLowFreqMinimumNumberShouldMatch(1.0f);
  query.setHighFreqMinimumNumberShouldMatch(2.0f);
  TopDocs search = s.search(query, 10);
  // expected value first, so a failure reports "expected:<4> but was:<n>"
  assertEquals(4, search.totalHits);
}
{
  // Only high freq terms around, this time with Occur.MUST as the first
  // occur argument (presumably the high-freq operator; confirm against the
  // CommonTermsQuery constructor). Exactly the documents with ids "0" and
  // "2" are expected to match.
  CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD,
      random().nextBoolean() ? 2.0f : 0.5f);
  query.add(new Term("field", "is"));
  query.add(new Term("field", "this"));
  query.add(new Term("field", "the"));
  query.setLowFreqMinimumNumberShouldMatch(1.0f);
  query.setHighFreqMinimumNumberShouldMatch(2.0f);
  TopDocs search = s.search(query, 10);
  // expected value first, so a failure reports "expected:<2> but was:<n>"
  assertEquals(2, search.totalHits);
  // compare ids as sets: the order of the two hits is not asserted here
  assertEquals(
      new HashSet<>(Arrays.asList("0", "2")),
      new HashSet<>(Arrays.asList(
          r.document(search.scoreDocs[0].doc).get("id"),
          r.document(search.scoreDocs[1].doc).get("id"))));
}
r.close();
w.close();
dir.close();