mirror of https://github.com/apache/lucene.git
LUCENE-4727: use float as minShouldMatch on CommonTermsQuery
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1439449 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
765d3ef36c
commit
ce7be4dc65
|
@ -74,7 +74,7 @@ public class CommonTermsQuery extends Query {
|
|||
protected final Occur highFreqOccur;
|
||||
protected float lowFreqBoost = 1.0f;
|
||||
protected float highFreqBoost = 1.0f;
|
||||
protected int minNrShouldMatch = 0;
|
||||
protected float minNrShouldMatch = 0;
|
||||
|
||||
/**
|
||||
* Creates a new {@link CommonTermsQuery}
|
||||
|
@ -84,7 +84,7 @@ public class CommonTermsQuery extends Query {
|
|||
* @param lowFreqOccur
|
||||
* {@link Occur} used for low frequency terms
|
||||
* @param maxTermFrequency
|
||||
* a value in [0..1] (or absolute number >=1) representing the
|
||||
* a value in [0..1) (or absolute number >=1) representing the
|
||||
* maximum threshold of a terms document frequency to be considered a
|
||||
* low frequency term.
|
||||
* @throws IllegalArgumentException
|
||||
|
@ -104,7 +104,7 @@ public class CommonTermsQuery extends Query {
|
|||
* @param lowFreqOccur
|
||||
* {@link Occur} used for low frequency terms
|
||||
* @param maxTermFrequency
|
||||
* a value in [0..1] (or absolute number >=1) representing the
|
||||
* a value in [0..1) (or absolute number >=1) representing the
|
||||
* maximum threshold of a terms document frequency to be considered a
|
||||
* low frequency term.
|
||||
* @param disableCoord
|
||||
|
@ -160,15 +160,19 @@ public class CommonTermsQuery extends Query {
|
|||
return buildQuery(maxDoc, contextArray, queryTerms);
|
||||
}
|
||||
|
||||
protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
|
||||
if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
|
||||
return (int) minNrShouldMatch;
|
||||
}
|
||||
return (int) (Math.round(minNrShouldMatch * numOptional));
|
||||
}
|
||||
|
||||
protected Query buildQuery(final int maxDoc,
|
||||
final TermContext[] contextArray, final Term[] queryTerms) {
|
||||
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
|
||||
BooleanQuery highFreq = new BooleanQuery(disableCoord);
|
||||
highFreq.setBoost(highFreqBoost);
|
||||
lowFreq.setBoost(lowFreqBoost);
|
||||
if (lowFreqOccur == Occur.SHOULD) {
|
||||
lowFreq.setMinimumNumberShouldMatch(minNrShouldMatch);
|
||||
}
|
||||
BooleanQuery query = new BooleanQuery(true);
|
||||
for (int i = 0; i < queryTerms.length; i++) {
|
||||
TermContext termContext = contextArray[i];
|
||||
|
@ -186,6 +190,11 @@ public class CommonTermsQuery extends Query {
|
|||
}
|
||||
|
||||
}
|
||||
final int numLowFreqClauses = lowFreq.clauses().size();
|
||||
if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
|
||||
int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
|
||||
lowFreq.setMinimumNumberShouldMatch(minMustMatch);
|
||||
}
|
||||
if (lowFreq.clauses().isEmpty()) {
|
||||
/*
|
||||
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
|
||||
|
@ -265,7 +274,9 @@ public class CommonTermsQuery extends Query {
|
|||
/**
|
||||
* Specifies a minimum number of the optional BooleanClauses which must be
|
||||
* satisfied in order to produce a match on the low frequency terms query
|
||||
* part.
|
||||
* part. This method accepts a float value in the range [0..1) as a fraction
|
||||
* of the actual query terms in the low frequent clause or a number
|
||||
* <tt>>=1</tt> as an absolut number of clauses that need to match.
|
||||
*
|
||||
* <p>
|
||||
* By default no optional clauses are necessary for a match (unless there are
|
||||
|
@ -276,7 +287,7 @@ public class CommonTermsQuery extends Query {
|
|||
* @param min
|
||||
* the number of optional clauses that must match
|
||||
*/
|
||||
public void setMinimumNumberShouldMatch(int min) {
|
||||
public void setMinimumNumberShouldMatch(float min) {
|
||||
this.minNrShouldMatch = min;
|
||||
}
|
||||
|
||||
|
@ -284,7 +295,7 @@ public class CommonTermsQuery extends Query {
|
|||
* Gets the minimum number of the optional BooleanClauses which must be
|
||||
* satisfied.
|
||||
*/
|
||||
public int getMinimumNumberShouldMatch() {
|
||||
public float getMinimumNumberShouldMatch() {
|
||||
return minNrShouldMatch;
|
||||
}
|
||||
|
||||
|
@ -332,7 +343,7 @@ public class CommonTermsQuery extends Query {
|
|||
result = prime * result
|
||||
+ ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
|
||||
result = prime * result + Float.floatToIntBits(maxTermFrequency);
|
||||
result = prime * result + minNrShouldMatch;
|
||||
result = prime * result + Float.floatToIntBits(minNrShouldMatch);
|
||||
result = prime * result + ((terms == null) ? 0 : terms.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -175,6 +175,90 @@ public class CommonTermsQueryTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testMinShouldMatch() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
String[] docs = new String[] {"this is the end of the world right",
|
||||
"is this it or maybe not",
|
||||
"this is the end of the universe as we know it",
|
||||
"there is the famous restaurant at the end of the universe",};
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("id", "" + i, Field.Store.YES));
|
||||
doc.add(newTextField("field", docs[i], Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(0.5f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 1);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
}
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(2.0f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 1);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
}
|
||||
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(0.49f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 3);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
|
||||
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
|
||||
}
|
||||
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
query.setMinimumNumberShouldMatch(1.0f);
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 3);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
|
||||
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testIllegalOccur() {
|
||||
Random random = random();
|
||||
|
||||
|
|
Loading…
Reference in New Issue