mirror of https://github.com/apache/lucene.git
LUCENE-5478: CommonTermsQuery now allows to create custom term queries
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1572613 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
33b8e75a42
commit
7877ebeb63
|
@ -73,6 +73,10 @@ New Features
|
|||
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
|
||||
on multi-valued field. (Robert Muir)
|
||||
|
||||
* LUCENE-5478: CommonTermsQuery now allows to create custom term queries
|
||||
similar to the query parser by overriding a newTermQuery method.
|
||||
(Simon Willnauer)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
|
||||
|
|
|
@ -16,11 +16,6 @@ package org.apache.lucene.queries;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -30,12 +25,17 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A query that executes high-frequency terms in a optional sub-query to prevent
|
||||
* slow queries due to "common" terms like stopwords. This query
|
||||
|
@ -149,7 +149,7 @@ public class CommonTermsQuery extends Query {
|
|||
if (this.terms.isEmpty()) {
|
||||
return new BooleanQuery();
|
||||
} else if (this.terms.size() == 1) {
|
||||
final TermQuery tq = new TermQuery(this.terms.get(0));
|
||||
final Query tq = newTermQuery(this.terms.get(0), null);
|
||||
tq.setBoost(getBoost());
|
||||
return tq;
|
||||
}
|
||||
|
@ -186,15 +186,15 @@ public class CommonTermsQuery extends Query {
|
|||
for (int i = 0; i < queryTerms.length; i++) {
|
||||
TermContext termContext = contextArray[i];
|
||||
if (termContext == null) {
|
||||
lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
|
||||
lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
|
||||
} else {
|
||||
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|
||||
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
|
||||
* (float) maxDoc))) {
|
||||
highFreq
|
||||
.add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
|
||||
.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
|
||||
} else {
|
||||
lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
|
||||
lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -351,7 +351,7 @@ public class CommonTermsQuery extends Query {
|
|||
}
|
||||
for (int i = 0; i < terms.size(); i++) {
|
||||
Term t = terms.get(i);
|
||||
buffer.append(new TermQuery(t).toString());
|
||||
buffer.append(newTermQuery(t, null).toString());
|
||||
|
||||
if (i != terms.size() - 1) buffer.append(", ");
|
||||
}
|
||||
|
@ -412,4 +412,14 @@ public class CommonTermsQuery extends Query {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a new TermQuery instance.
|
||||
* <p>This is intended for subclasses that wish to customize the generated queries.</p>
|
||||
* @param term term
|
||||
* @param context the TermContext to be used to create the low level term query. Can be <code>null</code>.
|
||||
* @return new TermQuery instance
|
||||
*/
|
||||
protected Query newTermQuery(Term term, TermContext context) {
|
||||
return context == null ? new TermQuery(term) : new TermQuery(term, context);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,14 +17,6 @@ package org.apache.lucene.queries;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
|
@ -33,12 +25,14 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
@ -49,7 +43,15 @@ import org.apache.lucene.util.LineFileDocs;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
public class CommonTermsQueryTest extends LuceneTestCase {
|
||||
|
||||
|
@ -339,6 +341,60 @@ public class CommonTermsQueryTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtend() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
String[] docs = new String[] {"this is the end of the world right",
|
||||
"is this it or maybe not",
|
||||
"this is the end of the universe as we know it",
|
||||
"there is the famous restaurant at the end of the universe",};
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newStringField("id", "" + i, Field.Store.YES));
|
||||
doc.add(newTextField("field", docs[i], Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader r = w.getReader();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
{
|
||||
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 3);
|
||||
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
|
||||
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
|
||||
}
|
||||
|
||||
{
|
||||
// this one boosts the termQuery("field" "universe") by 10x
|
||||
CommonTermsQuery query = new ExtendedCommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||
random().nextBoolean() ? 2.0f : 0.5f);
|
||||
query.add(new Term("field", "is"));
|
||||
query.add(new Term("field", "this"));
|
||||
query.add(new Term("field", "end"));
|
||||
query.add(new Term("field", "world"));
|
||||
query.add(new Term("field", "universe"));
|
||||
query.add(new Term("field", "right"));
|
||||
TopDocs search = s.search(query, 10);
|
||||
assertEquals(search.totalHits, 3);
|
||||
assertEquals("2", r.document(search.scoreDocs[0].doc).get("id"));
|
||||
assertEquals("3", r.document(search.scoreDocs[1].doc).get("id"));
|
||||
assertEquals("0", r.document(search.scoreDocs[2].doc).get("id"));
|
||||
}
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandomIndex() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
|
@ -480,4 +536,20 @@ public class CommonTermsQueryTest extends LuceneTestCase {
|
|||
|
||||
lineFileDocs.close();
|
||||
}
|
||||
|
||||
private static final class ExtendedCommonTermsQuery extends CommonTermsQuery {
|
||||
|
||||
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
|
||||
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query newTermQuery(Term term, TermContext context) {
|
||||
Query query = super.newTermQuery(term, context);
|
||||
if (term.text().equals("universe")) {
|
||||
query.setBoost(100f);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue