mirror of https://github.com/apache/lucene.git
LUCENE-5478: CommonTermsQuery now allows creating custom term queries
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1572613 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
@ -73,6 +73,10 @@ New Features
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
on multi-valued field. (Robert Muir)
* LUCENE-5478: CommonTermsQuery now allows creating custom term queries,
similar to the query parser, by overriding the newTermQuery method.
(Simon Willnauer)
API Changes
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
@ -16,11 +16,6 @@ package org.apache.lucene.queries;
* See the License for the specific language governing permissions and
* limitations under the License.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@ -30,12 +25,17 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
* A query that executes high-frequency terms in an optional sub-query to prevent
* slow queries due to "common" terms like stopwords. This query
@ -149,7 +149,7 @@ public class CommonTermsQuery extends Query {
if (this.terms.isEmpty()) {
return new BooleanQuery();
} else if (this.terms.size() == 1) {
final TermQuery tq = new TermQuery(this.terms.get(0));
final Query tq = newTermQuery(this.terms.get(0), null);
return tq;
@ -186,15 +186,15 @@ public class CommonTermsQuery extends Query {
for (int i = 0; i < queryTerms.length; i++) {
TermContext termContext = contextArray[i];
if (termContext == null) {
lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
} else {
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
* (float) maxDoc))) {
.add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
} else {
lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
@ -351,7 +351,7 @@ public class CommonTermsQuery extends Query {
for (int i = 0; i < terms.size(); i++) {
Term t = terms.get(i);
buffer.append(new TermQuery(t).toString());
buffer.append(newTermQuery(t, null).toString());
if (i != terms.size() - 1) buffer.append(", ");
@ -412,4 +412,14 @@ public class CommonTermsQuery extends Query {
return true;
* Builds a new query for the given term; the default implementation creates a TermQuery.
* <p>This is intended for subclasses that wish to customize the generated queries.</p>
* @param term the term to build the query for
* @param context the TermContext to be used to create the low level term query. Can be <code>null</code>.
* @return the new query for the term (a TermQuery unless overridden)
protected Query newTermQuery(Term term, TermContext context) {
return context == null ? new TermQuery(term) : new TermQuery(term, context);
@ -17,14 +17,6 @@ package org.apache.lucene.queries;
* limitations under the License.
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
@ -33,12 +25,14 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
@ -49,7 +43,15 @@ import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
public class CommonTermsQueryTest extends LuceneTestCase {
@ -339,6 +341,60 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testExtend() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
"there is the famous restaurant at the end of the universe",};
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
doc.add(newStringField("id", "" + i, Field.Store.YES));
doc.add(newTextField("field", docs[i], Field.Store.NO));
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r);
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 3);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
// this one boosts the termQuery("field" "universe") by 10x
CommonTermsQuery query = new ExtendedCommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 3);
assertEquals("2", r.document(search.scoreDocs[0].doc).get("id"));
assertEquals("3", r.document(search.scoreDocs[1].doc).get("id"));
assertEquals("0", r.document(search.scoreDocs[2].doc).get("id"));
public void testRandomIndex() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@ -480,4 +536,20 @@ public class CommonTermsQueryTest extends LuceneTestCase {
private static final class ExtendedCommonTermsQuery extends CommonTermsQuery {
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
protected Query newTermQuery(Term term, TermContext context) {
Query query = super.newTermQuery(term, context);
if (term.text().equals("universe")) {
return query;
Reference in New Issue