mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 11:35:14 +00:00
LUCENE-5478: CommonTermsQuery now allows creating custom term queries
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1572613 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
33b8e75a42
commit
7877ebeb63
@ -73,6 +73,10 @@ New Features
|
|||||||
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
|
* LUCENE-5454: Add SortedSetSortField to lucene/sandbox, to allow sorting
|
||||||
on multi-valued field. (Robert Muir)
|
on multi-valued field. (Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5478: CommonTermsQuery now allows creating custom term queries
|
||||||
|
similar to the query parser by overriding a newTermQuery method.
|
||||||
|
(Simon Willnauer)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
|
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues
|
||||||
|
@ -16,11 +16,6 @@ package org.apache.lucene.queries;
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
@ -30,12 +25,17 @@ import org.apache.lucene.index.Terms;
|
|||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A query that executes high-frequency terms in an optional sub-query to prevent
|
* A query that executes high-frequency terms in an optional sub-query to prevent
|
||||||
* slow queries due to "common" terms like stopwords. This query
|
* slow queries due to "common" terms like stopwords. This query
|
||||||
@ -149,7 +149,7 @@ public class CommonTermsQuery extends Query {
|
|||||||
if (this.terms.isEmpty()) {
|
if (this.terms.isEmpty()) {
|
||||||
return new BooleanQuery();
|
return new BooleanQuery();
|
||||||
} else if (this.terms.size() == 1) {
|
} else if (this.terms.size() == 1) {
|
||||||
final TermQuery tq = new TermQuery(this.terms.get(0));
|
final Query tq = newTermQuery(this.terms.get(0), null);
|
||||||
tq.setBoost(getBoost());
|
tq.setBoost(getBoost());
|
||||||
return tq;
|
return tq;
|
||||||
}
|
}
|
||||||
@ -186,7 +186,7 @@ public class CommonTermsQuery extends Query {
|
|||||||
for (int i = 0; i < queryTerms.length; i++) {
|
for (int i = 0; i < queryTerms.length; i++) {
|
||||||
TermContext termContext = contextArray[i];
|
TermContext termContext = contextArray[i];
|
||||||
if (termContext == null) {
|
if (termContext == null) {
|
||||||
lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
|
lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
|
||||||
} else {
|
} else {
|
||||||
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|
||||||
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
|
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
|
||||||
@ -351,7 +351,7 @@ public class CommonTermsQuery extends Query {
|
|||||||
}
|
}
|
||||||
for (int i = 0; i < terms.size(); i++) {
|
for (int i = 0; i < terms.size(); i++) {
|
||||||
Term t = terms.get(i);
|
Term t = terms.get(i);
|
||||||
buffer.append(new TermQuery(t).toString());
|
buffer.append(newTermQuery(t, null).toString());
|
||||||
|
|
||||||
if (i != terms.size() - 1) buffer.append(", ");
|
if (i != terms.size() - 1) buffer.append(", ");
|
||||||
}
|
}
|
||||||
@ -412,4 +412,14 @@ public class CommonTermsQuery extends Query {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds the query used for a single term; by default a new TermQuery instance.
|
||||||
|
* <p>This is intended for subclasses that wish to customize the generated queries; an override may return any <code>Query</code>.</p>
|
||||||
|
* @param term the term to build a query for
|
||||||
|
* @param context the TermContext to be used to create the low level term query. Can be <code>null</code>, in which case the query is built from the term alone.
|
||||||
|
* @return a new TermQuery instance by default; declared as <code>Query</code> so subclasses can substitute another query type
|
||||||
|
*/
|
||||||
|
protected Query newTermQuery(Term term, TermContext context) {
|
||||||
|
return context == null ? new TermQuery(term) : new TermQuery(term, context);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,14 +17,6 @@ package org.apache.lucene.queries;
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.AtomicReader;
|
import org.apache.lucene.index.AtomicReader;
|
||||||
@ -33,12 +25,14 @@ import org.apache.lucene.index.IndexReader;
|
|||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermContext;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryUtils;
|
import org.apache.lucene.search.QueryUtils;
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
@ -49,7 +43,15 @@ import org.apache.lucene.util.LineFileDocs;
|
|||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
public class CommonTermsQueryTest extends LuceneTestCase {
|
public class CommonTermsQueryTest extends LuceneTestCase {
|
||||||
|
|
||||||
@ -339,6 +341,60 @@ public class CommonTermsQueryTest extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtend() throws IOException { // compares a stock CommonTermsQuery with a subclass overriding newTermQuery
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
String[] docs = new String[] {"this is the end of the world right",
|
||||||
|
"is this it or maybe not",
|
||||||
|
"this is the end of the universe as we know it",
|
||||||
|
"there is the famous restaurant at the end of the universe",};
|
||||||
|
for (int i = 0; i < docs.length; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(newStringField("id", "" + i, Field.Store.YES)); // stored so the assertions below can read the id back
|
||||||
|
doc.add(newTextField("field", docs[i], Field.Store.NO));
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader r = w.getReader();
|
||||||
|
IndexSearcher s = newSearcher(r);
|
||||||
|
{
|
||||||
|
// baseline: the unmodified CommonTermsQuery, expected hit order is ids 0, 2, 3
|
||||||
|
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||||
|
random().nextBoolean() ? 2.0f : 0.5f);
|
||||||
|
query.add(new Term("field", "is"));
|
||||||
|
query.add(new Term("field", "this"));
|
||||||
|
query.add(new Term("field", "end"));
|
||||||
|
query.add(new Term("field", "world"));
|
||||||
|
query.add(new Term("field", "universe"));
|
||||||
|
query.add(new Term("field", "right"));
|
||||||
|
TopDocs search = s.search(query, 10);
|
||||||
|
assertEquals(search.totalHits, 3); // NOTE(review): JUnit order is (expected, actual); arguments look swapped
|
||||||
|
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
|
||||||
|
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
|
||||||
|
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// this one boosts the termQuery("field", "universe") to 100 (ExtendedCommonTermsQuery calls setBoost(100f)), so the expected hit order becomes ids 2, 3, 0
|
||||||
|
CommonTermsQuery query = new ExtendedCommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
|
||||||
|
random().nextBoolean() ? 2.0f : 0.5f);
|
||||||
|
query.add(new Term("field", "is"));
|
||||||
|
query.add(new Term("field", "this"));
|
||||||
|
query.add(new Term("field", "end"));
|
||||||
|
query.add(new Term("field", "world"));
|
||||||
|
query.add(new Term("field", "universe"));
|
||||||
|
query.add(new Term("field", "right"));
|
||||||
|
TopDocs search = s.search(query, 10);
|
||||||
|
assertEquals(search.totalHits, 3); // NOTE(review): JUnit order is (expected, actual); arguments look swapped
|
||||||
|
assertEquals("2", r.document(search.scoreDocs[0].doc).get("id"));
|
||||||
|
assertEquals("3", r.document(search.scoreDocs[1].doc).get("id"));
|
||||||
|
assertEquals("0", r.document(search.scoreDocs[2].doc).get("id"));
|
||||||
|
}
|
||||||
|
r.close();
|
||||||
|
w.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testRandomIndex() throws IOException {
|
public void testRandomIndex() throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
@ -480,4 +536,20 @@ public class CommonTermsQueryTest extends LuceneTestCase {
|
|||||||
|
|
||||||
lineFileDocs.close();
|
lineFileDocs.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final class ExtendedCommonTermsQuery extends CommonTermsQuery { // test-only subclass exercising the newTermQuery extension hook
|
||||||
|
|
||||||
|
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
|
||||||
|
super(highFreqOccur, lowFreqOccur, maxTermFrequency); // arguments are passed through unchanged
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query newTermQuery(Term term, TermContext context) {
|
||||||
|
Query query = super.newTermQuery(term, context); // start from the default query built by the parent
|
||||||
|
if (term.text().equals("universe")) { // matches on term text only; the field name is not checked
|
||||||
|
query.setBoost(100f); // boost of 100 applied to the "universe" term query only
|
||||||
|
}
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user