Expose CommonTermsQuery in Match & MultiMatch and enable highlighting
Closes #2591
This commit is contained in:
parent
bfdf8fe590
commit
48488f707f
|
@ -1,6 +1,5 @@
|
||||||
package org.apache.lucene.index.memory;
|
package org.apache.lucene.index.memory;
|
||||||
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
/*
|
/*
|
||||||
* Licensed to ElasticSearch and Shay Banon under one
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
package org.apache.lucene.queries;
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.elasticsearch.common.lucene.search.Queries;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extended version of {@link CommonTermsQuery} that allows to pass in a
|
||||||
|
* <tt>minimumNumberShouldMatch</tt> specification that uses the actual num of high frequent terms
|
||||||
|
* to calculate the minimum matching terms.
|
||||||
|
*/
|
||||||
|
public class ExtendedCommonTermsQuery extends XCommonTermsQuery {
|
||||||
|
|
||||||
|
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
|
||||||
|
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
|
||||||
|
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String minNumShouldMatchSpec;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int getMinimumNumberShouldMatch(int numOptional) {
|
||||||
|
if (minNumShouldMatchSpec == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return Queries.calculateMinShouldMatch(numOptional, minNumShouldMatchSpec);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMinimumNumberShouldMatch(String spec) {
|
||||||
|
this.minNumShouldMatchSpec = spec;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,381 @@
|
||||||
|
package org.apache.lucene.queries;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.Fields;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermContext;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A query that executes high-frequency terms in a optional sub-query to prevent
|
||||||
|
* slow queries due to "common" terms like stopwords. This query basically
|
||||||
|
* builds 2 queries off the {@link #add(Term) added} terms where low-frequency
|
||||||
|
* terms are added to a required boolean clause and high-frequency terms are
|
||||||
|
* added to an optional boolean clause. The optional clause is only executed if
|
||||||
|
* the required "low-frequency' clause matches. Scores produced by this query
|
||||||
|
* will be slightly different to plain {@link BooleanQuery} scorer mainly due to
|
||||||
|
* differences in the {@link Similarity#coord(int,int) number of leave queries}
|
||||||
|
* in the required boolean clause. In the most cases high-frequency terms are
|
||||||
|
* unlikely to significantly contribute to the document score unless at least
|
||||||
|
* one of the low-frequency terms are matched such that this query can improve
|
||||||
|
* query execution times significantly if applicable.
|
||||||
|
* <p>
|
||||||
|
* {@link XCommonTermsQuery} has several advantages over stopword filtering at
|
||||||
|
* index or query time since a term can be "classified" based on the actual
|
||||||
|
* document frequency in the index and can prevent slow queries even across
|
||||||
|
* domains without specialized stopword files.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* <b>Note:</b> if the query only contains high-frequency terms the query is
|
||||||
|
* rewritten into a plain conjunction query ie. all high-frequency terms need to
|
||||||
|
* match in order to match a document.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
//LUCENE MONITOR - Copied from CommonTermsQuery changes are tracked with //CHANGE
|
||||||
|
public class XCommonTermsQuery extends Query {
|
||||||
|
/*
|
||||||
|
* TODO maybe it would make sense to abstract this even further and allow to
|
||||||
|
* rewrite to dismax rather than boolean. Yet, this can already be subclassed
|
||||||
|
* to do so.
|
||||||
|
*/
|
||||||
|
protected final List<Term> terms = new ArrayList<Term>();
|
||||||
|
protected final boolean disableCoord;
|
||||||
|
protected final float maxTermFrequency;
|
||||||
|
protected final Occur lowFreqOccur;
|
||||||
|
protected final Occur highFreqOccur;
|
||||||
|
protected float lowFreqBoost = 1.0f;
|
||||||
|
protected float highFreqBoost = 1.0f;
|
||||||
|
//CHANGE made minNr... a float for fractions
|
||||||
|
protected float minNrShouldMatch = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link XCommonTermsQuery}
|
||||||
|
*
|
||||||
|
* @param highFreqOccur
|
||||||
|
* {@link Occur} used for high frequency terms
|
||||||
|
* @param lowFreqOccur
|
||||||
|
* {@link Occur} used for low frequency terms
|
||||||
|
* @param maxTermFrequency
|
||||||
|
* a value in [0..1] (or absolute number >=1) representing the
|
||||||
|
* maximum threshold of a terms document frequency to be considered a
|
||||||
|
* low frequency term.
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* if {@link Occur#MUST_NOT} is pass as lowFreqOccur or
|
||||||
|
* highFreqOccur
|
||||||
|
*/
|
||||||
|
public XCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur,
|
||||||
|
float maxTermFrequency) {
|
||||||
|
this(highFreqOccur, lowFreqOccur, maxTermFrequency, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link XCommonTermsQuery}
|
||||||
|
*
|
||||||
|
* @param highFreqOccur
|
||||||
|
* {@link Occur} used for high frequency terms
|
||||||
|
* @param lowFreqOccur
|
||||||
|
* {@link Occur} used for low frequency terms
|
||||||
|
* @param maxTermFrequency
|
||||||
|
* a value in [0..1] (or absolute number >=1) representing the
|
||||||
|
* maximum threshold of a terms document frequency to be considered a
|
||||||
|
* low frequency term.
|
||||||
|
* @param disableCoord
|
||||||
|
* disables {@link Similarity#coord(int,int)} in scoring for the low
|
||||||
|
* / high frequency sub-queries
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* if {@link Occur#MUST_NOT} is pass as lowFreqOccur or
|
||||||
|
* highFreqOccur
|
||||||
|
*/
|
||||||
|
public XCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur,
|
||||||
|
float maxTermFrequency, boolean disableCoord) {
|
||||||
|
if (highFreqOccur == Occur.MUST_NOT) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"highFreqOccur should be MUST or SHOULD but was MUST_NOT");
|
||||||
|
}
|
||||||
|
if (lowFreqOccur == Occur.MUST_NOT) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"lowFreqOccur should be MUST or SHOULD but was MUST_NOT");
|
||||||
|
}
|
||||||
|
this.disableCoord = disableCoord;
|
||||||
|
this.highFreqOccur = highFreqOccur;
|
||||||
|
this.lowFreqOccur = lowFreqOccur;
|
||||||
|
this.maxTermFrequency = maxTermFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a term to the {@link CommonTermsQuery}
|
||||||
|
*
|
||||||
|
* @param term
|
||||||
|
* the term to add
|
||||||
|
*/
|
||||||
|
public void add(Term term) {
|
||||||
|
if (term == null) {
|
||||||
|
throw new IllegalArgumentException("Term must not be null");
|
||||||
|
}
|
||||||
|
this.terms.add(term);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
if (this.terms.isEmpty()) {
|
||||||
|
return new BooleanQuery();
|
||||||
|
} else if (this.terms.size() == 1) {
|
||||||
|
final TermQuery tq = new TermQuery(this.terms.get(0));
|
||||||
|
tq.setBoost(getBoost());
|
||||||
|
return tq;
|
||||||
|
}
|
||||||
|
final List<AtomicReaderContext> leaves = reader.leaves();
|
||||||
|
final int maxDoc = reader.maxDoc();
|
||||||
|
final TermContext[] contextArray = new TermContext[terms.size()];
|
||||||
|
final Term[] queryTerms = this.terms.toArray(new Term[0]);
|
||||||
|
collectTermContext(reader, leaves, contextArray, queryTerms);
|
||||||
|
return buildQuery(maxDoc, contextArray, queryTerms);
|
||||||
|
}
|
||||||
|
|
||||||
|
//CHANGE added to get num optional
|
||||||
|
protected int getMinimumNumberShouldMatch(int numOptional) {
|
||||||
|
if (minNrShouldMatch >= 1.0f) {
|
||||||
|
return (int) minNrShouldMatch;
|
||||||
|
}
|
||||||
|
return (int) (minNrShouldMatch * numOptional);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query buildQuery(final int maxDoc,
|
||||||
|
final TermContext[] contextArray, final Term[] queryTerms) {
|
||||||
|
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
|
||||||
|
BooleanQuery highFreq = new BooleanQuery(disableCoord);
|
||||||
|
highFreq.setBoost(highFreqBoost);
|
||||||
|
lowFreq.setBoost(lowFreqBoost);
|
||||||
|
|
||||||
|
BooleanQuery query = new BooleanQuery(true);
|
||||||
|
|
||||||
|
for (int i = 0; i < queryTerms.length; i++) {
|
||||||
|
TermContext termContext = contextArray[i];
|
||||||
|
if (termContext == null) {
|
||||||
|
lowFreq.add(new TermQuery(queryTerms[i]), lowFreqOccur);
|
||||||
|
} else {
|
||||||
|
if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
|
||||||
|
|| (termContext.docFreq() > (int) Math.ceil(maxTermFrequency
|
||||||
|
* (float) maxDoc))) {
|
||||||
|
highFreq
|
||||||
|
.add(new TermQuery(queryTerms[i], termContext), highFreqOccur);
|
||||||
|
} else {
|
||||||
|
lowFreq.add(new TermQuery(queryTerms[i], termContext), lowFreqOccur);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (lowFreqOccur == Occur.SHOULD) {
|
||||||
|
lowFreq.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch(lowFreq.clauses().size()));
|
||||||
|
}
|
||||||
|
if (lowFreq.clauses().isEmpty()) {
|
||||||
|
/*
|
||||||
|
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
|
||||||
|
* prevent slow queries.
|
||||||
|
*/
|
||||||
|
if (highFreqOccur == Occur.MUST) {
|
||||||
|
highFreq.setBoost(getBoost());
|
||||||
|
return highFreq;
|
||||||
|
} else {
|
||||||
|
BooleanQuery highFreqConjunction = new BooleanQuery();
|
||||||
|
for (BooleanClause booleanClause : highFreq) {
|
||||||
|
highFreqConjunction.add(booleanClause.getQuery(), Occur.MUST);
|
||||||
|
}
|
||||||
|
highFreqConjunction.setBoost(getBoost());
|
||||||
|
return highFreqConjunction;
|
||||||
|
|
||||||
|
}
|
||||||
|
} else if (highFreq.clauses().isEmpty()) {
|
||||||
|
// only do low freq terms - we don't have high freq terms
|
||||||
|
lowFreq.setBoost(getBoost());
|
||||||
|
return lowFreq;
|
||||||
|
} else {
|
||||||
|
query.add(highFreq, Occur.SHOULD);
|
||||||
|
query.add(lowFreq, Occur.MUST);
|
||||||
|
query.setBoost(getBoost());
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void collectTermContext(IndexReader reader,
|
||||||
|
List<AtomicReaderContext> leaves, TermContext[] contextArray,
|
||||||
|
Term[] queryTerms) throws IOException {
|
||||||
|
TermsEnum termsEnum = null;
|
||||||
|
for (AtomicReaderContext context : leaves) {
|
||||||
|
final Fields fields = context.reader().fields();
|
||||||
|
if (fields == null) {
|
||||||
|
// reader has no fields
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < queryTerms.length; i++) {
|
||||||
|
Term term = queryTerms[i];
|
||||||
|
TermContext termContext = contextArray[i];
|
||||||
|
final Terms terms = fields.terms(term.field());
|
||||||
|
if (terms == null) {
|
||||||
|
// field does not exist
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
termsEnum = terms.iterator(termsEnum);
|
||||||
|
assert termsEnum != null;
|
||||||
|
|
||||||
|
if (termsEnum == TermsEnum.EMPTY) continue;
|
||||||
|
if (termsEnum.seekExact(term.bytes(), false)) {
|
||||||
|
if (termContext == null) {
|
||||||
|
contextArray[i] = new TermContext(reader.getContext(),
|
||||||
|
termsEnum.termState(), context.ord, termsEnum.docFreq(),
|
||||||
|
termsEnum.totalTermFreq());
|
||||||
|
} else {
|
||||||
|
termContext.register(termsEnum.termState(), context.ord,
|
||||||
|
termsEnum.docFreq(), termsEnum.totalTermFreq());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true iff {@link Similarity#coord(int,int)} is disabled in scoring
|
||||||
|
* for the high and low frequency query instance. The top level query will
|
||||||
|
* always disable coords.
|
||||||
|
*/
|
||||||
|
public boolean isCoordDisabled() {
|
||||||
|
return disableCoord;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specifies a minimum number of the optional BooleanClauses which must be
|
||||||
|
* satisfied in order to produce a match on the low frequency terms query
|
||||||
|
* part.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* By default no optional clauses are necessary for a match (unless there are
|
||||||
|
* no required clauses). If this method is used, then the specified number of
|
||||||
|
* clauses is required.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param min
|
||||||
|
* the number of optional clauses that must match
|
||||||
|
*/
|
||||||
|
//CHANGE accepts now a float
|
||||||
|
public void setMinimumNumberShouldMatch(float min) {
|
||||||
|
this.minNrShouldMatch = min;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the minimum number of the optional BooleanClauses which must be
|
||||||
|
* satisfied.
|
||||||
|
*/
|
||||||
|
//CHANGE returns now a float
|
||||||
|
public float getMinimumNumberShouldMatch() {
|
||||||
|
return minNrShouldMatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void extractTerms(Set<Term> terms) {
|
||||||
|
terms.addAll(this.terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
StringBuilder buffer = new StringBuilder();
|
||||||
|
boolean needParens = (getBoost() != 1.0)
|
||||||
|
|| (getMinimumNumberShouldMatch() > 0);
|
||||||
|
if (needParens) {
|
||||||
|
buffer.append("(");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < terms.size(); i++) {
|
||||||
|
Term t = terms.get(i);
|
||||||
|
buffer.append(new TermQuery(t).toString());
|
||||||
|
|
||||||
|
if (i != terms.size() - 1) buffer.append(", ");
|
||||||
|
}
|
||||||
|
if (needParens) {
|
||||||
|
buffer.append(")");
|
||||||
|
}
|
||||||
|
if (getMinimumNumberShouldMatch() > 0) {
|
||||||
|
buffer.append('~');
|
||||||
|
buffer.append(getMinimumNumberShouldMatch());
|
||||||
|
}
|
||||||
|
if (getBoost() != 1.0f) {
|
||||||
|
buffer.append(ToStringUtils.boost(getBoost()));
|
||||||
|
}
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = super.hashCode();
|
||||||
|
result = prime * result + (disableCoord ? 1231 : 1237);
|
||||||
|
result = prime * result + Float.floatToIntBits(highFreqBoost);
|
||||||
|
result = prime * result
|
||||||
|
+ ((highFreqOccur == null) ? 0 : highFreqOccur.hashCode());
|
||||||
|
result = prime * result + Float.floatToIntBits(lowFreqBoost);
|
||||||
|
result = prime * result
|
||||||
|
+ ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
|
||||||
|
result = prime * result + Float.floatToIntBits(maxTermFrequency);
|
||||||
|
result = prime * result + Float.floatToIntBits(minNrShouldMatch);
|
||||||
|
result = prime * result + ((terms == null) ? 0 : terms.hashCode());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (this == obj) return true;
|
||||||
|
if (!super.equals(obj)) return false;
|
||||||
|
if (getClass() != obj.getClass()) return false;
|
||||||
|
XCommonTermsQuery other = (XCommonTermsQuery) obj;
|
||||||
|
if (disableCoord != other.disableCoord) return false;
|
||||||
|
if (Float.floatToIntBits(highFreqBoost) != Float
|
||||||
|
.floatToIntBits(other.highFreqBoost)) return false;
|
||||||
|
if (highFreqOccur != other.highFreqOccur) return false;
|
||||||
|
if (Float.floatToIntBits(lowFreqBoost) != Float
|
||||||
|
.floatToIntBits(other.lowFreqBoost)) return false;
|
||||||
|
if (lowFreqOccur != other.lowFreqOccur) return false;
|
||||||
|
if (Float.floatToIntBits(maxTermFrequency) != Float
|
||||||
|
.floatToIntBits(other.maxTermFrequency)) return false;
|
||||||
|
if (minNrShouldMatch != other.minNrShouldMatch) return false;
|
||||||
|
if (terms == null) {
|
||||||
|
if (other.terms != null) return false;
|
||||||
|
} else if (!terms.equals(other.terms)) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
//CHANGE added
|
||||||
|
public List<Term> terms() {
|
||||||
|
return this.terms;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -20,6 +20,7 @@
|
||||||
package org.apache.lucene.search.vectorhighlight;
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||||
import org.apache.lucene.queries.FilterClause;
|
import org.apache.lucene.queries.FilterClause;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
@ -97,6 +98,8 @@ public class CustomFieldQuery extends FieldQuery {
|
||||||
}
|
}
|
||||||
} else if (sourceQuery instanceof FiltersFunctionScoreQuery) {
|
} else if (sourceQuery instanceof FiltersFunctionScoreQuery) {
|
||||||
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries);
|
flatten(((FiltersFunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries);
|
||||||
|
} else if (sourceQuery instanceof ExtendedCommonTermsQuery) {
|
||||||
|
flatten(((ExtendedCommonTermsQuery)sourceQuery).rewrite(reader), reader, flatQueries);
|
||||||
} else {
|
} else {
|
||||||
super.flatten(sourceQuery, reader, flatQueries);
|
super.flatten(sourceQuery, reader, flatQueries);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.query;
|
package org.elasticsearch.index.query;
|
||||||
|
|
||||||
|
import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameQuery;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
@ -26,6 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queries.CommonTermsQuery;
|
import org.apache.lucene.queries.CommonTermsQuery;
|
||||||
|
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
@ -143,20 +146,15 @@ public class CommonTermsQueryParser implements QueryParser {
|
||||||
if (value == null) {
|
if (value == null) {
|
||||||
throw new QueryParsingException(parseContext.index(), "No text specified for text query");
|
throw new QueryParsingException(parseContext.index(), "No text specified for text query");
|
||||||
}
|
}
|
||||||
CommonTermsQuery query = new CommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
|
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
|
||||||
int numTerms = parseQueryString(query, value.toString(), fieldName, parseContext, queryAnalyzer);
|
|
||||||
if (numTerms == 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (minimumShouldMatch != null) {
|
|
||||||
query.setMinimumNumberShouldMatch(Queries.calculateMinShouldMatch(numTerms, minimumShouldMatch));
|
|
||||||
}
|
|
||||||
query.setBoost(boost);
|
query.setBoost(boost);
|
||||||
return query;
|
return parseQueryString(query, value.toString(), fieldName, parseContext, queryAnalyzer, minimumShouldMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final int parseQueryString(CommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
|
|
||||||
String queryAnalyzer) throws IOException {
|
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
|
||||||
|
String queryAnalyzer, String minimumShouldMatch) throws IOException {
|
||||||
|
|
||||||
FieldMapper<?> mapper = null;
|
FieldMapper<?> mapper = null;
|
||||||
String field;
|
String field;
|
||||||
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
|
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
|
||||||
|
@ -197,7 +195,11 @@ public class CommonTermsQueryParser implements QueryParser {
|
||||||
query.add(new Term(field, ref));
|
query.add(new Term(field, ref));
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
return count;
|
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
query.setMinimumNumberShouldMatch(minimumShouldMatch);
|
||||||
|
return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -87,6 +87,8 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer
|
||||||
|
|
||||||
private ZeroTermsQuery zeroTermsQuery;
|
private ZeroTermsQuery zeroTermsQuery;
|
||||||
|
|
||||||
|
private Float cutoff_Frequency = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new text query.
|
* Constructs a new text query.
|
||||||
*/
|
*/
|
||||||
|
@ -158,6 +160,16 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a cutoff value in [0..1] (or absolute number >=1) representing the
|
||||||
|
* maximum threshold of a terms document frequency to be considered a low
|
||||||
|
* frequency term.
|
||||||
|
*/
|
||||||
|
public MatchQueryBuilder cutoffFrequency(float cutoff) {
|
||||||
|
this.cutoff_Frequency = cutoff;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public MatchQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
|
public MatchQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
|
||||||
this.minimumShouldMatch = minimumShouldMatch;
|
this.minimumShouldMatch = minimumShouldMatch;
|
||||||
return this;
|
return this;
|
||||||
|
@ -241,6 +253,10 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer
|
||||||
if (zeroTermsQuery != null) {
|
if (zeroTermsQuery != null) {
|
||||||
builder.field("zero_terms_query", zeroTermsQuery.toString());
|
builder.field("zero_terms_query", zeroTermsQuery.toString());
|
||||||
}
|
}
|
||||||
|
if (cutoff_Frequency != null) {
|
||||||
|
builder.field("cutoff_frequency", cutoff_Frequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
builder.endObject();
|
builder.endObject();
|
||||||
builder.endObject();
|
builder.endObject();
|
||||||
|
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.query;
|
package org.elasticsearch.index.query;
|
||||||
|
|
||||||
|
import org.apache.lucene.queries.CommonTermsQuery;
|
||||||
|
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
@ -126,6 +128,8 @@ public class MatchQueryParser implements QueryParser {
|
||||||
matchQuery.setTranspositions(parser.booleanValue());
|
matchQuery.setTranspositions(parser.booleanValue());
|
||||||
} else if ("lenient".equals(currentFieldName)) {
|
} else if ("lenient".equals(currentFieldName)) {
|
||||||
matchQuery.setLenient(parser.booleanValue());
|
matchQuery.setLenient(parser.booleanValue());
|
||||||
|
} else if ("cutoff_frequency".equals(currentFieldName)) {
|
||||||
|
matchQuery.setCommonTermsCutoff(parser.floatValue());
|
||||||
} else if ("zero_terms_query".equals(currentFieldName)) {
|
} else if ("zero_terms_query".equals(currentFieldName)) {
|
||||||
String zeroTermsDocs = parser.text();
|
String zeroTermsDocs = parser.text();
|
||||||
if ("none".equalsIgnoreCase(zeroTermsDocs)) {
|
if ("none".equalsIgnoreCase(zeroTermsDocs)) {
|
||||||
|
@ -161,8 +165,9 @@ public class MatchQueryParser implements QueryParser {
|
||||||
|
|
||||||
if (query instanceof BooleanQuery) {
|
if (query instanceof BooleanQuery) {
|
||||||
Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
|
Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
|
||||||
|
} else if (query instanceof ExtendedCommonTermsQuery) {
|
||||||
|
((ExtendedCommonTermsQuery)query).setMinimumNumberShouldMatch(minimumShouldMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
query.setBoost(boost);
|
query.setBoost(boost);
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,8 @@ public class MultiMatchQueryBuilder extends BaseQueryBuilder implements Boostabl
|
||||||
|
|
||||||
private Boolean lenient;
|
private Boolean lenient;
|
||||||
|
|
||||||
|
private Float cutoffFrequency = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new text query.
|
* Constructs a new text query.
|
||||||
*/
|
*/
|
||||||
|
@ -192,6 +194,17 @@ public class MultiMatchQueryBuilder extends BaseQueryBuilder implements Boostabl
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a cutoff value in [0..1] (or absolute number >=1) representing the
|
||||||
|
* maximum threshold of a terms document frequency to be considered a low
|
||||||
|
* frequency term.
|
||||||
|
*/
|
||||||
|
public MultiMatchQueryBuilder cutoffFrequency(float cutoff) {
|
||||||
|
this.cutoffFrequency = cutoff;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void doXContent(XContentBuilder builder, Params params) throws IOException {
|
public void doXContent(XContentBuilder builder, Params params) throws IOException {
|
||||||
builder.startObject(MultiMatchQueryParser.NAME);
|
builder.startObject(MultiMatchQueryParser.NAME);
|
||||||
|
@ -256,6 +269,10 @@ public class MultiMatchQueryBuilder extends BaseQueryBuilder implements Boostabl
|
||||||
builder.field("lenient", lenient);
|
builder.field("lenient", lenient);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cutoffFrequency != null) {
|
||||||
|
builder.field("cutoff_frequency", cutoffFrequency);
|
||||||
|
}
|
||||||
|
|
||||||
builder.endObject();
|
builder.endObject();
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -145,6 +145,8 @@ public class MultiMatchQueryParser implements QueryParser {
|
||||||
multiMatchQuery.setUseDisMax(parser.booleanValue());
|
multiMatchQuery.setUseDisMax(parser.booleanValue());
|
||||||
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
|
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
|
||||||
multiMatchQuery.setTieBreaker(parser.floatValue());
|
multiMatchQuery.setTieBreaker(parser.floatValue());
|
||||||
|
} else if ("cutoff_frequency".equals(currentFieldName)) {
|
||||||
|
multiMatchQuery.setCommonTermsCutoff(parser.floatValue());
|
||||||
} else if ("lenient".equals(currentFieldName)) {
|
} else if ("lenient".equals(currentFieldName)) {
|
||||||
multiMatchQuery.setLenient(parser.booleanValue());
|
multiMatchQuery.setLenient(parser.booleanValue());
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.queries.CommonTermsQuery;
|
||||||
|
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.UnicodeUtil;
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
|
@ -70,19 +72,24 @@ public class MatchQuery {
|
||||||
protected int phraseSlop = 0;
|
protected int phraseSlop = 0;
|
||||||
|
|
||||||
protected String fuzziness = null;
|
protected String fuzziness = null;
|
||||||
|
|
||||||
protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
|
protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
|
||||||
|
|
||||||
protected int maxExpansions = FuzzyQuery.defaultMaxExpansions;
|
protected int maxExpansions = FuzzyQuery.defaultMaxExpansions;
|
||||||
|
|
||||||
//LUCENE 4 UPGRADE we need a default value for this!
|
//LUCENE 4 UPGRADE we need a default value for this!
|
||||||
protected boolean transpositions = false;
|
protected boolean transpositions = false;
|
||||||
|
|
||||||
|
|
||||||
protected MultiTermQuery.RewriteMethod rewriteMethod;
|
protected MultiTermQuery.RewriteMethod rewriteMethod;
|
||||||
|
|
||||||
protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod;
|
protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod;
|
||||||
|
|
||||||
protected boolean lenient;
|
protected boolean lenient;
|
||||||
|
|
||||||
protected ZeroTermsQuery zeroTermsQuery = ZeroTermsQuery.NONE;
|
protected ZeroTermsQuery zeroTermsQuery = ZeroTermsQuery.NONE;
|
||||||
|
|
||||||
|
protected Float commonTermsCutoff = null;
|
||||||
|
|
||||||
public MatchQuery(QueryParseContext parseContext) {
|
public MatchQuery(QueryParseContext parseContext) {
|
||||||
this.parseContext = parseContext;
|
this.parseContext = parseContext;
|
||||||
}
|
}
|
||||||
|
@ -95,6 +102,10 @@ public class MatchQuery {
|
||||||
this.occur = occur;
|
this.occur = occur;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setCommonTermsCutoff(float cutoff) {
|
||||||
|
this.commonTermsCutoff = Float.valueOf(cutoff);
|
||||||
|
}
|
||||||
|
|
||||||
public void setEnablePositionIncrements(boolean enablePositionIncrements) {
|
public void setEnablePositionIncrements(boolean enablePositionIncrements) {
|
||||||
this.enablePositionIncrements = enablePositionIncrements;
|
this.enablePositionIncrements = enablePositionIncrements;
|
||||||
}
|
}
|
||||||
|
@ -221,19 +232,27 @@ public class MatchQuery {
|
||||||
if (numTokens == 1) {
|
if (numTokens == 1) {
|
||||||
boolean hasNext = buffer.incrementToken();
|
boolean hasNext = buffer.incrementToken();
|
||||||
assert hasNext == true;
|
assert hasNext == true;
|
||||||
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
final Query q = newTermQuery(mapper, new Term(field, termToByteRef(termAtt)));
|
||||||
final Query q = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef())));
|
|
||||||
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
|
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
|
||||||
}
|
}
|
||||||
BooleanQuery q = new BooleanQuery(positionCount == 1);
|
if (commonTermsCutoff != null) {
|
||||||
for (int i = 0; i < numTokens; i++) {
|
ExtendedCommonTermsQuery q = new ExtendedCommonTermsQuery(occur, occur, commonTermsCutoff, positionCount == 1);
|
||||||
boolean hasNext = buffer.incrementToken();
|
for (int i = 0; i < numTokens; i++) {
|
||||||
assert hasNext == true;
|
boolean hasNext = buffer.incrementToken();
|
||||||
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
assert hasNext == true;
|
||||||
final Query currentQuery = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef())));
|
q.add(new Term(field, termToByteRef(termAtt)));
|
||||||
q.add(currentQuery, occur);
|
}
|
||||||
|
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
|
||||||
|
} else {
|
||||||
|
BooleanQuery q = new BooleanQuery(positionCount == 1);
|
||||||
|
for (int i = 0; i < numTokens; i++) {
|
||||||
|
boolean hasNext = buffer.incrementToken();
|
||||||
|
assert hasNext == true;
|
||||||
|
final Query currentQuery = newTermQuery(mapper, new Term(field, termToByteRef(termAtt)));
|
||||||
|
q.add(currentQuery, occur);
|
||||||
|
}
|
||||||
|
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
|
||||||
}
|
}
|
||||||
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
|
|
||||||
} else if (type == Type.PHRASE) {
|
} else if (type == Type.PHRASE) {
|
||||||
if (severalTokensAtSamePosition) {
|
if (severalTokensAtSamePosition) {
|
||||||
final MultiPhraseQuery mpq = new MultiPhraseQuery();
|
final MultiPhraseQuery mpq = new MultiPhraseQuery();
|
||||||
|
@ -256,7 +275,7 @@ public class MatchQuery {
|
||||||
}
|
}
|
||||||
position += positionIncrement;
|
position += positionIncrement;
|
||||||
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
||||||
multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef())));
|
multiTerms.add(new Term(field, termToByteRef(termAtt)));
|
||||||
}
|
}
|
||||||
if (enablePositionIncrements) {
|
if (enablePositionIncrements) {
|
||||||
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
|
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
|
||||||
|
@ -277,9 +296,9 @@ public class MatchQuery {
|
||||||
if (enablePositionIncrements) {
|
if (enablePositionIncrements) {
|
||||||
position += positionIncrement;
|
position += positionIncrement;
|
||||||
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
||||||
pq.add(new Term(field, termToByteRef(termAtt, new BytesRef())), position);
|
pq.add(new Term(field, termToByteRef(termAtt)), position);
|
||||||
} else {
|
} else {
|
||||||
pq.add(new Term(field, termToByteRef(termAtt, new BytesRef())));
|
pq.add(new Term(field, termToByteRef(termAtt)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
|
return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
|
||||||
|
@ -305,8 +324,7 @@ public class MatchQuery {
|
||||||
multiTerms.clear();
|
multiTerms.clear();
|
||||||
}
|
}
|
||||||
position += positionIncrement;
|
position += positionIncrement;
|
||||||
//LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8
|
multiTerms.add(new Term(field, termToByteRef(termAtt)));
|
||||||
multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef())));
|
|
||||||
}
|
}
|
||||||
if (enablePositionIncrements) {
|
if (enablePositionIncrements) {
|
||||||
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
|
mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
|
||||||
|
@ -344,7 +362,8 @@ public class MatchQuery {
|
||||||
return new TermQuery(term);
|
return new TermQuery(term);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static BytesRef termToByteRef(CharTermAttribute attr, BytesRef ref) {
|
private static BytesRef termToByteRef(CharTermAttribute attr) {
|
||||||
|
final BytesRef ref = new BytesRef();
|
||||||
UnicodeUtil.UTF16toUTF8(attr.buffer(), 0, attr.length(), ref);
|
UnicodeUtil.UTF16toUTF8(attr.buffer(), 0, attr.length(), ref);
|
||||||
return ref;
|
return ref;
|
||||||
}
|
}
|
||||||
|
|
|
@ -344,7 +344,7 @@ public class SearchServiceTransportAction extends AbstractComponent {
|
||||||
try {
|
try {
|
||||||
FetchSearchResult result = searchService.executeFetchPhase(request);
|
FetchSearchResult result = searchService.executeFetchPhase(request);
|
||||||
listener.onResult(result);
|
listener.onResult(result);
|
||||||
} catch (Exception e) {
|
} catch (Throwable e) {
|
||||||
listener.onFailure(e);
|
listener.onFailure(e);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -378,7 +378,7 @@ public class SearchServiceTransportAction extends AbstractComponent {
|
||||||
try {
|
try {
|
||||||
QuerySearchResult result = searchService.executeScan(request);
|
QuerySearchResult result = searchService.executeScan(request);
|
||||||
listener.onResult(result);
|
listener.onResult(result);
|
||||||
} catch (Exception e) {
|
} catch (Throwable e) {
|
||||||
listener.onFailure(e);
|
listener.onFailure(e);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -20,9 +20,14 @@
|
||||||
package org.elasticsearch.search.highlight;
|
package org.elasticsearch.search.highlight;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.queries.XCommonTermsQuery;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.FilteredQuery;
|
import org.apache.lucene.search.FilteredQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.highlight.QueryScorer;
|
import org.apache.lucene.search.highlight.QueryScorer;
|
||||||
import org.apache.lucene.search.highlight.WeightedSpanTerm;
|
import org.apache.lucene.search.highlight.WeightedSpanTerm;
|
||||||
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
|
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
|
||||||
|
@ -31,6 +36,7 @@ import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery
|
||||||
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
|
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
public final class CustomQueryScorer extends QueryScorer {
|
public final class CustomQueryScorer extends QueryScorer {
|
||||||
|
@ -97,6 +103,14 @@ public final class CustomQueryScorer extends QueryScorer {
|
||||||
} else if (query instanceof XFilteredQuery) {
|
} else if (query instanceof XFilteredQuery) {
|
||||||
query = ((XFilteredQuery) query).getQuery();
|
query = ((XFilteredQuery) query).getQuery();
|
||||||
extract(query, terms);
|
extract(query, terms);
|
||||||
|
} else if (query instanceof XCommonTermsQuery) {
|
||||||
|
XCommonTermsQuery ctq = ((XCommonTermsQuery)query);
|
||||||
|
List<Term> ctqTerms = ctq.terms();
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
for (Term term : ctqTerms) {
|
||||||
|
bq.add(new TermQuery(term), Occur.SHOULD);
|
||||||
|
}
|
||||||
|
extract(bq, terms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -948,6 +948,59 @@ public class HighlighterSearchTests extends AbstractNodesTests {
|
||||||
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
|
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCommonTermsQuery() {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (IndexMissingException e) {
|
||||||
|
// its ok
|
||||||
|
}
|
||||||
|
client.admin().indices().prepareCreate("test").execute().actionGet();
|
||||||
|
client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||||
|
|
||||||
|
client.prepareIndex("test", "type1")
|
||||||
|
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog")
|
||||||
|
.setRefresh(true).execute().actionGet();
|
||||||
|
|
||||||
|
logger.info("--> highlighting and searching on field1");
|
||||||
|
SearchSourceBuilder source = searchSource()
|
||||||
|
.query(commonTerms("field2", "quick brown").cutoffFrequency(100))
|
||||||
|
.from(0).size(60).explain(true)
|
||||||
|
.highlight(highlight().field("field2").order("score").preTags("<x>").postTags("</x>"));
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
|
||||||
|
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCommonTermsTermVector() throws ElasticSearchException, IOException {
|
||||||
|
try {
|
||||||
|
client.admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
client.admin().indices().prepareCreate("test").addMapping("type1", type1TermVectorMapping()).execute().actionGet();
|
||||||
|
client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||||
|
|
||||||
|
client.prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog")
|
||||||
|
.setRefresh(true).execute().actionGet();
|
||||||
|
|
||||||
|
logger.info("--> highlighting and searching on field1");
|
||||||
|
SearchSourceBuilder source = searchSource().query(commonTerms("field2", "quick brown").cutoffFrequency(100)).from(0).size(60)
|
||||||
|
.explain(true).highlight(highlight().field("field2").order("score").preTags("<x>").postTags("</x>"));
|
||||||
|
|
||||||
|
SearchResponse searchResponse = client.search(
|
||||||
|
searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
|
||||||
|
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
|
||||||
|
|
||||||
|
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||||
|
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPlainHighlightDifferentFragmenter() throws Exception {
|
public void testPlainHighlightDifferentFragmenter() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -120,7 +120,7 @@ public class SimpleQueryTests extends AbstractNodesTests {
|
||||||
|
|
||||||
client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet();
|
client.prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox").execute().actionGet();
|
||||||
client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute().actionGet();
|
client.prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree").execute().actionGet();
|
||||||
client.prepareIndex("test", "type1", "3").setSource("field1", "quick lazy huge brown").setRefresh(true).execute().actionGet();
|
client.prepareIndex("test", "type1", "3").setSource("field1", "quick lazy huge brown", "field2", "the quick lazy huge brown fox jumps over the tree").setRefresh(true).execute().actionGet();
|
||||||
|
|
||||||
SearchResponse searchResponse = client.prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the quick brown").cutoffFrequency(3)).execute().actionGet();
|
SearchResponse searchResponse = client.prepareSearch().setQuery(QueryBuilders.commonTerms("field1", "the quick brown").cutoffFrequency(3)).execute().actionGet();
|
||||||
assertThat(searchResponse.hits().totalHits(), equalTo(2l));
|
assertThat(searchResponse.hits().totalHits(), equalTo(2l));
|
||||||
|
@ -140,6 +140,32 @@ public class SimpleQueryTests extends AbstractNodesTests {
|
||||||
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("3"));
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("3"));
|
||||||
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2"));
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2"));
|
||||||
|
|
||||||
|
// try the same with match query
|
||||||
|
searchResponse = client.prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "the quick brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND)).execute().actionGet();
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(2l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||||
|
|
||||||
|
searchResponse = client.prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "the quick brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.OR)).execute().actionGet();
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("2"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("3"));
|
||||||
|
|
||||||
|
searchResponse = client.prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "the quick brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND).analyzer("standard")).execute().actionGet();
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
// standard drops "the" since its a stopword
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("3"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2"));
|
||||||
|
|
||||||
|
// try the same with multi match query
|
||||||
|
searchResponse = client.prepareSearch().setQuery(QueryBuilders.multiMatchQuery("the quick brown", "field1", "field2").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND)).execute().actionGet();
|
||||||
|
assertThat(searchResponse.hits().totalHits(), equalTo(3l));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[0].getId(), equalTo("3")); // better score due to different query stats
|
||||||
|
assertThat(searchResponse.getHits().getHits()[1].getId(), equalTo("1"));
|
||||||
|
assertThat(searchResponse.getHits().getHits()[2].getId(), equalTo("2"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue