mirror of https://github.com/apache/lucene.git
LUCENE-7520: WSTE shouldn't expand MTQ if its field doesn't match filter
This commit is contained in:
parent
ef5737466e
commit
e1b06938b4
|
@ -111,6 +111,10 @@ Improvements
|
|||
|
||||
* LUCENE-7496: Better toString for SweetSpotSimilarity (janhoy)
|
||||
|
||||
* LUCENE-7520: Highlighter's WeightedSpanTermExtractor shouldn't attempt to expand a MultiTermQuery
|
||||
when its field doesn't match the field the extraction is scoped to.
|
||||
(Cao Manh Dat via David Smiley)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-7501: BKDReader should not store the split dimension explicitly in the
|
||||
|
|
|
@ -217,13 +217,14 @@ public class WeightedSpanTermExtractor {
|
|||
} else if (isQueryUnsupported(query.getClass())) {
|
||||
// nothing
|
||||
} else {
|
||||
if (query instanceof MultiTermQuery &&
|
||||
(!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery)query).getField()))) {
|
||||
return;
|
||||
}
|
||||
Query origQuery = query;
|
||||
final IndexReader reader = getLeafContext().reader();
|
||||
Query rewritten;
|
||||
if (query instanceof MultiTermQuery) {
|
||||
if (!expandMultiTermQuery) {
|
||||
return;
|
||||
}
|
||||
rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
|
||||
} else {
|
||||
rewritten = origQuery.rewrite(reader);
|
||||
|
@ -508,11 +509,7 @@ public class WeightedSpanTermExtractor {
|
|||
*/
|
||||
public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, float boost, TokenStream tokenStream,
|
||||
String fieldName) throws IOException {
|
||||
if (fieldName != null) {
|
||||
this.fieldName = fieldName;
|
||||
} else {
|
||||
this.fieldName = null;
|
||||
}
|
||||
this.fieldName = fieldName;
|
||||
|
||||
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<>();
|
||||
this.tokenStream = tokenStream;
|
||||
|
|
|
@ -33,6 +33,7 @@ import java.util.StringTokenizer;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockPayloadAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenFilter;
|
||||
|
@ -1339,6 +1340,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
}
|
||||
|
||||
public void testNotRewriteMultiTermQuery() throws IOException {
|
||||
// field "bar": (not the field we ultimately want to extract)
|
||||
MultiTermQuery mtq = new TermRangeQuery("bar", new BytesRef("aa"), new BytesRef("zz"), true, true) ;
|
||||
WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() {
|
||||
@Override
|
||||
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
|
||||
assertEquals(mtq, query);
|
||||
super.extract(query, boost, terms);
|
||||
}
|
||||
};
|
||||
extractor.setExpandMultiTermQuery(true);
|
||||
extractor.setMaxDocCharsToAnalyze(51200);
|
||||
extractor.getWeightedSpanTerms(
|
||||
mtq, 3, new CannedTokenStream(new Token("aa",0,2), new Token("bb", 2,4)), "foo"); // field "foo"
|
||||
}
|
||||
|
||||
public void testGetBestSingleFragmentWithWeights() throws Exception {
|
||||
|
||||
TestHighlightRunner helper = new TestHighlightRunner() {
|
||||
|
|
Loading…
Reference in New Issue