LUCENE-7520: WSTE shouldn't expand MTQ if its field doesn't match filter

(cherry picked from commit e1b0693)
This commit is contained in:
David Smiley 2016-10-24 09:31:55 -04:00
parent ed203978fc
commit 167013c441
3 changed files with 26 additions and 8 deletions

View File

@ -63,6 +63,10 @@ Improvements
* LUCENE-7496: Better toString for SweetSpotSimilarity (janhoy)
* LUCENE-7520: Highlighter's WeightedSpanTermExtractor shouldn't attempt to expand a MultiTermQuery
when its field doesn't match the field the extraction is scoped to.
(Cao Manh Dat via David Smiley)
Optimizations
* LUCENE-7501: BKDReader should not store the split dimension explicitly in the

View File

@ -217,13 +217,14 @@ public class WeightedSpanTermExtractor {
} else if (isQueryUnsupported(query.getClass())) {
// nothing
} else {
if (query instanceof MultiTermQuery &&
(!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery)query).getField()))) {
return;
}
Query origQuery = query;
final IndexReader reader = getLeafContext().reader();
Query rewritten;
if (query instanceof MultiTermQuery) {
if (!expandMultiTermQuery) {
return;
}
rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
} else {
rewritten = origQuery.rewrite(reader);
@ -513,11 +514,7 @@ public class WeightedSpanTermExtractor {
*/
public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, float boost, TokenStream tokenStream,
String fieldName) throws IOException {
if (fieldName != null) {
this.fieldName = fieldName;
} else {
this.fieldName = null;
}
this.fieldName = fieldName;
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<>();
this.tokenStream = tokenStream;

View File

@ -33,6 +33,7 @@ import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockPayloadAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
@ -1339,6 +1340,22 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testNotRewriteMultiTermQuery() throws IOException {
// field "bar": (not the field we ultimately want to extract)
MultiTermQuery mtq = new TermRangeQuery("bar", new BytesRef("aa"), new BytesRef("zz"), true, true) ;
WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() {
@Override
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
assertEquals(mtq, query);
super.extract(query, boost, terms);
}
};
extractor.setExpandMultiTermQuery(true);
extractor.setMaxDocCharsToAnalyze(51200);
extractor.getWeightedSpanTerms(
mtq, 3, new CannedTokenStream(new Token("aa",0,2), new Token("bb", 2,4)), "foo"); // field "foo"
}
public void testGetBestSingleFragmentWithWeights() throws Exception {
TestHighlightRunner helper = new TestHighlightRunner() {