diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4b2f3bb6268..8936ff96dbe 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -114,6 +114,9 @@ Bug Fixes * LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan Woodward) +* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term + phrase queries. (Eva Popenda, Alan Woodward) + Documentation * LUCENE-7223: Improve XXXPoint javadocs to make it clear that you diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 16b1d7bba85..89cbd11cba9 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -115,24 +115,29 @@ public class WeightedSpanTermExtractor { } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); - SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; - for (int i = 0; i < phraseQueryTerms.length; i++) { - clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); + if (phraseQueryTerms.length == 1) { + extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost); } + else { + SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; + for (int i = 0; i < phraseQueryTerms.length; i++) { + clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); + } - // sum position increments beyond 1 - int positionGaps = 0; - int[] positions = phraseQuery.getPositions(); - if (positions.length >= 2) { - // positions are in increasing order. max(0,...) is just a safeguard. - positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1); + // sum position increments beyond 1 + int positionGaps = 0; + int[] positions = phraseQuery.getPositions(); + if (positions.length >= 2) { + // positions are in increasing order. max(0,...) is just a safeguard. + positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1); + } + + //if original slop is 0 then require inOrder + boolean inorder = (phraseQuery.getSlop() == 0); + + SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder); + extractWeightedSpanTerms(terms, sp, boost); } - - //if original slop is 0 then require inOrder - boolean inorder = (phraseQuery.getSlop() == 0); - - SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder); - extractWeightedSpanTerms(terms, sp, boost); } else if (query instanceof TermQuery) { extractWeightedTerms(terms, query, boost); } else if (query instanceof SpanQuery) { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 936d121fc69..0a034f1ac36 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.search.highlight; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -28,9 +30,6 @@ import java.util.List; import java.util.Map; import java.util.StringTokenizer; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.CachingTokenFilter; @@ -41,13 +40,14 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.ngram.NGramTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; @@ -93,6 +93,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; +import org.junit.Test; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -1560,6 +1561,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte helper.start(); } + @Test + public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException { + + final Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new NGramTokenizer(4, 4)); + } + }; + final String fieldName = "substring"; + + final List list = new ArrayList<>(); + list.add(new BytesRef("uchu")); + final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()])); + + final QueryScorer fragmentScorer = new QueryScorer(query, fieldName); + final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("", ""); + + final Highlighter highlighter = new Highlighter(formatter, fragmentScorer); + highlighter.setTextFragmenter(new SimpleFragmenter(100)); + final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung"); + + assertEquals("Buchung",fragment); + + } + public void testUnRewrittenQuery() throws Exception { final TestHighlightRunner helper = new TestHighlightRunner() {