diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java index 964f039e543..0a5311937db 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java @@ -405,12 +405,10 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { } protected String getPreTag(String[] preTags, int num) { - int n = num % preTags.length; - return preTags[n]; + return preTags[num]; } protected String getPostTag(String[] postTags, int num) { - int n = num % postTags.length; - return postTags[n]; + return postTags[num]; } } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java index 2351e6c08f9..cd4317184a4 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java @@ -57,7 +57,9 @@ public class FieldQuery { // fieldMatch==false, Map Map> termSetMap = new HashMap<>(); - int termOrPhraseNumber; // used for colored tag support + // index of the original query term or phrase in the list of expanded terms or phrases + final Map queryIndexHighlights = new HashMap<>(); + int previousIndex = 0; // The maximum number of different matching terms accumulated from any one MultiTermQuery private static final int MAX_MTQ_TERMS = 1024; @@ -65,18 +67,23 @@ public class FieldQuery { public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch) throws IOException { this.fieldMatch = fieldMatch; - Set flatQueries = new LinkedHashSet<>(); - IndexSearcher searcher; - if (reader == null) { - searcher = null; - } else { - searcher = new IndexSearcher(reader); - } + final Set flatQueries = new LinkedHashSet<>(); + final IndexSearcher searcher = reader == null ? null : new IndexSearcher(reader); + + buildQueryIndexHighlights(query); flatten(query, searcher, flatQueries, 1f); saveTerms(flatQueries, searcher); Collection expandQueries = expand(flatQueries); for (Query flatQuery : expandQueries) { + int queryIndex; + if (this.queryIndexHighlights.containsKey(flatQuery.toString())) { + queryIndex = this.queryIndexHighlights.get(flatQuery.toString()); + previousIndex = queryIndex; + } else { + queryIndex = previousIndex; + } + QueryPhraseMap rootMap = getRootMap(flatQuery); rootMap.add(flatQuery, reader); float boost = 1f; @@ -88,12 +95,21 @@ public class FieldQuery { if (!phraseHighlight && flatQuery instanceof PhraseQuery) { PhraseQuery pq = (PhraseQuery) flatQuery; if (pq.getTerms().length > 1) { - for (Term term : pq.getTerms()) rootMap.addTerm(term, boost); + for (Term term : pq.getTerms()) rootMap.addTerm(term, boost, queryIndex); } } } } + private void buildQueryIndexHighlights(Query query) { + if (query instanceof BooleanQuery booleanQuery) { + final List clauses = booleanQuery.clauses(); + for (int i = 0; i < clauses.size(); i++) { + queryIndexHighlights.put(clauses.get(i).query().toString(), i); + } + } + } + /** * For backwards compatibility you can initialize FieldQuery without an IndexReader, which is only * required to support MultiTermQuery @@ -372,10 +388,6 @@ public class FieldQuery { return rootMaps.get(fieldMatch ? fieldName : null); } - int nextTermOrPhraseNumber() { - return termOrPhraseNumber++; - } - /** Internal structure of a query for highlighting: represents a nested query structure */ public static class QueryPhraseMap { @@ -390,9 +402,9 @@ public class FieldQuery { this.fieldQuery = fieldQuery; } - void addTerm(Term term, float boost) { + void addTerm(Term term, float boost, int queryIndex) { QueryPhraseMap map = getOrNewMap(subMap, term.text()); - map.markTerminal(boost); + map.markTerminal(boost, queryIndex); } private QueryPhraseMap getOrNewMap(Map subMap, String term) { @@ -405,6 +417,12 @@ public class FieldQuery { } void add(Query query, IndexReader reader) { + int highlightsLength = fieldQuery.queryIndexHighlights.size(); + int queryIndex = Math.min(fieldQuery.previousIndex + 1, highlightsLength - 1); + if (fieldQuery.queryIndexHighlights.containsKey(query.toString())) { + queryIndex = fieldQuery.queryIndexHighlights.get(query.toString()); + } + float boost = 1f; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; @@ -412,7 +430,7 @@ public class FieldQuery { boost = bq.getBoost(); } if (query instanceof TermQuery) { - addTerm(((TermQuery) query).getTerm(), boost); + addTerm(((TermQuery) query).getTerm(), boost, queryIndex); } else if (query instanceof PhraseQuery) { PhraseQuery pq = (PhraseQuery) query; Term[] terms = pq.getTerms(); @@ -422,7 +440,7 @@ public class FieldQuery { qpm = getOrNewMap(map, term.text()); map = qpm.subMap; } - qpm.markTerminal(pq.getSlop(), boost); + qpm.markTerminal(pq.getSlop(), boost, queryIndex); } else throw new RuntimeException("query \"" + query.toString() + "\" must be flatten first."); } @@ -431,15 +449,15 @@ public class FieldQuery { return subMap.get(term); } - private void markTerminal(float boost) { - markTerminal(0, boost); + private void markTerminal(float boost, int queryIndex) { + markTerminal(0, boost, queryIndex); } - private void markTerminal(int slop, float boost) { + private void markTerminal(int slop, float boost, int queryIndex) { this.terminal = true; this.slop = slop; this.boost = boost; - this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); + this.termOrPhraseNumber = queryIndex; } public boolean isTerminal() { diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/TestFieldQuery.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/TestFieldQuery.java index 6538c0d2985..03c259c349b 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/TestFieldQuery.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/TestFieldQuery.java @@ -23,6 +23,8 @@ import java.util.List; import java.util.Map; import java.util.Set; import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; @@ -36,6 +38,7 @@ import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap; import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; +import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.util.BytesRef; public class TestFieldQuery extends AbstractTestCase { @@ -953,4 +956,20 @@ public class TestFieldQuery extends AbstractTestCase { fq.flatten(query, searcher, flatQueries, 1f); assertCollectionQueries(flatQueries, tq(boost, "A")); } + + public void testTermOrPhraseNumberShouldBeSameAsOriginalQuerySize() throws IOException { + // Arrange + final String field = "field"; + final QueryParser queryParser = new QueryParser(field, new MockAnalyzer(random())); + final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); + queryBuilder.add(queryParser.createBooleanQuery(field, "A B"), Occur.SHOULD); + queryBuilder.add(queryParser.createPhraseQuery(field, "C B", 0), Occur.SHOULD); + queryBuilder.add(queryParser.createPhraseQuery(field, "C B", 2), Occur.SHOULD); + + // Act + final FieldQuery fieldQuery = new FieldQuery(queryBuilder.build(), true, true); + + // Assert + assertEquals(2, fieldQuery.previousIndex); + } }