IndexOrDocValuesQuery in query highlighting (#13902)

Signed-off-by: Prudhvi Godithi <pgodithi@amazon.com>
This commit is contained in:
Prudhvi Godithi 2024-10-14 05:19:24 -07:00 committed by Adrien Grand
parent e99db4e954
commit 0651e69fb2
3 changed files with 30 additions and 0 deletions

View File

@ -28,6 +28,7 @@ Bug Fixes
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
when they were not sorted by startOffset. (Seunghan Jung)
* GITHUB#13884: Remove broken .toArray from Long/CharObjectHashMap entirely. (Pan Guixin)
* GITHUB#12686: Added support for highlighting IndexOrDocValuesQuery. (Prudhvi Godithi)
Build
---------------------

View File

@ -54,6 +54,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@ -163,6 +164,11 @@ public class WeightedSpanTermExtractor {
new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
} else if (query instanceof IndexOrDocValuesQuery) {
Query indexQuery = ((IndexOrDocValuesQuery) query).getIndexQuery();
if (indexQuery != null) {
extract(indexQuery, boost, terms);
}
} else if (query instanceof TermQuery || query instanceof SynonymQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {

View File

@ -59,12 +59,14 @@ import org.apache.lucene.queries.spans.SpanNotQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
@ -255,6 +257,27 @@ public class TestHighlighter extends BaseTokenStreamTestCase implements Formatte
assertEquals("John <B>Kennedy</B> has been shot", fragment);
}
/**
 * Highlights two stored documents with an {@link IndexOrDocValuesQuery} and checks that the
 * terms of its index-side query ("jfk" / "kennedy") are the ones wrapped in highlight tags.
 */
public void testHighlightingIndexOrDocValuesQuery() throws Exception {
  searcher = newSearcher(reader);

  // Index-side query: a disjunction over the two terms expected to be highlighted.
  Query termsQuery =
      new BooleanQuery.Builder()
          .add(new TermQuery(new Term(FIELD_NAME, "jfk")), BooleanClause.Occur.SHOULD)
          .add(new TermQuery(new Term(FIELD_NAME, "kennedy")), BooleanClause.Occur.SHOULD)
          .build();
  // Doc-values-side query: a string range over the same field.
  Query rangeQuery = TermRangeQuery.newStringRange(FIELD_NAME, "a", "z", true, true);

  Highlighter highlighter =
      new Highlighter(
          new QueryScorer(new IndexOrDocValuesQuery(termsQuery, rangeQuery), FIELD_NAME));

  // Document ids paired with the best fragment expected for each of them.
  int[] docIds = {2, 3};
  String[] expectedFragments = {
    "<B>JFK</B> has been shot", "John <B>Kennedy</B> has been shot"
  };

  for (int i = 0; i < docIds.length; i++) {
    TokenStream tokens = getAnyTokenStream(FIELD_NAME, docIds[i]);
    String text = searcher.storedFields().document(docIds[i]).get(FIELD_NAME);
    assertEquals(expectedFragments[i], highlighter.getBestFragment(tokens, text));
  }
}
public void testHighlightUnknownQueryAfterRewrite()
throws IOException, InvalidTokenOffsetsException {
Query query =