diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 50b1cf397ec..a3af603257e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -123,6 +123,10 @@ Changes in Runtime Behavior
* LUCENE-8505: IndexWriter#addIndices will now fail if the target index is sorted but
the candidate is not. (Jim Ferenczi)
+* LUCENE-8535: Highlighter and FVH doesn't support ToParent and ToChildBlockJoinQuery out of the
+ box anymore. In order to highlight on Block-Join Queries a custom WeightedSpanTermExtractor / FieldQuery
+ should be used. (Simon Willnauer, Jim Ferenczi, Julie Tibshiran)
+
New Features
* LUCENE-8340: LongPoint#newDistanceQuery may be used to boost scores based on
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index 1b56b6465d6..0515ff8123c 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -140,3 +140,8 @@ a LowerCaseFilter
CharTokenizer now only performs tokenization. To perform any type of filtering
use a TokenFilter chain as you would with any other Tokenizer.
+
+## Highlighter and FastVectorHighlighter no longer support ToParent/ToChildBlockJoinQuery
+
+Both Highlighter and FastVectorHighlighter need a custom WeightedSpanTermExtractor or FieldQuery respectively
+in order to support ToParent/ToChildBlockJoinQuery.
diff --git a/lucene/highlighter/build.xml b/lucene/highlighter/build.xml
index ed6cc74be8a..6ecd793c9ed 100644
--- a/lucene/highlighter/build.xml
+++ b/lucene/highlighter/build.xml
@@ -31,14 +31,12 @@
-
-
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
index 25d9b3c7e5a..5452fc7e830 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
@@ -229,8 +229,7 @@ public class QueryScorer implements Scorer {
}
protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
- return defaultField == null ? new WeightedSpanTermExtractor()
- : new WeightedSpanTermExtractor(defaultField);
+ return new WeightedSpanTermExtractor(defaultField);
}
/*
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index e0655c5ee58..bc026210c21 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -54,8 +54,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.join.ToChildBlockJoinQuery;
-import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -71,6 +69,31 @@ import org.apache.lucene.util.IOUtils;
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
* {@link Term}s from the {@link Query} are contained in a supplied {@link TokenStream}.
+ *
+ * In order to support additional, by default unsupported queries, subclasses can override
+ * {@link #extract(Query, float, Map)} for extracting wrapped or delegate queries and
+ * {@link #extractUnknownQuery(Query, Map)} to process custom leaf queries:
+ *
+ *
+ * WeightedSpanTermExtractor extractor = new WeightedSpanTermExtractor() {
+ * protected void extract(Query query, float boost, Map<String, WeightedSpanTerm>terms) throws IOException {
+ * if (query instanceof QueryWrapper) {
+ * extract(((QueryWrapper)query).getQuery(), boost, terms);
+ * } else {
+ * super.extract(query, boost, terms);
+ * }
+ * }
+ *
+ * protected void extractUnknownQuery(Query query, Map<String, WeightedSpanTerm> terms) throws IOException {
+ * if (query instanceOf CustomTermQuery) {
+ * Term term = ((CustomTermQuery) query).getTerm();
+ * terms.put(term.field(), new WeightedSpanTerm(1, term.text()));
+ * }
+ * }
+ * };
+ * }
+ *
+ *
*/
public class WeightedSpanTermExtractor {
@@ -85,12 +108,11 @@ public class WeightedSpanTermExtractor {
private LeafReader internalReader = null;
public WeightedSpanTermExtractor() {
+ this(null);
}
public WeightedSpanTermExtractor(String defaultField) {
- if (defaultField != null) {
- this.defaultField = defaultField;
- }
+ this.defaultField = defaultField;
}
/**
@@ -154,10 +176,6 @@ public class WeightedSpanTermExtractor {
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
- } else if (query instanceof ToParentBlockJoinQuery) {
- extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
- } else if (query instanceof ToChildBlockJoinQuery) {
- extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final Term[][] termArrays = mpq.getTermArrays();
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
index 4845de69e70..ac7e9d0a3c4 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
@@ -31,8 +31,8 @@ import org.apache.lucene.search.highlight.Encoder;
public class FastVectorHighlighter {
public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;
public static final boolean DEFAULT_FIELD_MATCH = true;
- private final boolean phraseHighlight;
- private final boolean fieldMatch;
+ protected final boolean phraseHighlight;
+ protected final boolean fieldMatch;
private final FragListBuilder fragListBuilder;
private final FragmentsBuilder fragmentsBuilder;
private int phraseLimit = Integer.MAX_VALUE;
@@ -80,7 +80,7 @@ public class FastVectorHighlighter {
// TODO: should we deprecate this?
// because if there is no reader, then we cannot rewrite MTQ.
try {
- return new FieldQuery( query, null, phraseHighlight, fieldMatch );
+ return getFieldQuery(query, null);
} catch (IOException e) {
// should never be thrown when reader is null
throw new RuntimeException (e);
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
index cb502343c89..8584b404bbb 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
@@ -38,7 +38,6 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
/**
@@ -62,7 +61,7 @@ public class FieldQuery {
// The maximum number of different matching terms accumulated from any one MultiTermQuery
private static final int MAX_MTQ_TERMS = 1024;
- FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
+ public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch) throws IOException {
this.fieldMatch = fieldMatch;
Set flatQueries = new LinkedHashSet<>();
flatten( query, reader, flatQueries, 1f );
@@ -95,7 +94,7 @@ public class FieldQuery {
this (query, null, phraseHighlight, fieldMatch);
}
- void flatten( Query sourceQuery, IndexReader reader, Collection flatQueries, float boost ) throws IOException{
+ protected void flatten( Query sourceQuery, IndexReader reader, Collection flatQueries, float boost ) throws IOException {
while (sourceQuery instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) sourceQuery;
sourceQuery = bq.getQuery();
@@ -141,15 +140,10 @@ public class FieldQuery {
flatten( q, reader, flatQueries, boost);
}
} else if (sourceQuery instanceof FunctionScoreQuery) {
- final Query q = ((FunctionScoreQuery)sourceQuery).getWrappedQuery();
+ final Query q = ((FunctionScoreQuery) sourceQuery).getWrappedQuery();
if (q != null) {
flatten(q, reader, flatQueries, boost);
}
- } else if (sourceQuery instanceof ToParentBlockJoinQuery) {
- Query childQuery = ((ToParentBlockJoinQuery) sourceQuery).getChildQuery();
- if (childQuery != null) {
- flatten(childQuery, reader, flatQueries, boost);
- }
} else if (reader != null) {
Query query = sourceQuery;
Query rewritten;
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index 89c81db01dc..898846fa43c 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -81,11 +81,6 @@ import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
-import org.apache.lucene.search.join.BitSetProducer;
-import org.apache.lucene.search.join.QueryBitSetProducer;
-import org.apache.lucene.search.join.ScoreMode;
-import org.apache.lucene.search.join.ToChildBlockJoinQuery;
-import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
@@ -628,61 +623,6 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
- public void testToParentBlockJoinQuery() throws Exception {
- BitSetProducer parentFilter = new QueryBitSetProducer(
- new TermQuery(new Term(FIELD_NAME, "parent")));
-
- query = new ToParentBlockJoinQuery(new TermQuery(new Term(FIELD_NAME, "child")),
- parentFilter, ScoreMode.None);
- searcher = newSearcher(reader);
- hits = searcher.search(query, 100);
- int maxNumFragmentsRequired = 2;
-
- QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
- Highlighter highlighter = new Highlighter(this, scorer);
-
- for (int i = 0; i < hits.totalHits.value; i++) {
- String text = "child document";
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
-
- highlighter.setTextFragmenter(new SimpleFragmenter(40));
- highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
- }
-
- assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
- numHighlights == 1);
- }
-
- public void testToChildBlockJoinQuery() throws Exception {
- BitSetProducer parentFilter = new QueryBitSetProducer(
- new TermQuery(new Term(FIELD_NAME, "parent")));
-
- BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
- booleanQuery.add(new ToChildBlockJoinQuery(new TermQuery(
- new Term(FIELD_NAME, "parent")), parentFilter), Occur.MUST);
- booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "child")), Occur.MUST);
- query = booleanQuery.build();
-
- searcher = newSearcher(reader);
- hits = searcher.search(query, 100);
- int maxNumFragmentsRequired = 2;
-
- QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
- Highlighter highlighter = new Highlighter(this, scorer);
-
- for (int i = 0; i < hits.totalHits.value; i++) {
- String text = "parent document";
- final int docId = hits.scoreDocs[i].doc;
- TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
-
- highlighter.setTextFragmenter(new SimpleFragmenter(40));
- highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
- }
-
- assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
- numHighlights == 1);
- }
-
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
PhraseQuery phraseQuery = new PhraseQuery(5, FIELD_NAME, "text", "piece", "long");
doSearching(phraseQuery);
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
index 6b1e49d58bd..eb888f6e891 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
@@ -27,16 +27,12 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.join.QueryBitSetProducer;
-import org.apache.lucene.search.join.ScoreMode;
-import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
import org.apache.lucene.util.BytesRef;
@@ -954,15 +950,4 @@ public class FieldQueryTest extends AbstractTestCase {
fq.flatten( query, reader, flatQueries, 1f );
assertCollectionQueries( flatQueries, tq( boost, "A" ) );
}
-
- public void testFlattenToParentBlockJoinQuery() throws Exception {
- initBoost();
- Query childQuery = tq(boost, "a");
- Query query = new ToParentBlockJoinQuery(childQuery, new QueryBitSetProducer(new MatchAllDocsQuery()), ScoreMode.None);
- FieldQuery fq = new FieldQuery(query, true, true );
- Set flatQueries = new HashSet<>();
- fq.flatten(query, reader, flatQueries, 1f);
- assertCollectionQueries(flatQueries, tq(boost, "a"));
- }
-
}
diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
index b8fbb255b58..088170dff5e 100644
--- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@@ -19,6 +19,7 @@ package org.apache.solr.highlight;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -52,6 +53,10 @@ import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
+import org.apache.lucene.search.highlight.WeightedSpanTerm;
+import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
@@ -238,7 +243,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
*/
protected QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) {
QueryScorer scorer = new QueryScorer(query,
- request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false) ? fieldName : null);
+ request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false) ? fieldName : null) {
+ @Override
+ protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
+ return new CustomSpanTermExtractor(defaultField);
+ }
+ };
scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true));
boolean defaultPayloads = true;//overwritten below
@@ -256,6 +266,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return scorer;
}
+ private static class CustomSpanTermExtractor extends WeightedSpanTermExtractor {
+ public CustomSpanTermExtractor(String defaultField) {
+ super(defaultField);
+ }
+
+ @Override
+ protected void extract(Query query, float boost, Map terms) throws IOException {
+ // these queries are not supported in lucene highlighting out of the box since 8.0
+ if (query instanceof ToParentBlockJoinQuery) {
+ extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
+ } else if (query instanceof ToChildBlockJoinQuery) {
+ extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
+ } else {
+ super.extract(query, boost, terms);
+ }
+ }
+ }
+
/**
* Return a {@link org.apache.lucene.search.highlight.Scorer} suitable for this Query and field.
* @param query The current query
@@ -469,7 +497,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
// FVH cannot process hl.requireFieldMatch parameter per-field basis
- params.getBool(HighlightParams.FIELD_MATCH, false));
+ params.getBool(HighlightParams.FIELD_MATCH, false)) {
+ @Override
+ public FieldQuery getFieldQuery(Query query, IndexReader reader) throws IOException {
+ return new FieldQuery(query, reader, phraseHighlight, fieldMatch) {
+ @Override
+ protected void flatten(Query sourceQuery, IndexReader reader, Collection flatQueries, float boost) throws IOException {
+ if (sourceQuery instanceof ToParentBlockJoinQuery) {
+ Query childQuery = ((ToParentBlockJoinQuery) sourceQuery).getChildQuery();
+ if (childQuery != null) {
+ flatten(childQuery, reader, flatQueries, boost);
+ }
+ } else {
+ super.flatten(sourceQuery, reader, flatQueries, boost);
+ }
+ }
+ };
+ }
+ };
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
fvhContainer.fvh = fvh;
fvhContainer.fieldQuery = fvh.getFieldQuery(query, reader);