mirror of https://github.com/apache/lucene.git
SOLR-13367: Range queries will now highlight in hl.method=unified mode.
Lucene MatchesUtils.disjunction method for disjunction over BytesRefIterator terms.
This commit is contained in:
parent
3ef5c0ee74
commit
5259e964b5
|
@ -26,6 +26,8 @@ import java.util.Objects;
|
|||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IOSupplier;
|
||||
|
||||
/**
|
||||
|
@ -129,4 +131,13 @@ public final class MatchesUtils {
|
|||
public static MatchesIterator disjunction(List<MatchesIterator> subMatches) throws IOException {
|
||||
return DisjunctionMatchesIterator.fromSubIterators(subMatches);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a MatchesIterator that is a disjunction over a list of terms extracted from a {@link BytesRefIterator}.
|
||||
*
|
||||
* Only terms that have at least one match in the given document will be included
|
||||
*/
|
||||
public static MatchesIterator disjunction(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
|
||||
return DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, field, terms);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -103,6 +103,8 @@ New Features
|
|||
|
||||
* SOLR-10291: Add matches Stream Evaluator to support regex matching (Joel Bernstein)
|
||||
|
||||
* SOLR-13367: Highlighting: Range queries will now highlight in hl.method=unified mode. (David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -39,6 +39,8 @@ import org.apache.lucene.search.ConstantScoreWeight;
|
|||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Matches;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
|
@ -347,6 +349,19 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
|
|||
this.scoreMode = scoreMode;
|
||||
}
|
||||
|
||||
// See MultiTermQueryConstantScoreWrapper matches()
|
||||
@Override
|
||||
public Matches matches(LeafReaderContext context, int doc) throws IOException {
|
||||
SolrRangeQuery query = SolrRangeQuery.this;
|
||||
final Terms terms = context.reader().terms(query.field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
if (terms.hasPositions() == false) {
|
||||
return super.matches(context, doc);
|
||||
}
|
||||
return MatchesUtils.forField(query.field, () -> MatchesUtils.disjunction(context, doc, query, query.field, query.getTermsEnum(context)));
|
||||
}
|
||||
|
||||
/** Try to collect terms from the given terms enum and return count=sum(df) for terms visited so far
|
||||
* or (-count - 1) if this should be rewritten into a boolean query.
|
||||
|
|
|
@ -282,6 +282,11 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
|||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first <i>sentence</i>.'");
|
||||
}
|
||||
|
||||
public void testRangeQuery() {
|
||||
assertQ(req("q", "id:101", "hl", "true", "hl.q", "text:[dob TO doe]"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1");
|
||||
}
|
||||
|
||||
public void testRequireFieldMatch() {
|
||||
// We highlight on field text3 (hl.fl), but our query only references the "text" field. Nonetheless, the query word
|
||||
// "document" is found in all fields here.
|
||||
|
|
|
@ -156,13 +156,16 @@ We recommend that you try this highlighter even though it isn't the default (yet
|
|||
+
|
||||
The UH highlights a query very _accurately_ and thus is true to what the underlying Lucene query actually matches.
|
||||
Other highlighters highlight terms more liberally (over-highlight).
|
||||
For esoteric/custom queries, this highlighter has a greater likelihood of supporting it than the others.
|
||||
+
|
||||
A strong benefit to this highlighter is that you can opt to configure Solr to put more information in the underlying index to speed up highlighting of large documents; multiple configurations are supported, even on a per-field basis.
|
||||
There is little or no such flexibility of offset sources for the other highlighters.
|
||||
More on this below.
|
||||
+
|
||||
There are some reasons not to choose this highlighter: The `surround` query parser doesn't yet work here -- SOLR-12895.
|
||||
There are some reasons not to choose this highlighter:
|
||||
Passage scoring does not consider boosts in the query.
|
||||
Some people want more/better passage breaking flexibility.
|
||||
Some users want more/better passage breaking flexibility.
|
||||
The "alternate" fallback options are more primitive.
|
||||
|
||||
<<The Original Highlighter,Original Highlighter>>:: (`hl.method=original`, the default)
|
||||
+
|
||||
|
@ -181,8 +184,8 @@ The FastVector Highlighter _requires_ full term vector options (`termVectors`, `
|
|||
This highlighter notably supports multi-colored highlighting such that different query words can be denoted in the fragment with different marking, usually expressed as an HTML tag with a unique color.
|
||||
+
|
||||
This highlighter's query-representation is less advanced than the Original or Unified Highlighters: for example it will not work well with the `surround` parser, and there are multiple reported bugs pertaining to queries with stop-words.
|
||||
+
|
||||
Note that both the FastVector and Original Highlighters can be used in conjunction in a search request to highlight some fields with one and some the other. In contrast, the other highlighters can only be chosen exclusively.
|
||||
|
||||
Both the FastVector and Original Highlighters can be used in conjunction in a search request to highlight some fields with one and some the other. In contrast, the Unified Highlighter can only be chosen exclusively.
|
||||
|
||||
|
||||
The Unified Highlighter is exclusively configured via search parameters. In contrast, some settings for the Original and FastVector Highlighters are set in `solrconfig.xml`. There's a robust example of the latter in the "```techproducts```" configset.
|
||||
|
|
Loading…
Reference in New Issue