mirror of https://github.com/apache/lucene.git
SOLR-13367: Range queries will now highlight in hl.method=unified mode.
Lucene MatchesUtils.disjunction method for disjunction over BytesRefIterator terms.
This commit is contained in:
parent
3ef5c0ee74
commit
5259e964b5
|
@ -26,6 +26,8 @@ import java.util.Objects;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.util.BytesRefIterator;
|
||||||
import org.apache.lucene.util.IOSupplier;
|
import org.apache.lucene.util.IOSupplier;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -129,4 +131,13 @@ public final class MatchesUtils {
|
||||||
public static MatchesIterator disjunction(List<MatchesIterator> subMatches) throws IOException {
|
public static MatchesIterator disjunction(List<MatchesIterator> subMatches) throws IOException {
|
||||||
return DisjunctionMatchesIterator.fromSubIterators(subMatches);
|
return DisjunctionMatchesIterator.fromSubIterators(subMatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a MatchesIterator that is a disjunction over a list of terms extracted from a {@link BytesRefIterator}.
|
||||||
|
*
|
||||||
|
* Only terms that have at least one match in the given document will be included
|
||||||
|
*/
|
||||||
|
public static MatchesIterator disjunction(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
|
||||||
|
return DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, field, terms);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -103,6 +103,8 @@ New Features
|
||||||
|
|
||||||
* SOLR-10291: Add matches Stream Evaluator to support regex matching (Joel Bernstein)
|
* SOLR-10291: Add matches Stream Evaluator to support regex matching (Joel Bernstein)
|
||||||
|
|
||||||
|
* SOLR-13367: Highlighting: Range queries will now highlight in hl.method=unified mode. (David Smiley)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,8 @@ import org.apache.lucene.search.ConstantScoreWeight;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Matches;
|
||||||
|
import org.apache.lucene.search.MatchesUtils;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.QueryVisitor;
|
import org.apache.lucene.search.QueryVisitor;
|
||||||
import org.apache.lucene.search.ScoreMode;
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
@ -347,6 +349,19 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
|
||||||
this.scoreMode = scoreMode;
|
this.scoreMode = scoreMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See MultiTermQueryConstantScoreWrapper matches()
|
||||||
|
@Override
|
||||||
|
public Matches matches(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
SolrRangeQuery query = SolrRangeQuery.this;
|
||||||
|
final Terms terms = context.reader().terms(query.field);
|
||||||
|
if (terms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (terms.hasPositions() == false) {
|
||||||
|
return super.matches(context, doc);
|
||||||
|
}
|
||||||
|
return MatchesUtils.forField(query.field, () -> MatchesUtils.disjunction(context, doc, query, query.field, query.getTermsEnum(context)));
|
||||||
|
}
|
||||||
|
|
||||||
/** Try to collect terms from the given terms enum and return count=sum(df) for terms visited so far
|
/** Try to collect terms from the given terms enum and return count=sum(df) for terms visited so far
|
||||||
* or (-count - 1) if this should be rewritten into a boolean query.
|
* or (-count - 1) if this should be rewritten into a boolean query.
|
||||||
|
|
|
@ -282,6 +282,11 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
||||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first <i>sentence</i>.'");
|
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first <i>sentence</i>.'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRangeQuery() {
|
||||||
|
assertQ(req("q", "id:101", "hl", "true", "hl.q", "text:[dob TO doe]"),
|
||||||
|
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1");
|
||||||
|
}
|
||||||
|
|
||||||
public void testRequireFieldMatch() {
|
public void testRequireFieldMatch() {
|
||||||
// We highlight on field text3 (hl.fl), but our query only references the "text" field. Nonetheless, the query word
|
// We highlight on field text3 (hl.fl), but our query only references the "text" field. Nonetheless, the query word
|
||||||
// "document" is found in all fields here.
|
// "document" is found in all fields here.
|
||||||
|
|
|
@ -156,13 +156,16 @@ We recommend that you try this highlighter even though it isn't the default (yet
|
||||||
+
|
+
|
||||||
The UH highlights a query very _accurately_ and thus is true to what the underlying Lucene query actually matches.
|
The UH highlights a query very _accurately_ and thus is true to what the underlying Lucene query actually matches.
|
||||||
Other highlighters highlight terms more liberally (over-highlight).
|
Other highlighters highlight terms more liberally (over-highlight).
|
||||||
|
For esoteric/custom queries, this highlighter has a greater likelihood of supporting them than the others.
|
||||||
|
+
|
||||||
A strong benefit to this highlighter is that you can opt to configure Solr to put more information in the underlying index to speed up highlighting of large documents; multiple configurations are supported, even on a per-field basis.
|
A strong benefit to this highlighter is that you can opt to configure Solr to put more information in the underlying index to speed up highlighting of large documents; multiple configurations are supported, even on a per-field basis.
|
||||||
There is little or no such flexibility of offset sources for the other highlighters.
|
There is little or no such flexibility of offset sources for the other highlighters.
|
||||||
More on this below.
|
More on this below.
|
||||||
+
|
+
|
||||||
There are some reasons not to choose this highlighter: The `surround` query parser doesn't yet work here -- SOLR-12895.
|
There are some reasons not to choose this highlighter:
|
||||||
Passage scoring does not consider boosts in the query.
|
Passage scoring does not consider boosts in the query.
|
||||||
Some people want more/better passage breaking flexibility.
|
Some users want more/better passage breaking flexibility.
|
||||||
|
The "alternate" fallback options are more primitive.
|
||||||
|
|
||||||
<<The Original Highlighter,Original Highlighter>>:: (`hl.method=original`, the default)
|
<<The Original Highlighter,Original Highlighter>>:: (`hl.method=original`, the default)
|
||||||
+
|
+
|
||||||
|
@ -181,8 +184,8 @@ The FastVector Highlighter _requires_ full term vector options (`termVectors`, `
|
||||||
This highlighter notably supports multi-colored highlighting such that different query words can be denoted in the fragment with different marking, usually expressed as an HTML tag with a unique color.
|
This highlighter notably supports multi-colored highlighting such that different query words can be denoted in the fragment with different marking, usually expressed as an HTML tag with a unique color.
|
||||||
+
|
+
|
||||||
This highlighter's query-representation is less advanced than the Original or Unified Highlighters: for example it will not work well with the `surround` parser, and there are multiple reported bugs pertaining to queries with stop-words.
|
This highlighter's query-representation is less advanced than the Original or Unified Highlighters: for example it will not work well with the `surround` parser, and there are multiple reported bugs pertaining to queries with stop-words.
|
||||||
+
|
|
||||||
Note that both the FastVector and Original Highlighters can be used in conjunction in a search request to highlight some fields with one and some the other. In contrast, the other highlighters can only be chosen exclusively.
|
Both the FastVector and Original Highlighters can be used in conjunction in a search request to highlight some fields with one and some the other. In contrast, the Unified Highlighter can only be chosen exclusively.
|
||||||
|
|
||||||
|
|
||||||
The Unified Highlighter is exclusively configured via search parameters. In contrast, some settings for the Original and FastVector Highlighters are set in `solrconfig.xml`. There's a robust example of the latter in the "```techproducts```" configset.
|
The Unified Highlighter is exclusively configured via search parameters. In contrast, some settings for the Original and FastVector Highlighters are set in `solrconfig.xml`. There's a robust example of the latter in the "```techproducts```" configset.
|
||||||
|
|
Loading…
Reference in New Issue