From 85ec39d931c742b68cc1873da2fe17800eefda23 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Tue, 25 Jun 2019 00:10:08 -0400 Subject: [PATCH] SOLR-13367: Range queries will now highlight in hl.method=unified mode. Lucene MatchesUtils.disjunction method for disjunction over BytesRefIterator terms. --- .../org/apache/lucene/search/MatchesUtils.java | 11 +++++++++++ solr/CHANGES.txt | 2 ++ .../org/apache/solr/query/SolrRangeQuery.java | 15 +++++++++++++++ .../highlight/TestUnifiedSolrHighlighter.java | 5 +++++ solr/solr-ref-guide/src/highlighting.adoc | 11 +++++++---- 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java b/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java index a8438aedd42..bf460b7b699 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchesUtils.java @@ -26,6 +26,8 @@ import java.util.Objects; import java.util.stream.Collectors; import java.util.stream.StreamSupport; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.IOSupplier; /** @@ -129,4 +131,13 @@ public final class MatchesUtils { public static MatchesIterator disjunction(List subMatches) throws IOException { return DisjunctionMatchesIterator.fromSubIterators(subMatches); } + + /** + * Create a MatchesIterator that is a disjunction over a list of terms extracted from a {@link BytesRefIterator}. + * + * Only terms that have at least one match in the given document will be included + */ + public static MatchesIterator disjunction(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException { + return DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, field, terms); + } } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 992c21880ed..14bdc71a109 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -144,6 +144,8 @@ New Features * SOLR-10291: Add matches Stream Evaluator to support regex matching (Joel Bernstein) +* SOLR-13367: Highlighting: Range queries will now highlight in hl.method=unified mode. (David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java index 40a68e54e26..49e44d9b89b 100644 --- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java @@ -39,6 +39,8 @@ import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Matches; +import org.apache.lucene.search.MatchesUtils; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScoreMode; @@ -347,6 +349,19 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro this.scoreMode = scoreMode; } + // See MultiTermQueryConstantScoreWrapper matches() + @Override + public Matches matches(LeafReaderContext context, int doc) throws IOException { + SolrRangeQuery query = SolrRangeQuery.this; + final Terms terms = context.reader().terms(query.field); + if (terms == null) { + return null; + } + if (terms.hasPositions() == false) { + return super.matches(context, doc); + } + return MatchesUtils.forField(query.field, () -> MatchesUtils.disjunction(context, doc, query, query.field, query.getTermsEnum(context))); + } /** Try to collect terms from the given terms enum and return count=sum(df) for terms visited so far * or (-count - 1) if this should be rewritten into a boolean query. diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java index f03376d0d9a..a009e1f451c 100644 --- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java +++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java @@ -282,6 +282,11 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 { "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='Document one has a first <i>sentence</i>.'"); } + public void testRangeQuery() { + assertQ(req("q", "id:101", "hl", "true", "hl.q", "text:[dob TO doe]"), + "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1"); + } + public void testRequireFieldMatch() { // We highlight on field text3 (hl.fl), but our query only references the "text" field. Nonetheless, the query word // "document" is found in all fields here. diff --git a/solr/solr-ref-guide/src/highlighting.adoc b/solr/solr-ref-guide/src/highlighting.adoc index c792a916df8..7f9eef98d1e 100644 --- a/solr/solr-ref-guide/src/highlighting.adoc +++ b/solr/solr-ref-guide/src/highlighting.adoc @@ -156,13 +156,16 @@ We recommend that you try this highlighter even though it isn't the default (yet + The UH highlights a query very _accurately_ and thus is true to what the underlying Lucene query actually matches. Other highlighters highlight terms more liberally (over-highlight). +For esoteric/custom queries, this highlighter has a greater likelihood of supporting it than the others. ++ A strong benefit to this highlighter is that you can opt to configure Solr to put more information in the underlying index to speed up highlighting of large documents; multiple configurations are supported, even on a per-field basis. There is little or no such flexibility of offset sources for the other highlighters. More on this below. + -There are some reasons not to choose this highlighter: The `surround` query parser doesn't yet work here -- SOLR-12895. +There are some reasons not to choose this highlighter: Passage scoring does not consider boosts in the query. -Some people want more/better passage breaking flexibility. +Some users want more/better passage breaking flexibility. +The "alternate" fallback options are more primitive. <>:: (`hl.method=original`, the default) + @@ -181,8 +184,8 @@ The FastVector Highlighter _requires_ full term vector options (`termVectors`, ` This highlighter notably supports multi-colored highlighting such that different query words can be denoted in the fragment with different marking, usually expressed as an HTML tag with a unique color. + This highlighter's query-representation is less advanced than the Original or Unified Highlighters: for example it will not work well with the `surround` parser, and there are multiple reported bugs pertaining to queries with stop-words. -+ -Note that both the FastVector and Original Highlighters can be used in conjunction in a search request to highlight some fields with one and some the other. In contrast, the other highlighters can only be chosen exclusively. + +Both the FastVector and Original Highlighters can be used in conjunction in a search request to highlight some fields with one and some the other. In contrast, the Unified Highlighter can only be chosen exclusively. The Unified Highlighter is exclusively configured via search parameters. In contrast, some settings for the Original and FastVector Highlighters are set in `solrconfig.xml`. There's a robust example of the latter in the "```techproducts```" configset.