SOLR-9708: Added UnifiedSolrHighlighter. Added hl.method=original|fastVector|postings|unified

(cherry picked from commit 4314c51)
This commit is contained in:
David Smiley 2016-11-24 19:36:07 -05:00
parent 8201ddc3b3
commit a089bf2827
11 changed files with 870 additions and 92 deletions

View File

@ -51,6 +51,13 @@ Upgrade Notes
replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond"
and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr. and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
* SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It
might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the
original Highlighter. That said, some options aren't supported yet, notably hl.fragsize and
hl.requireFieldMatch=false. It will get more features in time, especially with your input. See HighlightParams.java
for a listing of highlight parameters annotated with which highlighters use them.
hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector.
New Features New Features
---------------------- ----------------------
* SOLR-9293: Solrj client support for hierarchical clusters and other topics * SOLR-9293: Solrj client support for hierarchical clusters and other topics
@ -83,6 +90,12 @@ New Features
* SOLR-9721: javabin Tuple parser for streaming and other end points (noble) * SOLR-9721: javabin Tuple parser for streaming and other end points (noble)
* SOLR-9708: Added UnifiedSolrHighlighter, a highlighter adapter for Lucene's UnifiedHighlighter. The adapter is a
derivative of the PostingsSolrHighlighter, supporting mostly the same parameters with some differences.
Introduced "hl.method" parameter which can be set to original|fastVector|postings|unified to pick the highlighter at
runtime without the need to modify solrconfig from the default configuration. hl.useFastVectorHighlighter is now
considered deprecated in lieu of hl.method=fastVector. (Timothy Rodriguez, David Smiley)
Optimizations Optimizations
---------------------- ----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

View File

@ -16,6 +16,14 @@
*/ */
package org.apache.solr.handler.component; package org.apache.solr.handler.component;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Stream;
import com.google.common.base.Objects; import com.google.common.base.Objects;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
@ -29,6 +37,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.highlight.DefaultSolrHighlighter; import org.apache.solr.highlight.DefaultSolrHighlighter;
import org.apache.solr.highlight.PostingsSolrHighlighter; import org.apache.solr.highlight.PostingsSolrHighlighter;
import org.apache.solr.highlight.SolrHighlighter; import org.apache.solr.highlight.SolrHighlighter;
import org.apache.solr.highlight.UnifiedSolrHighlighter;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.QParserPlugin;
@ -38,9 +47,7 @@ import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
import java.io.IOException; import static java.util.stream.Collectors.toMap;
import java.net.URL;
import java.util.List;
/** /**
* TODO! * TODO!
@ -50,13 +57,50 @@ import java.util.List;
*/ */
public class HighlightComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware public class HighlightComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware
{ {
public static final String COMPONENT_NAME = "highlight"; public enum HighlightMethod {
private PluginInfo info = PluginInfo.EMPTY_INFO; UNIFIED("unified"),
private SolrHighlighter highlighter; FAST_VECTOR("fastVector"),
POSTINGS("postings"),
ORIGINAL("original");
private static final Map<String, HighlightMethod> METHODS = Collections.unmodifiableMap(Stream.of(values())
.collect(toMap(HighlightMethod::getMethodName, Function.identity())));
private final String methodName;
HighlightMethod(String method) {
this.methodName = method;
}
public String getMethodName() {
return methodName;
}
public static HighlightMethod parse(String method) {
return METHODS.get(method);
}
}
public static final String COMPONENT_NAME = "highlight";
private PluginInfo info = PluginInfo.EMPTY_INFO;
@Deprecated // DWS: in 7.0 lets restructure the abstractions/relationships
private SolrHighlighter solrConfigHighlighter;
/**
* @deprecated instead depend on {@link #process(ResponseBuilder)} to choose the highlighter based on
* {@link HighlightParams#METHOD}
*/
@Deprecated
public static SolrHighlighter getHighlighter(SolrCore core) { public static SolrHighlighter getHighlighter(SolrCore core) {
HighlightComponent hl = (HighlightComponent) core.getSearchComponents().get(HighlightComponent.COMPONENT_NAME); HighlightComponent hl = (HighlightComponent) core.getSearchComponents().get(HighlightComponent.COMPONENT_NAME);
return hl==null ? null: hl.getHighlighter(); return hl==null ? null: hl.getHighlighter();
}
@Deprecated
public SolrHighlighter getHighlighter() {
return solrConfigHighlighter;
} }
@Override @Override
@ -67,7 +111,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
@Override @Override
public void prepare(ResponseBuilder rb) throws IOException { public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams(); SolrParams params = rb.req.getParams();
rb.doHighlights = highlighter.isHighlightingEnabled(params); rb.doHighlights = solrConfigHighlighter.isHighlightingEnabled(params);
if(rb.doHighlights){ if(rb.doHighlights){
String hlq = params.get(HighlightParams.Q); String hlq = params.get(HighlightParams.Q);
String hlparser = Objects.firstNonNull(params.get(HighlightParams.QPARSER), String hlparser = Objects.firstNonNull(params.get(HighlightParams.QPARSER),
@ -89,26 +133,28 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
if(children.isEmpty()) { if(children.isEmpty()) {
PluginInfo pluginInfo = core.getSolrConfig().getPluginInfo(SolrHighlighter.class.getName()); //TODO deprecated configuration remove later PluginInfo pluginInfo = core.getSolrConfig().getPluginInfo(SolrHighlighter.class.getName()); //TODO deprecated configuration remove later
if (pluginInfo != null) { if (pluginInfo != null) {
highlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName()); solrConfigHighlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
} else { } else {
DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core); DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core);
defHighlighter.init(PluginInfo.EMPTY_INFO); defHighlighter.init(PluginInfo.EMPTY_INFO);
highlighter = defHighlighter; solrConfigHighlighter = defHighlighter;
} }
} else { } else {
highlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName()); solrConfigHighlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
} }
} }
@Override @Override
public void process(ResponseBuilder rb) throws IOException { public void process(ResponseBuilder rb) throws IOException {
if (rb.doHighlights) { if (rb.doHighlights) {
SolrQueryRequest req = rb.req; SolrQueryRequest req = rb.req;
SolrParams params = req.getParams(); SolrParams params = req.getParams();
String[] defaultHighlightFields; //TODO: get from builder by default? SolrHighlighter highlighter = getHighlighter(params);
String[] defaultHighlightFields; //TODO: get from builder by default?
if (rb.getQparser() != null) { if (rb.getQparser() != null) {
defaultHighlightFields = rb.getQparser().getDefaultHighlightFields(); defaultHighlightFields = rb.getQparser().getDefaultHighlightFields();
} else { } else {
@ -129,14 +175,8 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
rb.setHighlightQuery( highlightQuery ); rb.setHighlightQuery( highlightQuery );
} }
} }
if(highlightQuery != null) {
boolean rewrite = (highlighter instanceof PostingsSolrHighlighter == false) && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
highlightQuery = rewrite ? highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
}
// No highlighting if there is no query -- consider q.alt="*:* // No highlighting if there is no query -- consider q.alt=*:*
if( highlightQuery != null ) { if( highlightQuery != null ) {
NamedList sumData = highlighter.doHighlighting( NamedList sumData = highlighter.doHighlighting(
rb.getResults().docList, rb.getResults().docList,
@ -151,6 +191,36 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
} }
} }
protected SolrHighlighter getHighlighter(SolrParams params) {
HighlightMethod method = HighlightMethod.parse(params.get(HighlightParams.METHOD));
if (method == null) {
return solrConfigHighlighter;
}
switch (method) {
case UNIFIED:
if (solrConfigHighlighter instanceof UnifiedSolrHighlighter) {
return solrConfigHighlighter;
}
return new UnifiedSolrHighlighter(); // TODO cache one?
case POSTINGS:
if (solrConfigHighlighter instanceof PostingsSolrHighlighter) {
return solrConfigHighlighter;
}
return new PostingsSolrHighlighter(); // TODO cache one?
case FAST_VECTOR: // fall-through
case ORIGINAL:
if (solrConfigHighlighter instanceof DefaultSolrHighlighter) {
return solrConfigHighlighter;
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"In order to use " + HighlightParams.METHOD + "=" + method.getMethodName() + " the configured" +
" highlighter in solrconfig must be " + DefaultSolrHighlighter.class);
}
default: throw new AssertionError();
}
}
@Override @Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) { public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doHighlights) return; if (!rb.doHighlights) return;
@ -194,10 +264,6 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
} }
} }
public SolrHighlighter getHighlighter() {
return highlighter;
}
//////////////////////////////////////////// ////////////////////////////////////////////
/// SolrInfoMBean /// SolrInfoMBean
//////////////////////////////////////////// ////////////////////////////////////////////

View File

@ -66,6 +66,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo; import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
@ -373,6 +374,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
if (!isHighlightingEnabled(params)) // also returns early if no unique key field if (!isHighlightingEnabled(params)) // also returns early if no unique key field
return null; return null;
boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
if (rewrite) {
query = query.rewrite(req.getSearcher().getIndexReader());
}
SolrIndexSearcher searcher = req.getSearcher(); SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema(); IndexSchema schema = searcher.getSchema();
@ -463,8 +471,11 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
* Determines if we should use the FastVectorHighlighter for this field. * Determines if we should use the FastVectorHighlighter for this field.
*/ */
protected boolean useFastVectorHighlighter(SolrParams params, SchemaField schemaField) { protected boolean useFastVectorHighlighter(SolrParams params, SchemaField schemaField) {
boolean useFvhParam = params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false); boolean methodFvh =
if (!useFvhParam) return false; HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
|| params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
if (!methodFvh) return false;
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets(); boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
if (!termPosOff) { if (!termPosOff) {
log.warn("Solr will use the standard Highlighter instead of FastVectorHighlighter because the {} field " + log.warn("Solr will use the standard Highlighter instead of FastVectorHighlighter because the {} field " +

View File

@ -50,8 +50,9 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* <p> * <p>
* Example configuration: * Example configuration:
* <pre class="prettyprint"> * <pre class="prettyprint">
* &lt;requestHandler name="standard" class="solr.StandardRequestHandler"&gt; * &lt;requestHandler name="/select" class="solr.SearchHandler"&gt;
* &lt;lst name="defaults"&gt; * &lt;lst name="defaults"&gt;
* &lt;str name="hl.method"&gt;postings&lt;/str&gt;
* &lt;int name="hl.snippets"&gt;1&lt;/int&gt; * &lt;int name="hl.snippets"&gt;1&lt;/int&gt;
* &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt; * &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt; * &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
@ -71,12 +72,6 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* &lt;/lst&gt; * &lt;/lst&gt;
* &lt;/requestHandler&gt; * &lt;/requestHandler&gt;
* </pre> * </pre>
* ...
* <pre class="prettyprint">
* &lt;searchComponent class="solr.HighlightComponent" name="highlight"&gt;
* &lt;highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/&gt;
* &lt;/searchComponent&gt;
* </pre>
* <p> * <p>
* Notes: * Notes:
* <ul> * <ul>

View File

@ -0,0 +1,365 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RTimerTree;
import org.apache.solr.util.plugin.PluginInfoInitialized;
/**
* Highlighter impl that uses {@link UnifiedHighlighter}
* <p>
* Example configuration with default values:
* <pre class="prettyprint">
* &lt;requestHandler name="/select" class="solr.SearchHandler"&gt;
* &lt;lst name="defaults"&gt;
* &lt;str name="hl.method"&gt;unified&lt;/str&gt;
* &lt;int name="hl.snippets"&gt;1&lt;/int&gt;
* &lt;str name="hl.tag.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.tag.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.simple.pre"&gt;&amp;lt;em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.simple.post"&gt;&amp;lt;/em&amp;gt;&lt;/str&gt;
* &lt;str name="hl.tag.ellipsis"&gt;... &lt;/str&gt;
* &lt;bool name="hl.defaultSummary"&gt;true&lt;/bool&gt;
* &lt;str name="hl.encoder"&gt;simple&lt;/str&gt;
* &lt;float name="hl.score.k1"&gt;1.2&lt;/float&gt;
* &lt;float name="hl.score.b"&gt;0.75&lt;/float&gt;
* &lt;float name="hl.score.pivot"&gt;87&lt;/float&gt;
* &lt;str name="hl.bs.language"&gt;&lt;/str&gt;
* &lt;str name="hl.bs.country"&gt;&lt;/str&gt;
* &lt;str name="hl.bs.variant"&gt;&lt;/str&gt;
* &lt;str name="hl.bs.type"&gt;SENTENCE&lt;/str&gt;
* &lt;int name="hl.maxAnalyzedChars"&gt;10000&lt;/int&gt;
* &lt;bool name="hl.highlightMultiTerm"&gt;true&lt;/bool&gt;
* &lt;bool name="hl.usePhraseHighlighter"&gt;true&lt;/bool&gt;
* &lt;int name="hl.cacheFieldValCharsThreshold"&gt;524288&lt;/int&gt;
* &lt;str name="hl.offsetSource"&gt;&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/requestHandler&gt;
* </pre>
* <p>
* Notes:
* <ul>
* <li>hl.q (string) can specify the query
* <li>hl.fl (string) specifies the field list.
* <li>hl.snippets (int) specifies how many snippets to return.
* <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
* <li>hl.tag.post (string) specifies text which appears after a highlighted term.
* <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer hl.tag.pre)
* <li>hl.simple.post (string) specifies text which appears before a highlighted term. (prefer hl.tag.post)
* <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is to retain each
* value in a list without joining them.
* <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading text.
* <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
* <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
* <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
* <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
* <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHAR, WHOLE]
* <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
* <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
* <li>hl.bs.variant (string) specifies country code for BreakIterator. default is empty string (root locale)
* <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document for any one field.
* <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries at some cost. default is true
* <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
* <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached. default is 524288 (1MB in 2 byte chars)
* <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will use what's available if not specified
* </ul>
*
* @lucene.experimental
*/
public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
protected static final String SNIPPET_SEPARATOR = "\u0000";
private static final String[] ZERO_LEN_STR_ARRAY = new String[0];
@Override
public void init(PluginInfo info) {
}
@Override
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
final SolrParams params = req.getParams();
// if highlighting isn't enabled, then why call doHighlighting?
if (!isHighlightingEnabled(params))
return null;
int[] docIDs = toDocIDs(docs);
// fetch the unique keys
String[] keys = getUniqueKeys(req.getSearcher(), docIDs);
// query-time parameters
String[] fieldNames = getHighlightFields(query, req, defaultFields);
int maxPassages[] = new int[fieldNames.length];
for (int i = 0; i < fieldNames.length; i++) {
maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
}
UnifiedHighlighter highlighter = getHighlighter(req);
Map<String, String[]> snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages);
return encodeSnippets(keys, fieldNames, snippets);
}
/**
* Creates an instance of the Lucene {@link UnifiedHighlighter}. Provided for subclass extension so that
* a subclass can return a subclass of {@link SolrExtendedUnifiedHighlighter}.
*/
protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
return new SolrExtendedUnifiedHighlighter(req);
}
/**
* Encodes the resulting snippets into a namedlist
*
* @param keys the document unique keys
* @param fieldNames field names to highlight in the order
* @param snippets map from field name to snippet array for the docs
* @return encoded namedlist of summaries
*/
protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String, String[]> snippets) {
NamedList<Object> list = new SimpleOrderedMap<>();
for (int i = 0; i < keys.length; i++) {
NamedList<Object> summary = new SimpleOrderedMap<>();
for (String field : fieldNames) {
String snippet = snippets.get(field)[i];
if (snippet == null) {
//TODO reuse logic of DefaultSolrHighlighter.alternateField
summary.add(field, ZERO_LEN_STR_ARRAY);
} else {
// we used a special snippet separator char and we can now split on it.
summary.add(field, snippet.split(SNIPPET_SEPARATOR));
}
}
list.add(keys[i], summary);
}
return list;
}
/**
* Converts solr's DocList to the int[] docIDs
*/
protected int[] toDocIDs(DocList docs) {
int[] docIDs = new int[docs.size()];
DocIterator iterator = docs.iterator();
for (int i = 0; i < docIDs.length; i++) {
if (!iterator.hasNext()) {
throw new AssertionError();
}
docIDs[i] = iterator.nextDoc();
}
if (iterator.hasNext()) {
throw new AssertionError();
}
return docIDs;
}
/**
* Retrieves the unique keys for the topdocs to key the results
*/
protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
IndexSchema schema = searcher.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
if (keyField != null) {
Set<String> selector = Collections.singleton(keyField.getName());
String[] uniqueKeys = new String[docIDs.length];
for (int i = 0; i < docIDs.length; i++) {
int docid = docIDs[i];
Document doc = searcher.doc(docid, selector);
String id = schema.printableUniqueKey(doc);
uniqueKeys[i] = id;
}
return uniqueKeys;
} else {
return new String[docIDs.length];
}
}
/**
* From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}.
*/
protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter {
protected final SolrParams params;
protected final IndexSchema schema;
protected final RTimerTree loadFieldValuesTimer;
public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) {
super(req.getSearcher(), req.getSchema().getIndexAnalyzer());
this.params = req.getParams();
this.schema = req.getSchema();
this.setMaxLength(
params.getInt(HighlightParams.MAX_CHARS, UnifiedHighlighter.DEFAULT_MAX_LENGTH));
this.setCacheFieldValCharsThreshold(
params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD));
// SolrRequestInfo is a thread-local singleton providing access to the ResponseBuilder to code that
// otherwise can't get it in a nicer way.
SolrQueryRequest request = SolrRequestInfo.getRequestInfo().getReq();
final RTimerTree timerTree;
if (request.getRequestTimer() != null) { //It may be null if not used in a search context.
timerTree = request.getRequestTimer();
} else {
timerTree = new RTimerTree(); // since null checks are annoying
}
loadFieldValuesTimer = timerTree.sub("loadFieldValues"); // we assume a new timer, state of STARTED
loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED.
}
@Override
protected OffsetSource getOffsetSource(String field) {
String sourceStr = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE);
if (sourceStr != null) {
return OffsetSource.valueOf(sourceStr.toUpperCase(Locale.ROOT));
} else {
return super.getOffsetSource(field);
}
}
@Override
public int getMaxNoHighlightPassages(String field) {
boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false);
if (defaultSummary) {
return -1;// signifies return first hl.snippets passages worth of the content
} else {
return 0;// will return null
}
}
@Override
protected PassageFormatter getFormatter(String fieldName) {
String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE,
params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>")
);
String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST,
params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>")
);
String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR);
String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple");
return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
}
@Override
protected PassageScorer getScorer(String fieldName) {
float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f);
float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f);
float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f);
return new PassageScorer(k1, b, pivot);
}
@Override
protected BreakIterator getBreakIterator(String field) {
String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE);
String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY);
String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
Locale locale = parseLocale(language, country, variant);
String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
return parseBreakIterator(type, locale);
}
/**
* parse a break iterator type for the specified locale
*/
protected BreakIterator parseBreakIterator(String type, Locale locale) {
if (type == null || "SENTENCE".equals(type)) {
return BreakIterator.getSentenceInstance(locale);
} else if ("LINE".equals(type)) {
return BreakIterator.getLineInstance(locale);
} else if ("WORD".equals(type)) {
return BreakIterator.getWordInstance(locale);
} else if ("CHARACTER".equals(type)) {
return BreakIterator.getCharacterInstance(locale);
} else if ("WHOLE".equals(type)) {
return new WholeBreakIterator();
} else {
throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
}
}
/**
* parse a locale from a language+country+variant spec
*/
protected Locale parseLocale(String language, String country, String variant) {
if (language == null && country == null && variant == null) {
return Locale.ROOT;
} else if (language == null) {
throw new IllegalArgumentException("language is required if country or variant is specified");
} else if (country == null && variant != null) {
throw new IllegalArgumentException("To specify variant, country is required");
} else if (country != null && variant != null) {
return new Locale(language, country, variant);
} else if (country != null) {
return new Locale(language, country);
} else {
return new Locale(language);
}
}
@Override
protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int
cacheCharsThreshold) throws IOException {
// Time loading field values. It can be an expensive part of highlighting.
loadFieldValuesTimer.resume();
try {
return super.loadFieldValues(fields, docIter, cacheCharsThreshold);
} finally {
loadFieldValuesTimer.pause(); // note: doesn't need to be "stopped"; pause is fine.
}
}
@Override
protected boolean shouldHandleMultiTermQuery(String field) {
return params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true);
}
@Override
protected boolean shouldHighlightPhrasesStrictly(String field) {
return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true);
}
}
}

View File

@ -0,0 +1,64 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Test schema file for PostingsHighlighter -->
<schema name="unifiedhighlight" version="1.0">
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- basic text field: no offsets! -->
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- text field with offsets -->
<fieldType name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text_offsets" indexed="true" stored="true"/>
<field name="text2" type="text" indexed="true" stored="true"/>
<field name="text3" type="text_offsets" indexed="true" stored="true"/>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -70,7 +70,12 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "tv_text"); args.put("hl.fl", "tv_text");
args.put("hl.snippets", "2"); args.put("hl.snippets", "2");
args.put("hl.useFastVectorHighlighter", "true"); args.put("hl.tag.pre", "<fvpre>"); //... and let post default to </em>. This is just a test.
if (random().nextBoolean()) {
args.put("hl.useFastVectorHighlighter", "true"); // old way
} else {
args.put("hl.method", "fastVector"); // the new way
}
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard",0,200,args); "standard",0,200,args);
@ -81,7 +86,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
assertQ("Basic summarization", assertQ("Basic summarization",
sumLRF.makeRequest("tv_text:vector"), sumLRF.makeRequest("tv_text:vector"),
"//lst[@name='highlighting']/lst[@name='1']", "//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <em>vector</em> highlighter test']" "//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <fvpre>vector</em> highlighter test']"
); );
} }
} }

View File

@ -43,10 +43,6 @@ import org.junit.After;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
/**
* Tests some basic functionality of Solr while demonstrating good
* Best Practices for using AbstractSolrTestCase
*/
public class HighlighterTest extends SolrTestCaseJ4 { public class HighlighterTest extends SolrTestCaseJ4 {
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " + private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
@ -90,6 +86,25 @@ public class HighlighterTest extends SolrTestCaseJ4 {
assertTrue(regex instanceof RegexFragmenter); assertTrue(regex instanceof RegexFragmenter);
} }
@Test
public void testMethodPostings() {
String field = "t_text";
assertU(adoc(field, LONG_TEXT,
"id", "1"));
assertU(commit());
try {
assertQ("Tried PostingsSolrHighlighter but failed due to offsets not in postings",
req("q", "long", "hl.method", "postings", "df", field, "hl", "true"));
fail("Did not encounter exception for no offsets");
} catch (Exception e) {
assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
}
// note: the default schema.xml has no offsets in postings to test the PostingsHighlighter. Leave that for another
// test class.
}
@Test @Test
public void testMergeContiguous() throws Exception { public void testMergeContiguous() throws Exception {
HashMap<String,String> args = new HashMap<>(); HashMap<String,String> args = new HashMap<>();
@ -99,6 +114,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
args.put(HighlightParams.SNIPPETS, String.valueOf(4)); args.put(HighlightParams.SNIPPETS, String.valueOf(4));
args.put(HighlightParams.FRAGSIZE, String.valueOf(40)); args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true"); args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
args.put(HighlightParams.METHOD, "original"); // test works; no complaints
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args); "standard", 0, 200, args);
String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " + String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
@ -763,7 +779,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
); );
// Prove fallback highlighting works also with FVH // Prove fallback highlighting works also with FVH
args.put("hl.useFastVectorHighlighter", "true"); args.put("hl.method", "fastVector");
args.put("hl.tag.pre", "<fvhpre>"); args.put("hl.tag.pre", "<fvhpre>");
args.put("hl.tag.post", "</fvhpost>"); args.put("hl.tag.post", "</fvhpost>");
args.put("f.t_text.hl.maxAlternateFieldLength", "18"); args.put("f.t_text.hl.maxAlternateFieldLength", "18");

View File

@ -52,7 +52,7 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
public void testSimple() { public void testSimple() {
assertQ("simplest test", assertQ("simplest test",
req("q", "text:document", "sort", "id asc", "hl", "true"), req("q", "text:document", "sort", "id asc", "hl", "true", "hl.method", "postings"), // test hl.method is happy too
"count(//lst[@name='highlighting']/*)=2", "count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'", "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'"); "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");

View File

@ -0,0 +1,229 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.junit.BeforeClass;
/** Tests for the UnifiedHighlighter Solr plugin **/
public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml", "schema-unifiedhighlight.xml");
// test our config is sane, just to be sure:
// 'text' and 'text3' should have offsets, 'text2' should not
IndexSchema schema = h.getCore().getLatestSchema();
assertTrue(schema.getField("text").storeOffsetsWithPositions());
assertTrue(schema.getField("text3").storeOffsetsWithPositions());
assertFalse(schema.getField("text2").storeOffsetsWithPositions());
}
@Override
public void setUp() throws Exception {
super.setUp();
clearIndex();
assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
assertU(commit());
}
public static SolrQueryRequest req(String... params) {
return SolrTestCaseJ4.req(params, "hl.method", "unified");
}
public void testSimple() {
assertQ("simplest test",
req("q", "text:document", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
}
public void testImpossibleOffsetSource() {
try {
assertQ("impossible offset source",
req("q", "text2:document", "hl.offsetSource", "postings", "hl.fl", "text2", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
fail("Did not encounter exception for no offsets");
} catch (Exception e) {
assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
}
}
public void testMultipleSnippetsReturned() {
clearIndex();
assertU(adoc("text", "Document snippet one. Intermediate sentence. Document snippet two.",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multiple snippets test",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
}
public void testStrictPhrasesEnabledByDefault() {
clearIndex();
assertU(adoc("text", "Strict phrases should be enabled for phrases",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("strict phrase handling",
req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be enabled for phrases'");
}
public void testStrictPhrasesCanBeDisabled() {
clearIndex();
assertU(adoc("text", "Strict phrases should be disabled for phrases",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("strict phrase handling",
req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true", "hl.usePhraseHighlighter", "false"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be disabled for <em>phrases</em>'");
}
public void testMultiTermQueryEnabledByDefault() {
clearIndex();
assertU(adoc("text", "Aviary Avenue document",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multi term query handling",
req("q", "text:av*", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Aviary</em> <em>Avenue</em> document'");
}
public void testMultiTermQueryCanBeDisabled() {
clearIndex();
assertU(adoc("text", "Aviary Avenue document",
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multi term query handling",
req("q", "text:av*", "sort", "id asc", "hl", "true", "hl.highlightMultiTerm", "false"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=0");
}
public void testPagination() {
assertQ("pagination test",
req("q", "text:document", "sort", "id asc", "hl", "true", "rows", "1", "start", "1"),
"count(//lst[@name='highlighting']/*)=1",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
}
public void testEmptySnippet() {
assertQ("null snippet test",
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
"count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
}
public void testDefaultSummary() {
assertQ("null snippet test",
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true", "hl.defaultSummary", "true"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'");
}
public void testDifferentField() {
assertQ("highlighting text3",
req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
}
public void testTwoFields() {
assertQ("highlighting text and text3",
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
}
public void testTags() {
assertQ("different pre/post tags",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.tag.pre", "[", "hl.tag.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
}
public void testUsingSimplePrePostTags() {
assertQ("different pre/post tags",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.simple.pre", "[", "hl.simple.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
}
public void testUsingSimplePrePostTagsPerField() {
assertQ("different pre/post tags",
req("q", "text:document", "sort", "id asc", "hl", "true", "f.text.hl.simple.pre", "[", "f.text.hl.simple.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
}
public void testTagsPerField() {
assertQ("highlighting text and text3",
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3", "f.text3.hl.tag.pre", "[", "f.text3.hl.tag.post", "]"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy [document]'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
}
public void testBreakIterator() {
assertQ("different breakiterator",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'");
}
public void testBreakIterator2() {
assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
assertU(commit());
assertQ("different breakiterator",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
}
public void testEncoder() {
assertU(adoc("text", "Document one has a first <i>sentence</i>.", "id", "103"));
assertU(commit());
assertQ("html escaped",
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
}
}

View File

@ -21,62 +21,76 @@ package org.apache.solr.common.params;
* @since solr 1.3 * @since solr 1.3
*/ */
public interface HighlightParams { public interface HighlightParams {
// primary
public static final String HIGHLIGHT = "hl"; public static final String HIGHLIGHT = "hl";
public static final String Q = HIGHLIGHT+".q"; public static final String METHOD = HIGHLIGHT+".method"; // original|fastVector|postings|unified
public static final String QPARSER = HIGHLIGHT+".qparser"; @Deprecated // see hl.method
public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter";
public static final String FIELDS = HIGHLIGHT+".fl"; public static final String FIELDS = HIGHLIGHT+".fl";
public static final String SNIPPETS = HIGHLIGHT+".snippets"; public static final String SNIPPETS = HIGHLIGHT+".snippets";
public static final String FRAGSIZE = HIGHLIGHT+".fragsize";
public static final String INCREMENT = HIGHLIGHT+".increment";
public static final String MAX_CHARS = HIGHLIGHT+".maxAnalyzedChars";
public static final String FORMATTER = HIGHLIGHT+".formatter";
public static final String ENCODER = HIGHLIGHT+".encoder";
public static final String FRAGMENTER = HIGHLIGHT+".fragmenter";
public static final String PRESERVE_MULTI = HIGHLIGHT+".preserveMulti";
public static final String FRAG_LIST_BUILDER = HIGHLIGHT+".fragListBuilder";
public static final String FRAGMENTS_BUILDER = HIGHLIGHT+".fragmentsBuilder";
public static final String BOUNDARY_SCANNER = HIGHLIGHT+".boundaryScanner";
public static final String BS_MAX_SCAN = HIGHLIGHT+".bs.maxScan";
public static final String BS_CHARS = HIGHLIGHT+".bs.chars";
public static final String BS_TYPE = HIGHLIGHT+".bs.type";
public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language";
public static final String BS_COUNTRY = HIGHLIGHT+".bs.country";
public static final String BS_VARIANT = HIGHLIGHT+".bs.variant";
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate";
public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";
public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter";
public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm";
public static final String PAYLOADS = HIGHLIGHT+".payloads";
public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous"; // KEY:
// OH = (original) Highlighter (AKA the standard Highlighter)
// FVH = FastVectorHighlighter
// PH = PostingsHighlighter
// UH = UnifiedHighlighter
public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter"; // query interpretation
public static final String TAG_PRE = HIGHLIGHT + ".tag.pre"; public static final String Q = HIGHLIGHT+".q"; // all
public static final String TAG_POST = HIGHLIGHT + ".tag.post"; public static final String QPARSER = HIGHLIGHT+".qparser"; // all
public static final String TAG_ELLIPSIS = HIGHLIGHT + ".tag.ellipsis"; public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch"; // OH, FVH
public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit"; public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter"; // OH, FVH, UH
public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar"; public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm"; // all
// Formatter
public static final String SIMPLE = "simple";
public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre";
public static final String SIMPLE_POST = HIGHLIGHT+"."+SIMPLE+".post";
// Regex fragmenter // if no snippets...
public static final String REGEX = "regex"; public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary"; // UH, PH
public static final String SLOP = HIGHLIGHT+"."+REGEX+".slop"; public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField"; // OH, FVH
public static final String PATTERN = HIGHLIGHT+"."+REGEX+".pattern"; public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength"; // OH, FVH
public static final String MAX_RE_CHARS = HIGHLIGHT+"."+REGEX+".maxAnalyzedChars"; public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate"; // OH, FVH
// Scoring parameters // sizing
public static final String SCORE = "score"; public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH
public static final String SCORE_K1 = HIGHLIGHT +"."+SCORE+".k1"; public static final String FRAGMENTER = HIGHLIGHT+".fragmenter"; // OH
public static final String SCORE_B = HIGHLIGHT +"."+SCORE+".b"; public static final String INCREMENT = HIGHLIGHT+".increment"; // OH
public static final String SCORE_PIVOT = HIGHLIGHT +"."+SCORE+".pivot"; public static final String REGEX = "regex"; // OH
public static final String SLOP = HIGHLIGHT+"."+REGEX+".slop"; // OH
public static final String PATTERN = HIGHLIGHT+"."+REGEX+".pattern"; // OH
public static final String MAX_RE_CHARS= HIGHLIGHT+"."+REGEX+".maxAnalyzedChars"; // OH
public static final String BOUNDARY_SCANNER = HIGHLIGHT+".boundaryScanner"; // FVH
public static final String BS_MAX_SCAN = HIGHLIGHT+".bs.maxScan"; // FVH
public static final String BS_CHARS = HIGHLIGHT+".bs.chars"; // FVH
public static final String BS_TYPE = HIGHLIGHT+".bs.type"; // FVH, UH, PH
public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language"; // FVH, UH, PH
public static final String BS_COUNTRY = HIGHLIGHT+".bs.country"; // FVH, UH, PH
public static final String BS_VARIANT = HIGHLIGHT+".bs.variant"; // FVH, UH, PH
// formatting
public static final String FORMATTER = HIGHLIGHT+".formatter"; // OH
public static final String ENCODER = HIGHLIGHT+".encoder"; // OH, (UH, PH limited)
public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous"; // OH
public static final String SIMPLE = "simple"; // OH
public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre"; // OH
public static final String SIMPLE_POST = HIGHLIGHT+"."+SIMPLE+".post"; // OH
public static final String FRAGMENTS_BUILDER = HIGHLIGHT+".fragmentsBuilder"; // FVH
public static final String TAG_PRE = HIGHLIGHT + ".tag.pre"; // FVH, UH, PH
public static final String TAG_POST = HIGHLIGHT + ".tag.post"; // FVH, UH, PH
public static final String TAG_ELLIPSIS= HIGHLIGHT + ".tag.ellipsis"; // FVH, UH, PH
public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar"; // FVH, PH
// ordering
public static final String PRESERVE_MULTI = HIGHLIGHT+".preserveMulti"; // OH
public static final String FRAG_LIST_BUILDER = HIGHLIGHT+".fragListBuilder"; // FVH
public static final String SCORE = "score"; // UH, PH
public static final String SCORE_K1 = HIGHLIGHT +"."+SCORE+".k1"; // UH, PH
public static final String SCORE_B = HIGHLIGHT +"."+SCORE+".b"; // UH, PH
public static final String SCORE_PIVOT = HIGHLIGHT +"."+SCORE+".pivot"; // UH, PH
// misc
public static final String MAX_CHARS = HIGHLIGHT+".maxAnalyzedChars"; // all
public static final String PAYLOADS = HIGHLIGHT+".payloads"; // OH
public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine"; // OH
public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch"; // OH
public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit"; // FVH
public static final String OFFSET_SOURCE = HIGHLIGHT + ".offsetSource"; // UH
public static final String CACHE_FIELD_VAL_CHARS_THRESHOLD = HIGHLIGHT + ".cacheFieldValCharsThreshold"; // UH
} }