SOLR-1268: Incorporate FastVectorHighlighter

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@897383 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2010-01-09 04:57:20 +00:00
parent bfbcc89705
commit f6d7daf239
18 changed files with 726 additions and 142 deletions

View File

@ -81,6 +81,8 @@ New Features
* SOLR-1697: PluginInfo should load plugins w/o class attribute also (noble) * SOLR-1697: PluginInfo should load plugins w/o class attribute also (noble)
* SOLR-1268: Incorporate FastVectorHighlighter (koji)
Optimizations Optimizations
---------------------- ----------------------
@ -188,9 +190,11 @@ Other Changes
* SOLR-1674: Improve analysis tests and cut over to new TokenStream API. * SOLR-1674: Improve analysis tests and cut over to new TokenStream API.
(Robert Muir via Mark Miller) (Robert Muir via Mark Miller)
* SOLR-1661 : Remove adminCore from CoreContainer . removed deprecated methods setAdminCore(), getAdminCore() (noble) * SOLR-1661: Remove adminCore from CoreContainer . removed deprecated methods setAdminCore(), getAdminCore() (noble)
* SOLR-1704 : Google collections moved from clustering to core (noble) * SOLR-1704: Google collections moved from clustering to core (noble)
* SOLR-1268: Add Lucene 2.9-dev r888785 FastVectorHighlighter contrib jar to lib. (koji)
Build Build
---------------------- ----------------------

View File

@ -957,6 +957,12 @@
<str name="hl.simple.post"><![CDATA[</em>]]></str> <str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst> </lst>
</formatter> </formatter>
<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple" class="org.apache.solr.highlight.SimpleFragListBuilder" default="true"/>
<!-- multi-colored tag FragmentsBuilder -->
<fragmentsBuilder name="colored" class="org.apache.solr.highlight.MultiColoredScoreOrderFragmentsBuilder" default="true"/>
</highlighting> </highlighting>
<!-- An example dedup update processor that creates the "id" field on the fly <!-- An example dedup update processor that creates the "id" field on the fly

View File

@ -0,0 +1,2 @@
AnyObjectId[c564e56e13b408b63965e8edf8bd95764b877966] was removed in git history.
Apache SVN contains full history.

View File

@ -30,6 +30,8 @@ public interface HighlightParams {
public static final String MAX_CHARS = HIGHLIGHT+".maxAnalyzedChars"; public static final String MAX_CHARS = HIGHLIGHT+".maxAnalyzedChars";
public static final String FORMATTER = HIGHLIGHT+".formatter"; public static final String FORMATTER = HIGHLIGHT+".formatter";
public static final String FRAGMENTER = HIGHLIGHT+".fragmenter"; public static final String FRAGMENTER = HIGHLIGHT+".fragmenter";
public static final String FRAG_LIST_BUILDER = HIGHLIGHT+".fragListBuilder";
public static final String FRAGMENTS_BUILDER = HIGHLIGHT+".fragmentsBuilder";
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch"; public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField"; public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength"; public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
@ -38,6 +40,9 @@ public interface HighlightParams {
public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm"; public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm";
public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous"; public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous";
public static final String USE_HIGHLIGHTER = HIGHLIGHT + ".useHighlighter";
// Formatter // Formatter
public static final String SIMPLE = "simple"; public static final String SIMPLE = "simple";
public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre"; public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre";

View File

@ -555,7 +555,7 @@ public final class SolrCore implements SolrInfoMBean {
reqHandlers = new RequestHandlers(this); reqHandlers = new RequestHandlers(this);
reqHandlers.initHandlersFromConfig( solrConfig ); reqHandlers.initHandlersFromConfig( solrConfig );
highlighter = initHighLighter(); highlighter = initHighlighter();
// Handle things that should eventually go away // Handle things that should eventually go away
initDeprecatedSupport(); initDeprecatedSupport();
@ -605,7 +605,7 @@ public final class SolrCore implements SolrInfoMBean {
resourceLoader.inform(infoRegistry); resourceLoader.inform(infoRegistry);
} }
private SolrHighlighter initHighLighter() { private SolrHighlighter initHighlighter() {
SolrHighlighter highlighter = null; SolrHighlighter highlighter = null;
PluginInfo pluginInfo = solrConfig.getPluginInfo(SolrHighlighter.class.getName()); PluginInfo pluginInfo = solrConfig.getPluginInfo(SolrHighlighter.class.getName());
if(pluginInfo != null){ if(pluginInfo != null){

View File

@ -35,8 +35,11 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*; import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
@ -45,12 +48,15 @@ import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.PluginInfo; import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList; import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* *
@ -59,6 +65,8 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{ {
public static Logger log = LoggerFactory.getLogger(DefaultSolrHighlighter.class);
private SolrCore solrCore; private SolrCore solrCore;
public DefaultSolrHighlighter() { public DefaultSolrHighlighter() {
@ -71,16 +79,35 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
public void init(PluginInfo info) { public void init(PluginInfo info) {
formatters.clear(); formatters.clear();
fragmenters.clear(); fragmenters.clear();
fragListBuilders.clear();
fragmentsBuilders.clear();
// Load the fragmenters
SolrFragmenter frag = solrCore.initPlugins(info.getChildren("fragmenter") , fragmenters,SolrFragmenter.class,null); SolrFragmenter frag = solrCore.initPlugins(info.getChildren("fragmenter") , fragmenters,SolrFragmenter.class,null);
if (frag == null) frag = new GapFragmenter(); if (frag == null) frag = new GapFragmenter();
fragmenters.put("", frag); fragmenters.put("", frag);
fragmenters.put(null, frag); fragmenters.put(null, frag);
// Load the formatters // Load the formatters
SolrFormatter fmt = solrCore.initPlugins(info.getChildren("formatter"), formatters,SolrFormatter.class,null); SolrFormatter fmt = solrCore.initPlugins(info.getChildren("formatter"), formatters,SolrFormatter.class,null);
if (fmt == null) fmt = new HtmlFormatter(); if (fmt == null) fmt = new HtmlFormatter();
formatters.put("", fmt); formatters.put("", fmt);
formatters.put(null, fmt); formatters.put(null, fmt);
// Load the FragListBuilders
SolrFragListBuilder fragListBuilder = solrCore.initPlugins(info.getChildren("fragListBuilder"),
fragListBuilders, SolrFragListBuilder.class, null );
if( fragListBuilder == null ) fragListBuilder = new SimpleFragListBuilder();
fragListBuilders.put( "", fragListBuilder );
fragListBuilders.put( null, fragListBuilder );
// Load the FragmentsBuilders
SolrFragmentsBuilder fragsBuilder = solrCore.initPlugins(info.getChildren("fragmentsBuilder"),
fragmentsBuilders, SolrFragmentsBuilder.class, null);
if( fragsBuilder == null ) fragsBuilder = new ScoreOrderFragmentsBuilder();
fragmentsBuilders.put( "", fragsBuilder );
fragmentsBuilders.put( null, fragsBuilder );
initialized = true; initialized = true;
} }
@ -97,7 +124,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
formatters.put("", fmt); formatters.put("", fmt);
formatters.put(null, fmt); formatters.put(null, fmt);
SolrFragListBuilder fragListBuilder = new SimpleFragListBuilder();
fragListBuilders.put( "", fragListBuilder );
fragListBuilders.put( null, fragListBuilder );
SolrFragmentsBuilder fragsBuilder = new ScoreOrderFragmentsBuilder();
fragmentsBuilders.put( "", fragsBuilder );
fragmentsBuilders.put( null, fragsBuilder );
} }
/** /**
@ -233,6 +266,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return frag.getFragmenter( fieldName, params ); return frag.getFragmenter( fieldName, params );
} }
/**
 * Resolves the Lucene {@link FragListBuilder} to use for a request.
 * <p>
 * The plugin is looked up in <code>fragListBuilders</code> under the value of the
 * {@link HighlightParams#FRAG_LIST_BUILDER} request parameter. The default plugin
 * is registered in that map under both <code>""</code> and <code>null</code>.
 *
 * @param params request parameters; also handed to the selected plugin
 * @return the FragListBuilder produced by the selected plugin
 * @throws SolrException (BAD_REQUEST) if no plugin is registered under the requested name
 */
protected FragListBuilder getFragListBuilder( SolrParams params ){
  final String requestedName = params.get( HighlightParams.FRAG_LIST_BUILDER );
  final SolrFragListBuilder plugin = fragListBuilders.get( requestedName );
  if( plugin != null ){
    return plugin.getFragListBuilder( params );
  }
  throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragListBuilder: " + requestedName );
}
/**
 * Resolves the Lucene {@link FragmentsBuilder} to use for a request.
 * <p>
 * The plugin is looked up in <code>fragmentsBuilders</code> under the value of the
 * {@link HighlightParams#FRAGMENTS_BUILDER} request parameter. The default plugin
 * is registered in that map under both <code>""</code> and <code>null</code>.
 *
 * @param params request parameters; also handed to the selected plugin
 * @return the FragmentsBuilder produced by the selected plugin
 * @throws SolrException (BAD_REQUEST) if no plugin is registered under the requested name
 */
protected FragmentsBuilder getFragmentsBuilder( SolrParams params ){
  final String requestedName = params.get( HighlightParams.FRAGMENTS_BUILDER );
  final SolrFragmentsBuilder plugin = fragmentsBuilders.get( requestedName );
  if( plugin != null ){
    return plugin.getFragmentsBuilder( params );
  }
  throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + requestedName );
}
/** /**
* Generates a list of Highlighted query fragments for each item in a list * Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled. * of documents, or returns null if highlighting is disabled.
@ -251,151 +302,193 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
if (!isHighlightingEnabled(params)) if (!isHighlightingEnabled(params))
return null; return null;
SolrIndexSearcher searcher = req.getSearcher(); SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema(); IndexSchema schema = searcher.getSchema();
NamedList fragments = new SimpleOrderedMap(); NamedList fragments = new SimpleOrderedMap();
String[] fieldNames = getHighlightFields(query, req, defaultFields); String[] fieldNames = getHighlightFields(query, req, defaultFields);
Set<String> fset = new HashSet<String>(); Set<String> fset = new HashSet<String>();
{ {
// pre-fetch documents using the Searcher's doc cache // pre-fetch documents using the Searcher's doc cache
for(String f : fieldNames) { fset.add(f); } for(String f : fieldNames) { fset.add(f); }
// fetch unique key if one exists. // fetch unique key if one exists.
SchemaField keyField = schema.getUniqueKeyField(); SchemaField keyField = schema.getUniqueKeyField();
if(null != keyField) if(null != keyField)
fset.add(keyField.getName()); fset.add(keyField.getName());
} }
// get FastVectorHighlighter instance out of the processing loop
FastVectorHighlighter fvh = new FastVectorHighlighter(
// FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
params.getBool( HighlightParams.USE_PHRASE_HIGHLIGHTER, true ),
// FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
params.getBool( HighlightParams.FIELD_MATCH, false ),
getFragListBuilder( params ),
getFragmentsBuilder( params ) );
FieldQuery fieldQuery = fvh.getFieldQuery( query );
// Highlight each document // Highlight each document
DocIterator iterator = docs.iterator(); DocIterator iterator = docs.iterator();
for (int i = 0; i < docs.size(); i++) { for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc(); int docId = iterator.nextDoc();
Document doc = searcher.doc(docId, fset); Document doc = searcher.doc(docId, fset);
NamedList docSummaries = new SimpleOrderedMap(); NamedList docSummaries = new SimpleOrderedMap();
for (String fieldName : fieldNames) { for (String fieldName : fieldNames) {
fieldName = fieldName.trim(); fieldName = fieldName.trim();
String[] docTexts = doc.getValues(fieldName); if( useFastVectorHighlighter( params, schema, fieldName ) )
if (docTexts == null) continue; doHighlightingByFastVectorHighlighter( fvh, fieldQuery, req, docSummaries, docId, doc, fieldName );
else
TokenStream tstream = null; doHighlightingByHighlighter( query, req, docSummaries, docId, doc, fieldName );
int numFragments = getMaxSnippets(fieldName, params); }
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); String printId = schema.printableUniqueKey(doc);
fragments.add(printId == null ? null : printId, docSummaries);
}
return fragments;
}
/**
 * Decides whether a field is highlighted with FastVectorHighlighter rather than
 * the standard Highlighter.
 * <p>
 * FastVectorHighlighter is chosen only when the schema field stores both term
 * positions and term offsets and the per-field
 * {@link HighlightParams#USE_HIGHLIGHTER} parameter (default false) is not set.
 *
 * @param params    request parameters
 * @param schema    schema used to look up the field definition
 * @param fieldName name of the field about to be highlighted
 * @return true to use FastVectorHighlighter, false to use the standard Highlighter
 */
private boolean useFastVectorHighlighter( SolrParams params, IndexSchema schema, String fieldName ){
  // Look the field up leniently: a requested highlight field that the schema does
  // not define must not fail the whole request. Returning false routes it to the
  // standard Highlighter path, which simply skips fields without stored values.
  SchemaField schemaField = schema.getFieldOrNull( fieldName );
  if( schemaField == null ) return false;
  return schemaField.storeTermPositions() &&
    schemaField.storeTermOffsets() &&
    !params.getFieldBool( fieldName, HighlightParams.USE_HIGHLIGHTER, false );
}
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
int docId, Document doc, String fieldName ) throws IOException {
SolrParams params = req.getParams();
String[] docTexts = doc.getValues(fieldName);
if (docTexts == null) return;
SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema();
TokenStream tstream = null;
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
String[] summaries = null; String[] summaries = null;
List<TextFragment> frags = new ArrayList<TextFragment>(); List<TextFragment> frags = new ArrayList<TextFragment>();
TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
try { try {
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName); TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
if (tvStream != null) { if (tvStream != null) {
tots = new TermOffsetsTokenStream(tvStream); tots = new TermOffsetsTokenStream(tvStream);
}
}
catch (IllegalArgumentException e) {
// No problem. But we can't use TermOffsets optimization.
}
for (int j = 0; j < docTexts.length; j++) {
if( tots != null ) {
// if we're using TermOffsets optimization, then get the next
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
} else {
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
Highlighter highlighter;
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
// TODO: this is not always necessary - eventually we would like to avoid this wrap
// when it is not needed.
tstream = new CachingTokenFilter(tstream);
// get highlighter
highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
// after highlighter initialization, reset tstream since construction of highlighter already used it
tstream.reset();
}
else {
// use "the old way"
highlighter = getHighlighter(query, fieldName, req);
}
int maxCharsToAnalyze = params.getFieldInt(fieldName,
HighlightParams.MAX_CHARS,
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
if (maxCharsToAnalyze < 0) {
highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
} else {
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
}
try {
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
for (int k = 0; k < bestTextFragments.length; k++) {
if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
frags.add(bestTextFragments[k]);
}
}
} catch (InvalidTokenOffsetsException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
// sort such that the fragments with the highest score come first
Collections.sort(frags, new Comparator<TextFragment>() {
public int compare(TextFragment arg0, TextFragment arg1) {
return Math.round(arg1.getScore() - arg0.getScore());
}
});
// convert fragments back into text
// TODO: we can include score and position information in output as snippet attributes
if (frags.size() > 0) {
ArrayList<String> fragTexts = new ArrayList<String>();
for (TextFragment fragment: frags) {
if ((fragment != null) && (fragment.getScore() > 0)) {
fragTexts.add(fragment.toString());
}
if (fragTexts.size() >= numFragments) break;
}
summaries = fragTexts.toArray(new String[0]);
if (summaries.length > 0)
docSummaries.add(fieldName, summaries);
}
// no summeries made, copy text from alternate field
if (summaries == null || summaries.length == 0) {
String alternateField = req.getParams().getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
if (alternateField != null && alternateField.length() > 0) {
String[] altTexts = doc.getValues(alternateField);
if (altTexts != null && altTexts.length > 0){
int alternateFieldLen = req.getParams().getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH,0);
if( alternateFieldLen <= 0 ){
docSummaries.add(fieldName, altTexts);
}
else{
List<String> altList = new ArrayList<String>();
int len = 0;
for( String altText: altTexts ){
altList.add( len + altText.length() > alternateFieldLen ?
new String(altText.substring( 0, alternateFieldLen - len )) : altText );
len += altText.length();
if( len >= alternateFieldLen ) break;
}
docSummaries.add(fieldName, altList);
}
}
}
}
} }
String printId = schema.printableUniqueKey(doc); }
fragments.add(printId == null ? null : printId, docSummaries); catch (IllegalArgumentException e) {
} // No problem. But we can't use TermOffsets optimization.
return fragments; }
for (int j = 0; j < docTexts.length; j++) {
if( tots != null ) {
// if we're using TermOffsets optimization, then get the next
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
} else {
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
Highlighter highlighter;
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
// TODO: this is not always necessary - eventually we would like to avoid this wrap
// when it is not needed.
tstream = new CachingTokenFilter(tstream);
// get highlighter
highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
// after highlighter initialization, reset tstream since construction of highlighter already used it
tstream.reset();
}
else {
// use "the old way"
highlighter = getHighlighter(query, fieldName, req);
}
int maxCharsToAnalyze = params.getFieldInt(fieldName,
HighlightParams.MAX_CHARS,
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
if (maxCharsToAnalyze < 0) {
highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
} else {
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
}
try {
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
for (int k = 0; k < bestTextFragments.length; k++) {
if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
frags.add(bestTextFragments[k]);
}
}
} catch (InvalidTokenOffsetsException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
// sort such that the fragments with the highest score come first
Collections.sort(frags, new Comparator<TextFragment>() {
public int compare(TextFragment arg0, TextFragment arg1) {
return Math.round(arg1.getScore() - arg0.getScore());
}
});
// convert fragments back into text
// TODO: we can include score and position information in output as snippet attributes
if (frags.size() > 0) {
ArrayList<String> fragTexts = new ArrayList<String>();
for (TextFragment fragment: frags) {
if ((fragment != null) && (fragment.getScore() > 0)) {
fragTexts.add(fragment.toString());
}
if (fragTexts.size() >= numFragments) break;
}
summaries = fragTexts.toArray(new String[0]);
if (summaries.length > 0)
docSummaries.add(fieldName, summaries);
}
// no summaries made, copy text from alternate field
if (summaries == null || summaries.length == 0) {
alternateField( docSummaries, params, doc, fieldName );
}
} }
/**
 * Highlights one field of one document with FastVectorHighlighter.
 * <p>
 * The fragment size and snippet count are read per field from
 * {@link HighlightParams#FRAGSIZE} (default 100) and
 * {@link HighlightParams#SNIPPETS} (default 1). If the highlighter yields no
 * snippets, the alternate-field fallback is applied instead.
 *
 * @param highlighter  the FastVectorHighlighter shared across the request
 * @param fieldQuery   the FieldQuery built from the request's query
 * @param req          current request; supplies parameters and the index reader
 * @param docSummaries per-document result list that receives the snippets
 * @param docId        internal id of the document being highlighted
 * @param doc          the loaded document, used for the alternate-field fallback
 * @param fieldName    field to highlight
 * @throws IOException declared for failures while reading from the index
 */
private void doHighlightingByFastVectorHighlighter( FastVectorHighlighter highlighter, FieldQuery fieldQuery,
    SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName ) throws IOException {
  final SolrParams requestParams = req.getParams();
  final String[] fragments = highlighter.getBestFragments(
      fieldQuery,
      req.getSearcher().getReader(),
      docId,
      fieldName,
      requestParams.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
      requestParams.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ) );

  // Nothing highlighted: fall back to the configured alternate field, if any.
  if( fragments == null || fragments.length == 0 ){
    alternateField( docSummaries, requestParams, doc, fieldName );
    return;
  }
  docSummaries.add( fieldName, fragments );
}
/**
 * Fallback used when highlighting produced no snippets for a field: copies the
 * values of the field named by {@link HighlightParams#ALTERNATE_FIELD} into the
 * document's summaries under <code>fieldName</code>.
 * <p>
 * If {@link HighlightParams#ALTERNATE_FIELD_LENGTH} is positive, values are
 * emitted in order until that many characters have been produced, truncating
 * the value that crosses the limit. Otherwise all values are added unchanged.
 * Nothing is added when no alternate field is configured or it has no values.
 *
 * @param docSummaries per-document result list to add to
 * @param params       request parameters
 * @param doc          document to read the alternate field's values from
 * @param fieldName    the field that was being highlighted
 */
private void alternateField( NamedList docSummaries, SolrParams params, Document doc, String fieldName ){
  final String altFieldName = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
  if (altFieldName == null || altFieldName.length() == 0) return;

  final String[] altValues = doc.getValues(altFieldName);
  if (altValues == null || altValues.length == 0) return;

  final int maxLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH,0);
  if (maxLen <= 0) {
    // no length limit requested: hand back the values as they are
    docSummaries.add(fieldName, altValues);
    return;
  }

  final List<String> truncated = new ArrayList<String>();
  int consumed = 0;   // characters accounted for so far across all values
  for (int i = 0; i < altValues.length; i++) {
    final String value = altValues[i];
    if (consumed + value.length() > maxLen) {
      // this value crosses the limit: keep only the part that still fits
      truncated.add(new String(value.substring( 0, maxLen - consumed )));
    } else {
      truncated.add(value);
    }
    consumed += value.length();
    if (consumed >= maxLen) break;
  }
  docSummaries.add(fieldName, truncated);
}
private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException { private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
TokenStream tstream; TokenStream tstream;

View File

@ -16,11 +16,9 @@
*/ */
package org.apache.solr.highlight; package org.apache.solr.highlight;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.NullFragmenter; import org.apache.lucene.search.highlight.NullFragmenter;
import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleFragmenter;

View File

@ -0,0 +1,62 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.SolrParams;
/**
 * Highlighting plugin that supplies a Lucene <code>ScoreOrderFragmentsBuilder</code>
 * constructed with <code>BaseFragmentsBuilder.COLORED_PRE_TAGS</code> and
 * <code>BaseFragmentsBuilder.COLORED_POST_TAGS</code>.
 */
public class MultiColoredScoreOrderFragmentsBuilder extends
    HighlightingPluginBase implements SolrFragmentsBuilder {

  /**
   * Creates a new colored, score-ordered FragmentsBuilder.
   *
   * @param params request parameters; not consulted, because the tag sets are fixed
   * @return a new Lucene ScoreOrderFragmentsBuilder using the colored pre/post tags
   */
  public FragmentsBuilder getFragmentsBuilder(SolrParams params) {
    // request counter inherited from HighlightingPluginBase
    numRequests++;
    // Neither the request parameters nor the plugin's configured defaults affect
    // the result, so no merged parameter view is built here.
    return new ScoreOrderFragmentsBuilder(
        BaseFragmentsBuilder.COLORED_PRE_TAGS, BaseFragmentsBuilder.COLORED_POST_TAGS );
  }

  ///////////////////////////////////////////////////////////////////////
  //////////////////////// SolrInfoMBeans methods ///////////////////////
  ///////////////////////////////////////////////////////////////////////

  @Override
  public String getDescription() {
    return "MultiColoredScoreOrderFragmentsBuilder";
  }

  @Override
  public String getSource() {
    return "$URL$";
  }

  @Override
  public String getSourceId() {
    return "$Id$";
  }

  @Override
  public String getVersion() {
    return "$Revision$";
  }
}

View File

@ -0,0 +1,62 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.SolrParams;
/**
 * Highlighting plugin that supplies a Lucene <code>SimpleFragmentsBuilder</code>
 * constructed with <code>BaseFragmentsBuilder.COLORED_PRE_TAGS</code> and
 * <code>BaseFragmentsBuilder.COLORED_POST_TAGS</code>.
 */
public class MultiColoredSimpleFragmentsBuilder extends HighlightingPluginBase
    implements SolrFragmentsBuilder {

  /**
   * Creates a new colored SimpleFragmentsBuilder.
   *
   * @param params request parameters; not consulted, because the tag sets are fixed
   * @return a new Lucene SimpleFragmentsBuilder using the colored pre/post tags
   */
  public FragmentsBuilder getFragmentsBuilder(SolrParams params) {
    // request counter inherited from HighlightingPluginBase
    numRequests++;
    // Neither the request parameters nor the plugin's configured defaults affect
    // the result, so no merged parameter view is built here.
    return new SimpleFragmentsBuilder(
        BaseFragmentsBuilder.COLORED_PRE_TAGS, BaseFragmentsBuilder.COLORED_POST_TAGS );
  }

  ///////////////////////////////////////////////////////////////////////
  //////////////////////// SolrInfoMBeans methods ///////////////////////
  ///////////////////////////////////////////////////////////////////////

  @Override
  public String getDescription() {
    return "MultiColoredSimpleFragmentsBuilder";
  }

  @Override
  public String getSource() {
    return "$URL$";
  }

  @Override
  public String getSourceId() {
    return "$Id$";
  }

  @Override
  public String getVersion() {
    return "$Revision$";
  }
}

View File

@ -0,0 +1,59 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.SolrParams;
/**
 * Highlighting plugin that supplies a default-constructed Lucene
 * <code>org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder</code>.
 */
public class ScoreOrderFragmentsBuilder extends HighlightingPluginBase
    implements SolrFragmentsBuilder {

  /**
   * Creates a new score-ordered FragmentsBuilder.
   *
   * @param params request parameters; not consulted by this implementation
   * @return a new Lucene ScoreOrderFragmentsBuilder
   */
  public FragmentsBuilder getFragmentsBuilder(SolrParams params) {
    // request counter inherited from HighlightingPluginBase
    numRequests++;
    // Neither the request parameters nor the plugin's configured defaults affect
    // the result, so no merged parameter view is built here.
    // Fully qualified: the Lucene class has the same simple name as this plugin.
    return new org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder();
  }

  ///////////////////////////////////////////////////////////////////////
  //////////////////////// SolrInfoMBeans methods ///////////////////////
  ///////////////////////////////////////////////////////////////////////

  @Override
  public String getDescription() {
    return "ScoreOrderFragmentsBuilder";
  }

  @Override
  public String getSource() {
    return "$URL$";
  }

  @Override
  public String getSourceId() {
    return "$Id$";
  }

  @Override
  public String getVersion() {
    return "$Revision$";
  }
}

View File

@ -0,0 +1,59 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.SolrParams;
/**
 * Highlighting plugin that supplies a default-constructed Lucene
 * <code>org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder</code>.
 */
public class SimpleFragListBuilder extends HighlightingPluginBase implements
    SolrFragListBuilder {

  /**
   * Creates a new FragListBuilder.
   *
   * @param params request parameters; not consulted by this implementation
   * @return a new Lucene SimpleFragListBuilder
   */
  public FragListBuilder getFragListBuilder(SolrParams params) {
    // request counter inherited from HighlightingPluginBase
    numRequests++;
    // Neither the request parameters nor the plugin's configured defaults affect
    // the result, so no merged parameter view is built here.
    // Fully qualified: the Lucene class has the same simple name as this plugin.
    return new org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder();
  }

  ///////////////////////////////////////////////////////////////////////
  //////////////////////// SolrInfoMBeans methods ///////////////////////
  ///////////////////////////////////////////////////////////////////////

  @Override
  public String getDescription() {
    return "SimpleFragListBuilder";
  }

  @Override
  public String getSource() {
    return "$URL$";
  }

  @Override
  public String getSourceId() {
    return "$Id$";
  }

  @Override
  public String getVersion() {
    return "$Revision$";
  }
}

View File

@ -0,0 +1,55 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.SolrParams;
/**
 * Solr highlighting plugin that hands out instances of Lucene's
 * <code>org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder</code>.
 * <p>
 * The Lucene class shares this class's simple name, so it is always referred
 * to by its fully qualified name here.
 */
public class SimpleFragmentsBuilder extends HighlightingPluginBase implements
    SolrFragmentsBuilder {

  /**
   * Counts the request and returns a new Lucene SimpleFragmentsBuilder.
   *
   * @param params The params controlling Highlighting; currently not consulted
   * @return a newly created Lucene SimpleFragmentsBuilder
   */
  public FragmentsBuilder getFragmentsBuilder(SolrParams params) {
    // NOTE: params are not used to configure the builder, so they are
    // deliberately not wrapped with the plugin defaults (doing so only
    // allocated an object that was never read). If params are ever needed,
    // wrap them first:
    //   params = new DefaultSolrParams( params, defaults );
    numRequests++;

    return new org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder();
  }

  ///////////////////////////////////////////////////////////////////////
  //////////////////////// SolrInfoMBeans methods ///////////////////////
  ///////////////////////////////////////////////////////////////////////

  /** @return the literal description "SimpleFragmentsBuilder" */
  @Override
  public String getDescription() {
    return "SimpleFragmentsBuilder";
  }

  /** @return the source-location keyword placeholder, unchanged */
  @Override
  public String getSource() {
    return "$URL$";
  }

  /** @return the source-id keyword placeholder, unchanged */
  @Override
  public String getSourceId() {
    return "$Id$";
  }

  /** @return the revision keyword placeholder, unchanged */
  @Override
  public String getVersion() {
    return "$Revision$";
  }
}

View File

@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
/**
 * Plugin interface for highlighting components that supply a Lucene
 * {@link FragListBuilder}.
 */
public interface SolrFragListBuilder extends SolrInfoMBean, NamedListInitializedPlugin {
/** <code>init</code> will be called just once, immediately after creation.
* <p>The args are user-level initialization parameters that
* may be specified when declaring a fragListBuilder in
* solrconfig.xml
*
* @param args the initialization parameters
*/
public void init( NamedList args);
/**
* Return a FragListBuilder.
*
* @param params The params controlling Highlighting
* @return An appropriate FragListBuilder.
*/
public FragListBuilder getFragListBuilder( SolrParams params );
}

View File

@ -0,0 +1,43 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
/**
 * Plugin interface for highlighting components that supply a Lucene
 * {@link FragmentsBuilder}.
 */
public interface SolrFragmentsBuilder extends SolrInfoMBean, NamedListInitializedPlugin {
/** <code>init</code> will be called just once, immediately after creation.
* <p>The args are user-level initialization parameters that
* may be specified when declaring a fragmentsBuilder in
* solrconfig.xml
*
* @param args the initialization parameters
*/
public void init( NamedList args);
/**
* Return a FragmentsBuilder.
*
* @param params The params controlling Highlighting
* @return An appropriate FragmentsBuilder.
*/
public FragmentsBuilder getFragmentsBuilder( SolrParams params );
}

View File

@ -46,6 +46,15 @@ public abstract class SolrHighlighter
// Thread safe registry // Thread safe registry
protected final Map<String,SolrFragmenter> fragmenters = protected final Map<String,SolrFragmenter> fragmenters =
Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() ); Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() );
// Thread safe registry
protected final Map<String, SolrFragListBuilder> fragListBuilders =
Collections.synchronizedMap( new HashMap<String, SolrFragListBuilder>() );
// Thread safe registry
protected final Map<String, SolrFragmentsBuilder> fragmentsBuilders =
Collections.synchronizedMap( new HashMap<String, SolrFragmentsBuilder>() );
@Deprecated @Deprecated
public abstract void initalize( SolrConfig config ); public abstract void initalize( SolrConfig config );

View File

@ -0,0 +1,70 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import java.util.HashMap;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.TestHarness;
/**
 * Tests for the FastVectorHighlighter integration: verifies which
 * fragListBuilder / fragmentsBuilder plugins the highlighter has registered,
 * and runs one basic highlighting request against the <code>tv_text</code> field.
 */
public class FastVectorHighlighterTest extends AbstractSolrTestCase {

  @Override
  public String getSchemaFile() {
    return "schema.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  /**
   * Looks up the registered builders by the <code>null</code> key, the empty
   * key and their configured names, and checks the implementation types.
   */
  public void testConfig() {
    final SolrHighlighter solrHighlighter = h.getCore().getHighlighter();

    // fragListBuilder registry: the null and "" keys must yield the same
    // instance, and both it and the "simple" entry must be SimpleFragListBuilder.
    final SolrFragListBuilder flbForNull = solrHighlighter.fragListBuilders.get( null );
    final SolrFragListBuilder flbForEmpty = solrHighlighter.fragListBuilders.get( "" );
    final SolrFragListBuilder flbNamedSimple = solrHighlighter.fragListBuilders.get( "simple" );
    assertSame( flbForNull, flbForEmpty );
    assertTrue( flbForNull instanceof SimpleFragListBuilder );
    assertTrue( flbNamedSimple instanceof SimpleFragListBuilder );

    // fragmentsBuilder registry: the null and "" keys must yield the same
    // ScoreOrderFragmentsBuilder instance; the "colored" and "scoreOrder"
    // entries must have their respective implementation types.
    final SolrFragmentsBuilder fbForNull = solrHighlighter.fragmentsBuilders.get( null );
    final SolrFragmentsBuilder fbForEmpty = solrHighlighter.fragmentsBuilders.get( "" );
    final SolrFragmentsBuilder fbNamedColored = solrHighlighter.fragmentsBuilders.get( "colored" );
    final SolrFragmentsBuilder fbNamedScoreOrder = solrHighlighter.fragmentsBuilders.get( "scoreOrder" );
    assertSame( fbForNull, fbForEmpty );
    assertTrue( fbForNull instanceof ScoreOrderFragmentsBuilder );
    assertTrue( fbNamedColored instanceof MultiColoredScoreOrderFragmentsBuilder );
    assertTrue( fbNamedScoreOrder instanceof ScoreOrderFragmentsBuilder );
  }

  /**
   * Indexes a single document and checks the highlighted snippet returned
   * for a query on <code>tv_text</code>.
   */
  public void test() {
    // Highlighting request parameters.
    final HashMap<String,String> hlParams = new HashMap<String,String>();
    hlParams.put( "hl", "true" );
    hlParams.put( "hl.fl", "tv_text" );
    hlParams.put( "hl.snippets", "2" );
    final TestHarness.LocalRequestFactory requestFactory =
        h.getRequestFactory( "standard", 0, 200, hlParams );

    // Index one document, then commit and optimize before querying.
    assertU( adoc( "tv_text", "basic fast vector highlighter test",
                   "id", "1" ) );
    assertU( commit() );
    assertU( optimize() );

    // Document 1 must appear in the highlighting section with the expected snippet.
    assertQ( "Basic summarization",
             requestFactory.makeRequest( "tv_text:vector" ),
             "//lst[@name='highlighting']/lst[@name='1']",
             "//lst[@name='1']/arr[@name='tv_text']/str[.=' fast <b>vector</b> highlighter test']" );
  }
}

View File

@ -134,6 +134,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "tv_text"); args.put("hl.fl", "tv_text");
args.put("hl.snippets", "2"); args.put("hl.snippets", "2");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard",0,200,args); "standard",0,200,args);
@ -174,6 +175,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "tv_mv_text"); args.put("hl.fl", "tv_mv_text");
args.put("hl.snippets", "2"); args.put("hl.snippets", "2");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard",0,200,args); "standard",0,200,args);
@ -200,6 +202,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "tv_mv_text"); args.put("hl.fl", "tv_mv_text");
args.put("hl.snippets", "2"); args.put("hl.snippets", "2");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard",0,200,args); "standard",0,200,args);
@ -225,6 +228,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
args.put("hl.fl", "tv_text"); args.put("hl.fl", "tv_text");
args.put("qf", "tv_text"); args.put("qf", "tv_text");
args.put("q.alt", "*:*"); args.put("q.alt", "*:*");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"dismax",0,200,args); "dismax",0,200,args);
@ -336,6 +340,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
HashMap<String,String> args = new HashMap<String,String>(); HashMap<String,String> args = new HashMap<String,String>();
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "t_text tv_text"); args.put("hl.fl", "t_text tv_text");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args); "standard", 0, 200, args);
@ -436,6 +441,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
HashMap<String,String> args = new HashMap<String,String>(); HashMap<String,String> args = new HashMap<String,String>();
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "tv_text"); args.put("hl.fl", "tv_text");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args); "standard", 0, 200, args);
@ -534,6 +540,7 @@ public class HighlighterTest extends AbstractSolrTestCase {
HashMap<String,String> args = new HashMap<String,String>(); HashMap<String,String> args = new HashMap<String,String>();
args.put("hl", "true"); args.put("hl", "true");
args.put("hl.fl", "tv_text"); args.put("hl.fl", "tv_text");
args.put("hl.useHighlighter", "true");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args); "standard", 0, 200, args);
assertQ("Basic summarization", assertQ("Basic summarization",

View File

@ -418,6 +418,14 @@
<str name="hl.simple.post"><![CDATA[</em>]]></str> <str name="hl.simple.post"><![CDATA[</em>]]></str>
</lst> </lst>
</formatter> </formatter>
<!-- Configure the standard fragListBuilder -->
<fragListBuilder name="simple" class="org.apache.solr.highlight.SimpleFragListBuilder" default="true"/>
<!-- Configure the standard fragmentsBuilder -->
<fragmentsBuilder name="colored" class="org.apache.solr.highlight.MultiColoredScoreOrderFragmentsBuilder" default="true"/>
<fragmentsBuilder name="scoreOrder" class="org.apache.solr.highlight.ScoreOrderFragmentsBuilder" default="true"/>
</highlighting> </highlighting>