SOLR-2749: add boundary scanner feature

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1170616 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-09-14 13:38:49 +00:00
parent 31b0791a3e
commit f1efdf5553
15 changed files with 307 additions and 13 deletions

View File

@ -45,6 +45,14 @@ public class ScoreOrderFragmentsBuilder extends BaseFragmentsBuilder {
super( preTags, postTags );
}
public ScoreOrderFragmentsBuilder( BoundaryScanner bs ){
super( bs );
}
public ScoreOrderFragmentsBuilder( String[] preTags, String[] postTags, BoundaryScanner bs ){
super( preTags, postTags, bs );
}
/**
* Sort by score the list of WeightedFragInfo
*/

View File

@ -44,6 +44,14 @@ public class SimpleFragmentsBuilder extends BaseFragmentsBuilder {
super( preTags, postTags );
}
public SimpleFragmentsBuilder( BoundaryScanner bs ) {
super( bs );
}
public SimpleFragmentsBuilder( String[] preTags, String[] postTags, BoundaryScanner bs ) {
super( preTags, postTags, bs );
}
/**
* do nothing. return the source list.
*/

View File

@ -327,6 +327,12 @@ Documentation
================== 3.5.0 ==================
New Features
----------------------
* SOLR-2749: Add boundary scanners for FastVectorHighlighter. <boundaryScanner/>
can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
parameter to specify the named <boundaryScanner/>. (koji)
Bug Fixes
----------------------
* SOLR-2748: The CommitTracker used for commitWith or autoCommit by maxTime

View File

@ -0,0 +1,91 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import java.text.BreakIterator;
import java.util.Locale;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
public class BreakIteratorBoundaryScanner extends SolrBoundaryScanner {
@Override
protected BoundaryScanner get(String fieldName, SolrParams params) {
// construct Locale
String language = params.getFieldParam(fieldName, HighlightParams.BS_LANGUAGE);
String country = params.getFieldParam(fieldName, HighlightParams.BS_COUNTRY);
if(country != null && language == null){
throw new SolrException(ErrorCode.BAD_REQUEST,
HighlightParams.BS_LANGUAGE + " parameter cannot be null when you specify " + HighlightParams.BS_COUNTRY);
}
Locale locale = null;
if(language != null){
locale = country == null ? new Locale(language) : new Locale(language, country);
}
// construct BreakIterator
String type = params.getFieldParam(fieldName, HighlightParams.BS_TYPE, "WORD").toLowerCase();
BreakIterator bi = null;
if(type.equals("character")){
bi = locale == null ? BreakIterator.getCharacterInstance() : BreakIterator.getCharacterInstance(locale);
}
else if(type.equals("word")){
bi = locale == null ? BreakIterator.getWordInstance() : BreakIterator.getWordInstance(locale);
}
else if(type.equals("line")){
bi = locale == null ? BreakIterator.getLineInstance() : BreakIterator.getLineInstance(locale);
}
else if(type.equals("sentence")){
bi = locale == null ? BreakIterator.getSentenceInstance() : BreakIterator.getSentenceInstance(locale);
}
else
throw new SolrException(ErrorCode.BAD_REQUEST, type + " is invalid for parameter " + HighlightParams.BS_TYPE);
return new org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner(bi);
}
///////////////////////////////////////////////////////////////////////
//////////////////////// SolrInfoMBeans methods ///////////////////////
///////////////////////////////////////////////////////////////////////
@Override
public String getDescription() {
return "BreakIteratorBoundaryScanner";
}
@Override
public String getSource() {
return "$URL$";
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getVersion() {
return "$Revision$";
}
}

View File

@ -36,6 +36,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
@ -83,6 +84,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
fragmenters.clear();
fragListBuilders.clear();
fragmentsBuilders.clear();
boundaryScanners.clear();
// Load the fragmenters
SolrFragmenter frag = solrCore.initPlugins(info.getChildren("fragmenter") , fragmenters,SolrFragmenter.class,null);
@ -116,8 +118,14 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
fragmentsBuilders.put( "", fragsBuilder );
fragmentsBuilders.put( null, fragsBuilder );
initialized = true;
// Load the BoundaryScanners
SolrBoundaryScanner boundaryScanner = solrCore.initPlugins(info.getChildren("boundaryScanner"),
boundaryScanners, SolrBoundaryScanner.class, null);
if(boundaryScanner == null) boundaryScanner = new SimpleBoundaryScanner();
boundaryScanners.put("", boundaryScanner);
boundaryScanners.put(null, boundaryScanner);
initialized = true;
}
//just for back-compat with the deprecated method
private boolean initialized = false;
@ -144,6 +152,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
SolrFragmentsBuilder fragsBuilder = new ScoreOrderFragmentsBuilder();
fragmentsBuilders.put( "", fragsBuilder );
fragmentsBuilders.put( null, fragsBuilder );
SolrBoundaryScanner boundaryScanner = new SimpleBoundaryScanner();
boundaryScanners.put("", boundaryScanner);
boundaryScanners.put(null, boundaryScanner);
}
/**
@ -311,7 +323,8 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
}
protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder( params );
BoundaryScanner bs = getBoundaryScanner(fieldName, params);
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder( params, bs );
}
private SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
@ -323,6 +336,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return solrFb;
}
private BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){
String bs = params.getFieldParam(fieldName, HighlightParams.BOUNDARY_SCANNER);
SolrBoundaryScanner solrBs = boundaryScanners.get(bs);
if(solrBs == null){
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown boundaryScanner: " + bs);
}
return solrBs.getBoundaryScanner(fieldName, params);
}
/**
* Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled.

View File

@ -17,6 +17,7 @@
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.params.SolrParams;
@ -24,9 +25,9 @@ public class ScoreOrderFragmentsBuilder extends SolrFragmentsBuilder {
@Override
protected FragmentsBuilder getFragmentsBuilder( SolrParams params,
String[] preTags, String[] postTags ) {
String[] preTags, String[] postTags, BoundaryScanner bs ) {
org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder sofb =
new org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder( preTags, postTags );
new org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder( preTags, postTags, bs );
sofb.setMultiValuedSeparator( getMultiValuedSeparatorChar( params ) );
return sofb;
}

View File

@ -0,0 +1,62 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
public class SimpleBoundaryScanner extends SolrBoundaryScanner {
@Override
protected BoundaryScanner get(String fieldName, SolrParams params) {
int maxScan = params.getFieldInt(fieldName, HighlightParams.BS_MAX_SCAN, 10);
String str = params.getFieldParam(fieldName, HighlightParams.BS_CHARS, ".,!? \t\n");
Character[] chars = new Character[str.length()];
for(int i = 0; i < str.length(); i++){
chars[i] = str.charAt(i);
}
return new org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner(maxScan, chars);
}
///////////////////////////////////////////////////////////////////////
//////////////////////// SolrInfoMBeans methods ///////////////////////
///////////////////////////////////////////////////////////////////////
@Override
public String getDescription() {
return "SimpleBoundaryScanner";
}
@Override
public String getSource() {
return "$URL$";
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getVersion() {
return "$Revision$";
}
}

View File

@ -17,6 +17,7 @@
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.params.SolrParams;
@ -24,9 +25,9 @@ public class SimpleFragmentsBuilder extends SolrFragmentsBuilder {
@Override
protected FragmentsBuilder getFragmentsBuilder( SolrParams params,
String[] preTags, String[] postTags ) {
String[] preTags, String[] postTags, BoundaryScanner bs ) {
org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder sfb =
new org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder( preTags, postTags );
new org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder( preTags, postTags, bs );
sfb.setMultiValuedSeparator( getMultiValuedSeparatorChar( params ) );
return sfb;
}

View File

@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
public abstract class SolrBoundaryScanner extends HighlightingPluginBase implements
SolrInfoMBean, NamedListInitializedPlugin {
public BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){
numRequests++;
if( defaults != null ) {
params = new DefaultSolrParams( params, defaults );
}
return get(fieldName, params);
}
protected abstract BoundaryScanner get(String fieldName, SolrParams params);
}

View File

@ -17,6 +17,7 @@
package org.apache.solr.highlight;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.DefaultSolrParams;
@ -37,12 +38,12 @@ public abstract class SolrFragmentsBuilder extends HighlightingPluginBase
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.vectorhighlight.FragmentsBuilder}.
*/
public FragmentsBuilder getFragmentsBuilder(SolrParams params) {
public FragmentsBuilder getFragmentsBuilder(SolrParams params, BoundaryScanner bs) {
numRequests++;
if( defaults != null ) {
params = new DefaultSolrParams( params, defaults );
}
return getFragmentsBuilder( params, getPreTags( params, null ), getPostTags( params, null ) );
return getFragmentsBuilder( params, getPreTags( params, null ), getPostTags( params, null ), bs );
}
public String[] getPreTags( SolrParams params, String fieldName ){
@ -69,7 +70,8 @@ public abstract class SolrFragmentsBuilder extends HighlightingPluginBase
return tags;
}
protected abstract FragmentsBuilder getFragmentsBuilder( SolrParams params, String[] preTags, String[] postTags );
protected abstract FragmentsBuilder getFragmentsBuilder( SolrParams params,
String[] preTags, String[] postTags, BoundaryScanner bs );
protected char getMultiValuedSeparatorChar( SolrParams params ){
String separator = params.get( HighlightParams.MULTI_VALUED_SEPARATOR, " " );

View File

@ -58,6 +58,10 @@ public abstract class SolrHighlighter
protected final Map<String, SolrFragmentsBuilder> fragmentsBuilders =
new HashMap<String, SolrFragmentsBuilder>() ;
// Thread safe registry
protected final Map<String, SolrBoundaryScanner> boundaryScanners =
new HashMap<String, SolrBoundaryScanner>() ;
@Deprecated
public abstract void initalize( SolrConfig config );

View File

@ -451,8 +451,22 @@
<!-- Configure the standard fragmentsBuilder -->
<fragmentsBuilder name="simple" class="org.apache.solr.highlight.SimpleFragmentsBuilder" default="true"/>
<fragmentsBuilder name="scoreOrder" class="org.apache.solr.highlight.ScoreOrderFragmentsBuilder"/>
<fragmentsBuilder name="scoreOrder" class="org.apache.solr.highlight.ScoreOrderFragmentsBuilder" default="true"/>
<boundaryScanner name="simple" class="solr.highlight.SimpleBoundaryScanner" default="true">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator" class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.type">WORD</str>
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>

View File

@ -36,7 +36,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
public void testConfig(){
SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore());
// Make sure we loaded the one fragListBuilder
// Make sure we loaded one fragListBuilder
SolrFragListBuilder solrFlbNull = highlighter.fragListBuilders.get( null );
SolrFragListBuilder solrFlbEmpty = highlighter.fragListBuilders.get( "" );
SolrFragListBuilder solrFlbSimple = highlighter.fragListBuilders.get( "simple" );
@ -44,15 +44,25 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
assertTrue( solrFlbNull instanceof SimpleFragListBuilder );
assertTrue( solrFlbSimple instanceof SimpleFragListBuilder );
// Make sure we loaded the one fragmentsBuilder
// Make sure we loaded two fragmentsBuilders
SolrFragmentsBuilder solrFbNull = highlighter.fragmentsBuilders.get( null );
SolrFragmentsBuilder solrFbEmpty = highlighter.fragmentsBuilders.get( "" );
SolrFragmentsBuilder solrFbSimple = highlighter.fragmentsBuilders.get( "simple" );
SolrFragmentsBuilder solrFbSO = highlighter.fragmentsBuilders.get( "scoreOrder" );
assertSame( solrFbNull, solrFbEmpty );
assertTrue( solrFbNull instanceof ScoreOrderFragmentsBuilder );
assertTrue( solrFbNull instanceof SimpleFragmentsBuilder );
assertTrue( solrFbSimple instanceof SimpleFragmentsBuilder );
assertTrue( solrFbSO instanceof ScoreOrderFragmentsBuilder );
// Make sure we loaded two boundaryScanners
SolrBoundaryScanner solrBsNull = highlighter.boundaryScanners.get(null);
SolrBoundaryScanner solrBsEmpty = highlighter.boundaryScanners.get("");
SolrBoundaryScanner solrBsSimple = highlighter.boundaryScanners.get("simple");
SolrBoundaryScanner solrBsBI = highlighter.boundaryScanners.get("breakIterator");
assertSame(solrBsNull, solrBsEmpty);
assertTrue(solrBsNull instanceof SimpleBoundaryScanner);
assertTrue(solrBsSimple instanceof SimpleBoundaryScanner);
assertTrue(solrBsBI instanceof BreakIteratorBoundaryScanner);
}
@Test

View File

@ -1472,6 +1472,27 @@
<str name="hl.tag.post"><![CDATA[</b>]]></str>
</lst>
</fragmentsBuilder>
<boundaryScanner name="default"
default="true"
class="solr.highlight.SimpleBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator"
class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
<str name="hl.bs.type">WORD</str>
<!-- language and country are used when constructing Locale object. -->
<!-- And the Locale object will be used when getting instance of BreakIterator -->
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>

View File

@ -33,6 +33,12 @@ public interface HighlightParams {
public static final String FRAGMENTER = HIGHLIGHT+".fragmenter";
public static final String FRAG_LIST_BUILDER = HIGHLIGHT+".fragListBuilder";
public static final String FRAGMENTS_BUILDER = HIGHLIGHT+".fragmentsBuilder";
public static final String BOUNDARY_SCANNER = HIGHLIGHT+".boundaryScanner";
public static final String BS_MAX_SCAN = HIGHLIGHT+".bs.maxScan";
public static final String BS_CHARS = HIGHLIGHT+".bs.chars";
public static final String BS_TYPE = HIGHLIGHT+".bs.type";
public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language";
public static final String BS_COUNTRY = HIGHLIGHT+".bs.country";
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";