mirror of https://github.com/apache/lucene.git
SOLR-2053: hook in support for specifying comparators
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@986707 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0a61e87e25
commit
e8344ddee1
|
@ -176,6 +176,10 @@ public class SpellChecker implements java.io.Closeable {
|
|||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
public Comparator<SuggestWord> getComparator() {
|
||||
return comparator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the {@link StringDistance} implementation for this
|
||||
* {@link SpellChecker} instance.
|
||||
|
|
|
@ -224,6 +224,8 @@ New Features
|
|||
|
||||
* SOLR-2030: Make FastVectorHighlighter use of SolrEncoder. (koji)
|
||||
|
||||
* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -710,6 +710,19 @@
|
|||
<str name="spellcheckIndexDir">./spellchecker2</str>
|
||||
</lst>
|
||||
-->
|
||||
<!-- Use an alternate comparator -->
|
||||
<!--<lst name="spellchecker">
|
||||
<str name="name">freq</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerFreq</str>
|
||||
<!– comparatorClass be one of:
|
||||
1. score (default)
|
||||
2. freq (Frequency first, then score)
|
||||
3. A fully qualified class name
|
||||
–>
|
||||
<str name="comparatorClass">freq</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
-->
|
||||
|
||||
<!-- a file based spell checker
|
||||
<lst name="spellchecker">
|
||||
|
|
|
@ -22,7 +22,12 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -60,6 +65,11 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
public static final String ACCURACY = "accuracy";
|
||||
public static final String STRING_DISTANCE = "distanceMeasure";
|
||||
public static final String FIELD_TYPE = "fieldType";
|
||||
public static final String COMPARATOR_CLASS = "comparatorClass";
|
||||
|
||||
public static final String SCORE_COMP = "score";
|
||||
public static final String FREQ_COMP = "freq";
|
||||
|
||||
protected String field;
|
||||
protected String fieldTypeName;
|
||||
protected org.apache.lucene.search.spell.SpellChecker spellChecker;
|
||||
|
@ -89,6 +99,19 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
}
|
||||
}
|
||||
sourceLocation = (String) config.get(LOCATION);
|
||||
String compClass = (String) config.get(COMPARATOR_CLASS);
|
||||
Comparator<SuggestWord> comp = null;
|
||||
if (compClass != null){
|
||||
if (compClass.equalsIgnoreCase(SCORE_COMP)){
|
||||
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
} else if (compClass.equalsIgnoreCase(FREQ_COMP)){
|
||||
comp = new SuggestWordFrequencyComparator();
|
||||
} else{//must be a FQCN
|
||||
comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass);
|
||||
}
|
||||
} else {
|
||||
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
|
||||
}
|
||||
field = (String) config.get(FIELD);
|
||||
String strDistanceName = (String)config.get(STRING_DISTANCE);
|
||||
if (strDistanceName != null) {
|
||||
|
@ -99,7 +122,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
}
|
||||
try {
|
||||
initIndex();
|
||||
spellChecker = new SpellChecker(index, sd);
|
||||
spellChecker = new SpellChecker(index, sd, comp);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
@ -230,4 +253,8 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
|
|||
public StringDistance getStringDistance() {
|
||||
return sd;
|
||||
}
|
||||
|
||||
public SpellChecker getSpellChecker() {
|
||||
return spellChecker;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,10 +27,14 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.search.spell.JaroWinklerDistance;
|
||||
import org.apache.lucene.search.spell.SpellChecker;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.SpellCheckComponent;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -39,6 +43,7 @@ import org.junit.Test;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -75,6 +80,27 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
|
|||
queryConverter = null;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComparator() throws Exception {
|
||||
SpellCheckComponent component = (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
|
||||
assertNotNull(component);
|
||||
AbstractLuceneSpellChecker spellChecker;
|
||||
Comparator<SuggestWord> comp;
|
||||
spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("freq");
|
||||
assertNotNull(spellChecker);
|
||||
comp = spellChecker.getSpellChecker().getComparator();
|
||||
assertNotNull(comp);
|
||||
assertTrue(comp instanceof SuggestWordFrequencyComparator);
|
||||
|
||||
spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("fqcn");
|
||||
assertNotNull(spellChecker);
|
||||
comp = spellChecker.getSpellChecker().getComparator();
|
||||
assertNotNull(comp);
|
||||
assertTrue(comp instanceof SampleComparator);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpelling() throws Exception {
|
||||
IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
package org.apache.solr.spelling;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
|
||||
/**
|
||||
* Comparator for testing purposes
|
||||
*
|
||||
**/
|
||||
public class SampleComparator implements Comparator<SuggestWord> {
|
||||
|
||||
|
||||
@Override
|
||||
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
|
||||
return suggestWord.string.compareTo(suggestWord1.string);
|
||||
}
|
||||
|
||||
}
|
|
@ -357,6 +357,27 @@
|
|||
<str name="characterEncoding">UTF-8</str>
|
||||
<str name="spellcheckIndexDir">spellchecker3</str>
|
||||
</lst>
|
||||
<!-- Comparator -->
|
||||
<lst name="spellchecker">
|
||||
<str name="name">freq</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerFreq</str>
|
||||
<!-- comparatorClass be one of:
|
||||
1. score (default)
|
||||
2. freq (Frequency first, then score)
|
||||
3. A fully qualified class name
|
||||
-->
|
||||
<str name="comparatorClass">freq</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
<lst name="spellchecker">
|
||||
<str name="name">fqcn</str>
|
||||
<str name="field">lowerfilt</str>
|
||||
<str name="spellcheckIndexDir">spellcheckerFQCN</str>
|
||||
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
|
||||
<str name="buildOnCommit">true</str>
|
||||
</lst>
|
||||
|
||||
</searchComponent>
|
||||
|
||||
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
|
||||
|
|
Loading…
Reference in New Issue