SOLR-2053: hook in support for specifying comparators

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@986707 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2010-08-18 14:38:53 +00:00
parent 0a61e87e25
commit e8344ddee1
7 changed files with 130 additions and 1 deletions

View File

@ -176,6 +176,10 @@ public class SpellChecker implements java.io.Closeable {
this.comparator = comparator;
}
public Comparator<SuggestWord> getComparator() {
return comparator;
}
/**
* Sets the {@link StringDistance} implementation for this
* {@link SpellChecker} instance.

View File

@ -224,6 +224,8 @@ New Features
* SOLR-2030: Make FastVectorHighlighter use of SolrEncoder. (koji)
* SOLR-2053: Add support for custom comparators in Solr spellchecker, per LUCENE-2479 (gsingers)
Optimizations
----------------------

View File

@ -710,6 +710,19 @@
<str name="spellcheckIndexDir">./spellchecker2</str>
</lst>
-->
<!-- Use an alternate comparator -->
<!--<lst name="spellchecker">
<str name="name">freq</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFreq</str>
&lt;!&ndash; comparatorClass be one of:
1. score (default)
2. freq (Frequency first, then score)
3. A fully qualified class name
&ndash;&gt;
<str name="comparatorClass">freq</str>
<str name="buildOnCommit">true</str>
-->
<!-- a file based spell checker
<lst name="spellchecker">

View File

@ -22,7 +22,12 @@ import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.search.spell.SuggestWordQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -60,6 +65,11 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
public static final String ACCURACY = "accuracy";
public static final String STRING_DISTANCE = "distanceMeasure";
public static final String FIELD_TYPE = "fieldType";
public static final String COMPARATOR_CLASS = "comparatorClass";
public static final String SCORE_COMP = "score";
public static final String FREQ_COMP = "freq";
protected String field;
protected String fieldTypeName;
protected org.apache.lucene.search.spell.SpellChecker spellChecker;
@ -89,6 +99,19 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
}
}
sourceLocation = (String) config.get(LOCATION);
String compClass = (String) config.get(COMPARATOR_CLASS);
Comparator<SuggestWord> comp = null;
if (compClass != null){
if (compClass.equalsIgnoreCase(SCORE_COMP)){
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
} else if (compClass.equalsIgnoreCase(FREQ_COMP)){
comp = new SuggestWordFrequencyComparator();
} else{//must be a FQCN
comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass);
}
} else {
comp = SuggestWordQueue.DEFAULT_COMPARATOR;
}
field = (String) config.get(FIELD);
String strDistanceName = (String)config.get(STRING_DISTANCE);
if (strDistanceName != null) {
@ -99,7 +122,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
}
try {
initIndex();
spellChecker = new SpellChecker(index, sd);
spellChecker = new SpellChecker(index, sd, comp);
} catch (IOException e) {
throw new RuntimeException(e);
}
@ -230,4 +253,8 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
public StringDistance getStringDistance() {
return sd;
}
public SpellChecker getSpellChecker() {
return spellChecker;
}
}

View File

@ -27,10 +27,14 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.util.RefCounted;
import org.apache.solr.search.SolrIndexSearcher;
import org.junit.AfterClass;
@ -39,6 +43,7 @@ import org.junit.Test;
import java.io.File;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.Map;
@ -75,6 +80,27 @@ public class IndexBasedSpellCheckerTest extends SolrTestCaseJ4 {
queryConverter = null;
}
@Test
public void testComparator() throws Exception {
SpellCheckComponent component = (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
assertNotNull(component);
AbstractLuceneSpellChecker spellChecker;
Comparator<SuggestWord> comp;
spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("freq");
assertNotNull(spellChecker);
comp = spellChecker.getSpellChecker().getComparator();
assertNotNull(comp);
assertTrue(comp instanceof SuggestWordFrequencyComparator);
spellChecker = (AbstractLuceneSpellChecker) component.getSpellChecker("fqcn");
assertNotNull(spellChecker);
comp = spellChecker.getSpellChecker().getComparator();
assertNotNull(comp);
assertTrue(comp instanceof SampleComparator);
}
@Test
public void testSpelling() throws Exception {
IndexBasedSpellChecker checker = new IndexBasedSpellChecker();

View File

@ -0,0 +1,36 @@
package org.apache.solr.spelling;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.spell.SuggestWord;
import java.util.Comparator;
/**
* Comparator for testing purposes
*
**/
public class SampleComparator implements Comparator<SuggestWord> {
@Override
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
return suggestWord.string.compareTo(suggestWord1.string);
}
}

View File

@ -357,6 +357,27 @@
<str name="characterEncoding">UTF-8</str>
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
<!-- Comparator -->
<lst name="spellchecker">
<str name="name">freq</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFreq</str>
<!-- comparatorClass be one of:
1. score (default)
2. freq (Frequency first, then score)
3. A fully qualified class name
-->
<str name="comparatorClass">freq</str>
<str name="buildOnCommit">true</str>
</lst>
<lst name="spellchecker">
<str name="name">fqcn</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerFQCN</str>
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
<str name="buildOnCommit">true</str>
</lst>
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>