SOLR-2993: fix test failures (SOLR-2993-fixes.patch)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1346489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
James Dyer 2012-06-05 17:44:02 +00:00
parent 24ac5fdd13
commit ce20e7b133
7 changed files with 79 additions and 28 deletions

View File

@ -19,6 +19,7 @@ package org.apache.solr.spelling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
@ -135,8 +136,12 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker {
//TODO: This just interleaves the results. In the future, we might want to let users give each checker its
// own weight and use that in combination with score & frequency to sort the results.
private SpellingResult mergeCheckers(SpellingResult[] results, int numSug) {
Map<Token, Integer> combinedTokenFrequency = new HashMap<Token, Integer>();
Map<Token, List<LinkedHashMap<String, Integer>>> allSuggestions = new LinkedHashMap<Token, List<LinkedHashMap<String, Integer>>>();
for(SpellingResult result : results) {
if(result.getTokenFrequency()!=null) {
combinedTokenFrequency.putAll(result.getTokenFrequency());
}
for(Map.Entry<Token, LinkedHashMap<String, Integer>> entry : result.getSuggestions().entrySet()) {
List<LinkedHashMap<String, Integer>> allForThisToken = allSuggestions.get(entry.getKey());
if(allForThisToken==null) {
@ -161,6 +166,10 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker {
anyData = true;
Map.Entry<String,Integer> corr = iter.next();
combinedResult.add(original, corr.getKey(), corr.getValue());
Integer tokenFrequency = combinedTokenFrequency.get(original);
if(tokenFrequency!=null) {
combinedResult.addFrequency(original, tokenFrequency);
}
if(++numberAdded==numSug) {
break;
}

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
@ -90,16 +91,22 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
*/
public static final String PARAM_MIN_SUGGESTION_FREQUENCY = "minSuggestionFreq";
/**
* <p>
* Specify a value on the "breakSugestionTieBreaker" parameter.
* The default is MAX_FREQ.
* </p>
* <p>
* Each constant names the {@link WordBreakSpellChecker.BreakSuggestionSortMethod}
* it corresponds to.
* </p>
*/
public enum BreakSuggestionTieBreaker {
/**
* See
* {@link WordBreakSpellChecker.BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY}
*/
MAX_FREQ,
/**
* See
* {@link WordBreakSpellChecker.BreakSuggestionSortMethod#NUM_CHANGES_THEN_SUMMED_FREQUENCY}
*/
SUM_FREQ
};
@ -108,6 +115,7 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
private boolean combineWords = false;
private boolean breakWords = false;
private BreakSuggestionSortMethod sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
private static final Pattern spacePattern = Pattern.compile("\\s+");
@Override
public String init(@SuppressWarnings("unchecked") NamedList config,
@ -127,6 +135,8 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
throw new IllegalArgumentException("Invalid value for parameter "
+ PARAM_BREAK_SUGGESTION_TIE_BREAKER + " : " + bstb);
}
} else {
sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
}
int mc = intParam(config, PARAM_MAX_CHANGES);
if (mc > 0) {
@ -272,21 +282,27 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
while (lastBreak != null || lastCombine != null) {
if (lastBreak == null) {
result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
lastCombine = null;
} else if (lastCombine == null) {
result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
lastBreak = null;
} else if (lastBreak.freq < lastCombine.freq) {
result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
lastCombine = null;
} else if (lastCombine.freq < lastBreak.freq) {
result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
lastBreak = null;
} else if (breakCount >= combineCount) {
result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
lastCombine = null;
} else {
result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
lastBreak = null;
}
if (result.getSuggestions().size() > numSuggestions) {
@ -304,6 +320,21 @@ public class WordBreakSolrSpellChecker extends SolrSpellChecker {
return result;
}
/**
 * Derives a single document-frequency figure for a token whose text may
 * consist of several whitespace-separated words.
 * <p>
 * The token text is split on runs of whitespace and the document frequency
 * of every resulting word is looked up in {@code field}. When the configured
 * sort method is {@code NUM_CHANGES_THEN_MAX_FREQUENCY} the largest of those
 * frequencies is returned; for any other sort method the frequencies are
 * added together.
 * </p>
 *
 * @param ir    reader used to look up the document frequency of each word
 * @param token token whose text is split into words
 * @return the maximum or the sum of the per-word document frequencies,
 *         depending on the configured sort method
 * @throws IOException if the reader fails while looking up a frequency
 */
private int getCombineFrequency(IndexReader ir, Token token) throws IOException {
  final boolean takeMaximum =
      sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
  int frequency = 0;
  for (String part : spacePattern.split(token.toString())) {
    final int partFrequency = ir.docFreq(new Term(field, part));
    // Either keep the best single-word frequency or accumulate all of them.
    frequency = takeMaximum
        ? Math.max(frequency, partFrequency)
        : frequency + partFrequency;
  }
  return frequency;
}
@Override
public void build(SolrCore core, SolrIndexSearcher searcher) {
/* no-op */

View File

@ -75,6 +75,7 @@ Config for testing spellcheck component
<str name="field">lowerfilt</str>
<str name="combineWords">true</str>
<str name="breakWords">true</str>
<str name="breakSugestionTieBreaker">MAX_FREQ</str>
<int name="maxChanges">10</int>
</lst>
<lst name="spellchecker">

View File

@ -149,6 +149,6 @@ public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTes
query("q", "lowerfilt:(\"quote red fox\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10");
query("q", "lowerfilt:(\"rod fix\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10");
//query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
}
}

View File

@ -139,41 +139,50 @@ public class WordBreakSolrSpellCheckerTest extends SolrTestCaseJ4 {
"//lst[@name='paintable']/int[@name='numFound']=8",
"//lst[@name='paintable']/int[@name='startOffset']=11",
"//lst[@name='paintable']/int[@name='endOffset']=20",
"//lst[@name='paintable']/arr[@name='suggestion']/str[1]='printable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/str[2]='paint able'", //1 op ; max doc freq=5
"//lst[@name='paintable']/arr[@name='suggestion']/str[3]='pintable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/str[4]='pain table'", //1 op ; max doc freq=4
"//lst[@name='paintable']/arr[@name='suggestion']/str[5]='pointable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/str[6]='pa in table'",//2 ops
"//lst[@name='paintable']/arr[@name='suggestion']/str[7]='plantable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/str[8]='puntable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/int[@name='origFreq']=0",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/str[@name='word']='printable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/int[@name='freq']=3",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/str[@name='word']='paint able'", //1 op
"//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/int[@name='freq']=5",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/str[@name='word']='pintable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/int[@name='freq']=1",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/str[@name='word']='pain table'", //1 op
"//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/int[@name='freq']=2",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/str[@name='word']='pointable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/int[@name='freq']=1",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/str[@name='word']='pa in table'", //2 ops
"//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/int[@name='freq']=7",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/str[@name='word']='plantable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/int[@name='freq']=1",
"//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/str[@name='word']='puntable'", //SolrSpellChecker result interleaved
"//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/int[@name='freq']=1",
"//lst[@name='pine']/int[@name='numFound']=2",
"//lst[@name='pine']/int[@name='startOffset']=21",
"//lst[@name='pine']/int[@name='endOffset']=25",
"//lst[@name='pine']/arr[@name='suggestion']/str[1]='line'",
"//lst[@name='pine']/arr[@name='suggestion']/str[2]='pi ne'",
"//lst[@name='pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='line'",
"//lst[@name='pine']/arr[@name='suggestion']/lst[2]/str[@name='word']='pi ne'",
"//lst[@name='apple']/int[@name='numFound']=1",
"//lst[@name='apple']/arr[@name='suggestion']/str[1]='ample'",
"//lst[@name='apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='ample'",
"//lst[@name='good']/int[@name='numFound']=1",
"//lst[@name='good']/arr[@name='suggestion']/str[1]='food'",
"//lst[@name='good']/arr[@name='suggestion']/lst[1]/str[@name='word']='food'",
"//lst[@name='ness']/int[@name='numFound']=1",
"//lst[@name='ness']/arr[@name='suggestion']/str[1]='mess'",
"//lst[@name='ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='mess'",
"//lst[@name='pine apple']/int[@name='numFound']=1",
"//lst[@name='pine apple']/int[@name='startOffset']=21",
"//lst[@name='pine apple']/int[@name='endOffset']=31",
"//lst[@name='pine apple']/arr[@name='suggestion']/str[1]='pineapple'",
"//lst[@name='pine apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapple'",
"//lst[@name='paintable pine']/int[@name='numFound']=1",
"//lst[@name='paintable pine']/int[@name='startOffset']=11",
"//lst[@name='paintable pine']/int[@name='endOffset']=25",
"//lst[@name='paintable pine']/arr[@name='suggestion']/str[1]='paintablepine'",
"//lst[@name='paintable pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='paintablepine'",
"//lst[@name='good ness']/int[@name='numFound']=1",
"//lst[@name='good ness']/int[@name='startOffset']=32",
"//lst[@name='good ness']/int[@name='endOffset']=41",
"//lst[@name='good ness']/arr[@name='suggestion']/str[1]='goodness'",
"//lst[@name='good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='goodness'",
"//lst[@name='pine apple good ness']/int[@name='numFound']=1",
"//lst[@name='pine apple good ness']/int[@name='startOffset']=21",
"//lst[@name='pine apple good ness']/int[@name='endOffset']=41",
"//lst[@name='pine apple good ness']/arr[@name='suggestion']/str[1]='pineapplegoodness'"
"//lst[@name='pine apple good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapplegoodness'"
);
}
@Test

View File

@ -1245,7 +1245,7 @@
collations (re-written queries) can include a combination of
corrections from both spellcheckers -->
<str name="spellcheck.dictionary">default</str>
<!--str name="spellcheck.dictionary">wordbreak</str-->
<str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck">on</str>
<str name="spellcheck.extendedResults">true</str>
<str name="spellcheck.count">10</str>

View File

@ -27,7 +27,6 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.util.ExternalPaths;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import java.util.List;
@ -48,10 +47,11 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
static String field = "name";
@Ignore
@Test
public void testSpellCheckResponse() throws Exception {
getSolrServer();
server.deleteByQuery("*:*");
server.commit(true, true);
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "111");
doc.setField(field, "Samsung");
@ -62,7 +62,6 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
query.set(CommonParams.QT, "/spell");
query.set("spellcheck", true);
query.set(SpellingParams.SPELLCHECK_Q, "samsang");
query.set(SpellingParams.SPELLCHECK_BUILD, true);
QueryRequest request = new QueryRequest(query);
SpellCheckResponse response = request.process(server).getSpellCheckResponse();
Assert.assertEquals("samsung", response.getFirstSuggestion("samsang"));
@ -71,17 +70,18 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
@Test
public void testSpellCheckResponse_Extended() throws Exception {
getSolrServer();
server.deleteByQuery("*:*");
server.commit(true, true);
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "111");
doc.setField(field, "Samsung");
server.add(doc);
server.commit(true, true);
SolrQuery query = new SolrQuery("name:samsang");
SolrQuery query = new SolrQuery("*:*");
query.set(CommonParams.QT, "/spell");
query.set("spellcheck", true);
//query.set(SpellingParams.SPELLCHECK_Q, "samsang");
query.set(SpellingParams.SPELLCHECK_BUILD, true);
query.set(SpellingParams.SPELLCHECK_Q, "samsang");
query.set(SpellingParams.SPELLCHECK_EXTENDED_RESULTS, true);
QueryRequest request = new QueryRequest(query);
SpellCheckResponse response = request.process(server).getSpellCheckResponse();
@ -109,6 +109,8 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
@Test
public void testSpellCheckCollationResponse() throws Exception {
getSolrServer();
server.deleteByQuery("*:*");
server.commit(true, true);
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "0");
doc.setField("name", "faith hope and love");
@ -135,7 +137,6 @@ public class TestSpellCheckResponse extends SolrJettyTestBase {
SolrQuery query = new SolrQuery("name:(+fauth +home +loane)");
query.set(CommonParams.QT, "/spell");
query.set("spellcheck", true);
query.set(SpellingParams.SPELLCHECK_BUILD, true);
query.set(SpellingParams.SPELLCHECK_COUNT, 10);
query.set(SpellingParams.SPELLCHECK_COLLATE, true);
QueryRequest request = new QueryRequest(query);